xref: /linux/drivers/net/ethernet/amazon/ena/ena_netdev.c (revision 1bd6676254b4ab6acd44b662b5e92822c036463a)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/ethtool.h>
9 #include <linux/kernel.h>
10 #include <linux/module.h>
11 #include <linux/numa.h>
12 #include <linux/pci.h>
13 #include <linux/utsname.h>
14 #include <linux/version.h>
15 #include <linux/vmalloc.h>
16 #include <net/ip.h>
17 
18 #include "ena_netdev.h"
19 #include "ena_pci_id_tbl.h"
20 #include "ena_xdp.h"
21 
22 #include "ena_phc.h"
23 
24 #include "ena_devlink.h"
25 
26 #include "ena_debugfs.h"
27 
28 MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
29 MODULE_DESCRIPTION(DEVICE_NAME);
30 MODULE_LICENSE("GPL");
31 
32 /* Time in jiffies before concluding the transmitter is hung. */
33 #define TX_TIMEOUT  (5 * HZ)
34 
/* Evaluates to the smaller of ENA_MAX_NUM_IO_QUEUES and the number of possible CPUs. */
35 #define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
36 
/* Bitmask of NETIF_MSG_* categories: driver, probe, ifup, ifdown, TX error and RX error.
 * NOTE(review): presumably the initial msg_enable value of the adapter - confirm at the use site.
 */
37 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
38 		NETIF_MSG_IFDOWN | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
39 
40 static struct ena_aenq_handlers aenq_handlers;
41 
42 static struct workqueue_struct *ena_wq;
43 
44 MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
45 
46 static int ena_rss_init_default(struct ena_adapter *adapter);
47 static void check_for_admin_com_state(struct ena_adapter *adapter);
48 
49 static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
50 {
51 	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
52 	struct ena_adapter *adapter = netdev_priv(dev);
53 	unsigned int time_since_last_napi, threshold;
54 	struct ena_ring *tx_ring;
55 	int napi_scheduled;
56 
57 	if (txqueue >= adapter->num_io_queues) {
58 		netdev_err(dev, "TX timeout on invalid queue %u\n", txqueue);
59 		goto schedule_reset;
60 	}
61 
62 	threshold = jiffies_to_usecs(dev->watchdog_timeo);
63 	tx_ring = &adapter->tx_ring[txqueue];
64 
65 	time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
66 	napi_scheduled = !!(tx_ring->napi->state & NAPIF_STATE_SCHED);
67 
68 	netdev_err(dev,
69 		   "TX q %d is paused for too long (threshold %u). Time since last napi %u usec. napi scheduled: %d\n",
70 		   txqueue,
71 		   threshold,
72 		   time_since_last_napi,
73 		   napi_scheduled);
74 
75 	if (threshold < time_since_last_napi && napi_scheduled) {
76 		netdev_err(dev,
77 			   "napi handler hasn't been called for a long time but is scheduled\n");
78 		reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
79 	}
80 schedule_reset:
81 	/* Change the state of the device to trigger reset
82 	 * Check that we are not in the middle or a trigger already
83 	 */
84 	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
85 		return;
86 
87 	ena_reset_device(adapter, reset_reason);
88 	ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
89 }
90 
91 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
92 {
93 	int i;
94 
95 	for (i = 0; i < adapter->num_io_queues; i++)
96 		adapter->rx_ring[i].mtu = mtu;
97 }
98 
99 static int ena_change_mtu(struct net_device *dev, int new_mtu)
100 {
101 	struct ena_adapter *adapter = netdev_priv(dev);
102 	int ret;
103 
104 	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
105 	if (!ret) {
106 		netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
107 		update_rx_ring_mtu(adapter, new_mtu);
108 		WRITE_ONCE(dev->mtu, new_mtu);
109 	} else {
110 		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
111 			  new_mtu);
112 	}
113 
114 	return ret;
115 }
116 
117 int ena_xmit_common(struct ena_adapter *adapter,
118 		    struct ena_ring *ring,
119 		    struct ena_tx_buffer *tx_info,
120 		    struct ena_com_tx_ctx *ena_tx_ctx,
121 		    u16 next_to_use,
122 		    u32 bytes)
123 {
124 	int rc, nb_hw_desc;
125 
126 	if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
127 						ena_tx_ctx))) {
128 		netif_dbg(adapter, tx_queued, adapter->netdev,
129 			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
130 			  ring->qid);
131 		ena_ring_tx_doorbell(ring);
132 	}
133 
134 	/* prepare the packet's descriptors to dma engine */
135 	rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
136 				&nb_hw_desc);
137 
138 	/* In case there isn't enough space in the queue for the packet,
139 	 * we simply drop it. All other failure reasons of
140 	 * ena_com_prepare_tx() are fatal and therefore require a device reset.
141 	 */
142 	if (unlikely(rc)) {
143 		netif_err(adapter, tx_queued, adapter->netdev,
144 			  "Failed to prepare tx bufs\n");
145 		ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp);
146 		if (rc != -ENOMEM)
147 			ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE);
148 		return rc;
149 	}
150 
151 	u64_stats_update_begin(&ring->syncp);
152 	ring->tx_stats.cnt++;
153 	ring->tx_stats.bytes += bytes;
154 	u64_stats_update_end(&ring->syncp);
155 
156 	tx_info->tx_descs = nb_hw_desc;
157 	tx_info->total_tx_size = bytes;
158 	tx_info->last_jiffies = jiffies;
159 	tx_info->print_once = 0;
160 
161 	ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
162 						 ring->ring_size);
163 	return 0;
164 }
165 
166 static void ena_init_io_rings_common(struct ena_adapter *adapter,
167 				     struct ena_ring *ring, u16 qid)
168 {
169 	ring->qid = qid;
170 	ring->pdev = adapter->pdev;
171 	ring->dev = &adapter->pdev->dev;
172 	ring->netdev = adapter->netdev;
173 	ring->napi = &adapter->ena_napi[qid].napi;
174 	ring->adapter = adapter;
175 	ring->ena_dev = adapter->ena_dev;
176 	ring->per_napi_packets = 0;
177 	ring->cpu = 0;
178 	ring->numa_node = 0;
179 	ring->no_interrupt_event_cnt = 0;
180 	u64_stats_init(&ring->syncp);
181 }
182 
183 void ena_init_io_rings(struct ena_adapter *adapter,
184 		       int first_index, int count)
185 {
186 	struct ena_com_dev *ena_dev;
187 	struct ena_ring *txr, *rxr;
188 	int i;
189 
190 	ena_dev = adapter->ena_dev;
191 
192 	for (i = first_index; i < first_index + count; i++) {
193 		txr = &adapter->tx_ring[i];
194 		rxr = &adapter->rx_ring[i];
195 
196 		/* TX common ring state */
197 		ena_init_io_rings_common(adapter, txr, i);
198 
199 		/* TX specific ring state */
200 		txr->ring_size = adapter->requested_tx_ring_size;
201 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
202 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
203 		txr->sgl_size = adapter->max_tx_sgl_size;
204 		txr->smoothed_interval =
205 			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
206 		txr->disable_meta_caching = adapter->disable_meta_caching;
207 		spin_lock_init(&txr->xdp_tx_lock);
208 
209 		/* Don't init RX queues for xdp queues */
210 		if (!ENA_IS_XDP_INDEX(adapter, i)) {
211 			/* RX common ring state */
212 			ena_init_io_rings_common(adapter, rxr, i);
213 
214 			/* RX specific ring state */
215 			rxr->ring_size = adapter->requested_rx_ring_size;
216 			rxr->rx_copybreak = adapter->rx_copybreak;
217 			rxr->sgl_size = adapter->max_rx_sgl_size;
218 			rxr->smoothed_interval =
219 				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
220 			rxr->empty_rx_queue = 0;
221 			rxr->rx_headroom = NET_SKB_PAD;
222 			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
223 			rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
224 		}
225 	}
226 }
227 
228 /* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
229  * @adapter: network interface device structure
230  * @qid: queue index
231  *
232  * Return 0 on success, negative on failure
233  */
234 static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
235 {
236 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
237 	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
238 	int size, i, node;
239 
240 	if (tx_ring->tx_buffer_info) {
241 		netif_err(adapter, ifup,
242 			  adapter->netdev, "tx_buffer_info info is not NULL");
243 		return -EEXIST;
244 	}
245 
246 	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
247 	node = cpu_to_node(ena_irq->cpu);
248 
249 	tx_ring->tx_buffer_info = vzalloc_node(size, node);
250 	if (!tx_ring->tx_buffer_info) {
251 		tx_ring->tx_buffer_info = vzalloc(size);
252 		if (!tx_ring->tx_buffer_info)
253 			goto err_tx_buffer_info;
254 	}
255 
256 	size = sizeof(u16) * tx_ring->ring_size;
257 	tx_ring->free_ids = vzalloc_node(size, node);
258 	if (!tx_ring->free_ids) {
259 		tx_ring->free_ids = vzalloc(size);
260 		if (!tx_ring->free_ids)
261 			goto err_tx_free_ids;
262 	}
263 
264 	size = tx_ring->tx_max_header_size;
265 	tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
266 	if (!tx_ring->push_buf_intermediate_buf) {
267 		tx_ring->push_buf_intermediate_buf = vzalloc(size);
268 		if (!tx_ring->push_buf_intermediate_buf)
269 			goto err_push_buf_intermediate_buf;
270 	}
271 
272 	/* Req id ring for TX out of order completions */
273 	for (i = 0; i < tx_ring->ring_size; i++)
274 		tx_ring->free_ids[i] = i;
275 
276 	/* Reset tx statistics */
277 	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
278 
279 	tx_ring->next_to_use = 0;
280 	tx_ring->next_to_clean = 0;
281 	tx_ring->cpu = ena_irq->cpu;
282 	tx_ring->numa_node = node;
283 	return 0;
284 
285 err_push_buf_intermediate_buf:
286 	vfree(tx_ring->free_ids);
287 	tx_ring->free_ids = NULL;
288 err_tx_free_ids:
289 	vfree(tx_ring->tx_buffer_info);
290 	tx_ring->tx_buffer_info = NULL;
291 err_tx_buffer_info:
292 	return -ENOMEM;
293 }
294 
295 /* ena_free_tx_resources - Free I/O Tx Resources per Queue
296  * @adapter: network interface device structure
297  * @qid: queue index
298  *
299  * Free all transmit software resources
300  */
301 static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
302 {
303 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
304 
305 	vfree(tx_ring->tx_buffer_info);
306 	tx_ring->tx_buffer_info = NULL;
307 
308 	vfree(tx_ring->free_ids);
309 	tx_ring->free_ids = NULL;
310 
311 	vfree(tx_ring->push_buf_intermediate_buf);
312 	tx_ring->push_buf_intermediate_buf = NULL;
313 }
314 
315 int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
316 				    int first_index, int count)
317 {
318 	int i, rc = 0;
319 
320 	for (i = first_index; i < first_index + count; i++) {
321 		rc = ena_setup_tx_resources(adapter, i);
322 		if (rc)
323 			goto err_setup_tx;
324 	}
325 
326 	return 0;
327 
328 err_setup_tx:
329 
330 	netif_err(adapter, ifup, adapter->netdev,
331 		  "Tx queue %d: allocation failed\n", i);
332 
333 	/* rewind the index freeing the rings as we go */
334 	while (first_index < i--)
335 		ena_free_tx_resources(adapter, i);
336 	return rc;
337 }
338 
/* ena_free_all_io_tx_resources_in_range - free Tx resources of a queue range
 * @adapter: network interface device structure
 * @first_index: index of the first queue
 * @count: number of consecutive queues
 */
void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
					   int first_index, int count)
{
	int end_index = first_index + count;
	int qid = first_index;

	while (qid < end_index) {
		ena_free_tx_resources(adapter, qid);
		qid++;
	}
}
347 
348 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
349  * @adapter: board private structure
350  *
351  * Free all transmit software resources
352  */
353 void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
354 {
355 	ena_free_all_io_tx_resources_in_range(adapter,
356 					      0,
357 					      adapter->xdp_num_queues +
358 					      adapter->num_io_queues);
359 }
360 
361 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
362  * @adapter: network interface device structure
363  * @qid: queue index
364  *
365  * Returns 0 on success, negative on failure
366  */
367 static int ena_setup_rx_resources(struct ena_adapter *adapter,
368 				  u32 qid)
369 {
370 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
371 	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
372 	int size, node, i;
373 
374 	if (rx_ring->rx_buffer_info) {
375 		netif_err(adapter, ifup, adapter->netdev,
376 			  "rx_buffer_info is not NULL");
377 		return -EEXIST;
378 	}
379 
380 	/* alloc extra element so in rx path
381 	 * we can always prefetch rx_info + 1
382 	 */
383 	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
384 	node = cpu_to_node(ena_irq->cpu);
385 
386 	rx_ring->rx_buffer_info = vzalloc_node(size, node);
387 	if (!rx_ring->rx_buffer_info) {
388 		rx_ring->rx_buffer_info = vzalloc(size);
389 		if (!rx_ring->rx_buffer_info)
390 			return -ENOMEM;
391 	}
392 
393 	size = sizeof(u16) * rx_ring->ring_size;
394 	rx_ring->free_ids = vzalloc_node(size, node);
395 	if (!rx_ring->free_ids) {
396 		rx_ring->free_ids = vzalloc(size);
397 		if (!rx_ring->free_ids) {
398 			vfree(rx_ring->rx_buffer_info);
399 			rx_ring->rx_buffer_info = NULL;
400 			return -ENOMEM;
401 		}
402 	}
403 
404 	/* Req id ring for receiving RX pkts out of order */
405 	for (i = 0; i < rx_ring->ring_size; i++)
406 		rx_ring->free_ids[i] = i;
407 
408 	/* Reset rx statistics */
409 	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
410 
411 	rx_ring->next_to_clean = 0;
412 	rx_ring->next_to_use = 0;
413 	rx_ring->cpu = ena_irq->cpu;
414 	rx_ring->numa_node = node;
415 
416 	return 0;
417 }
418 
419 /* ena_free_rx_resources - Free I/O Rx Resources
420  * @adapter: network interface device structure
421  * @qid: queue index
422  *
423  * Free all receive software resources
424  */
425 static void ena_free_rx_resources(struct ena_adapter *adapter,
426 				  u32 qid)
427 {
428 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
429 
430 	vfree(rx_ring->rx_buffer_info);
431 	rx_ring->rx_buffer_info = NULL;
432 
433 	vfree(rx_ring->free_ids);
434 	rx_ring->free_ids = NULL;
435 }
436 
437 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
438  * @adapter: board private structure
439  *
440  * Return 0 on success, negative on failure
441  */
442 static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
443 {
444 	int i, rc = 0;
445 
446 	for (i = 0; i < adapter->num_io_queues; i++) {
447 		rc = ena_setup_rx_resources(adapter, i);
448 		if (rc)
449 			goto err_setup_rx;
450 	}
451 
452 	return 0;
453 
454 err_setup_rx:
455 
456 	netif_err(adapter, ifup, adapter->netdev,
457 		  "Rx queue %d: allocation failed\n", i);
458 
459 	/* rewind the index freeing the rings as we go */
460 	while (i--)
461 		ena_free_rx_resources(adapter, i);
462 	return rc;
463 }
464 
465 /* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
466  * @adapter: board private structure
467  *
468  * Free all receive software resources
469  */
470 static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
471 {
472 	int i;
473 
474 	for (i = 0; i < adapter->num_io_queues; i++)
475 		ena_free_rx_resources(adapter, i);
476 }
477 
478 static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
479 				       dma_addr_t *dma)
480 {
481 	struct page *page;
482 
483 	/* This would allocate the page on the same NUMA node the executing code
484 	 * is running on.
485 	 */
486 	page = dev_alloc_page();
487 	if (!page) {
488 		ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp);
489 		return ERR_PTR(-ENOSPC);
490 	}
491 
492 	/* To enable NIC-side port-mirroring, AKA SPAN port,
493 	 * we make the buffer readable from the nic as well
494 	 */
495 	*dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
496 			    DMA_BIDIRECTIONAL);
497 	if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
498 		ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
499 				  &rx_ring->syncp);
500 		__free_page(page);
501 		return ERR_PTR(-EIO);
502 	}
503 
504 	return page;
505 }
506 
507 static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
508 			       struct ena_rx_buffer *rx_info)
509 {
510 	int headroom = rx_ring->rx_headroom;
511 	struct ena_com_buf *ena_buf;
512 	struct page *page;
513 	dma_addr_t dma;
514 	int tailroom;
515 
516 	/* restore page offset value in case it has been changed by device */
517 	rx_info->buf_offset = headroom;
518 
519 	/* if previous allocated page is not used */
520 	if (unlikely(rx_info->page))
521 		return 0;
522 
523 	/* We handle DMA here */
524 	page = ena_alloc_map_page(rx_ring, &dma);
525 	if (IS_ERR(page))
526 		return PTR_ERR(page);
527 
528 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
529 		  "Allocate page %p, rx_info %p\n", page, rx_info);
530 
531 	tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
532 
533 	rx_info->page = page;
534 	rx_info->dma_addr = dma;
535 	rx_info->page_offset = 0;
536 	ena_buf = &rx_info->ena_buf;
537 	ena_buf->paddr = dma + headroom;
538 	ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
539 
540 	return 0;
541 }
542 
543 static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring,
544 				    struct ena_rx_buffer *rx_info,
545 				    unsigned long attrs)
546 {
547 	dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL,
548 			     attrs);
549 }
550 
551 static void ena_free_rx_page(struct ena_ring *rx_ring,
552 			     struct ena_rx_buffer *rx_info)
553 {
554 	struct page *page = rx_info->page;
555 
556 	if (unlikely(!page)) {
557 		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
558 			   "Trying to free unallocated buffer\n");
559 		return;
560 	}
561 
562 	ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0);
563 
564 	__free_page(page);
565 	rx_info->page = NULL;
566 }
567 
568 static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
569 {
570 	u16 next_to_use, req_id;
571 	u32 i;
572 	int rc;
573 
574 	next_to_use = rx_ring->next_to_use;
575 
576 	for (i = 0; i < num; i++) {
577 		struct ena_rx_buffer *rx_info;
578 
579 		req_id = rx_ring->free_ids[next_to_use];
580 
581 		rx_info = &rx_ring->rx_buffer_info[req_id];
582 
583 		rc = ena_alloc_rx_buffer(rx_ring, rx_info);
584 		if (unlikely(rc < 0)) {
585 			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
586 				   "Failed to allocate buffer for rx queue %d\n",
587 				   rx_ring->qid);
588 			break;
589 		}
590 		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
591 						&rx_info->ena_buf,
592 						req_id);
593 		if (unlikely(rc)) {
594 			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
595 				   "Failed to add buffer for rx queue %d\n",
596 				   rx_ring->qid);
597 			break;
598 		}
599 		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
600 						   rx_ring->ring_size);
601 	}
602 
603 	if (unlikely(i < num)) {
604 		ena_increase_stat(&rx_ring->rx_stats.refil_partial, 1,
605 				  &rx_ring->syncp);
606 		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
607 			   "Refilled rx qid %d with only %d buffers (from %d)\n",
608 			   rx_ring->qid, i, num);
609 	}
610 
611 	/* ena_com_write_sq_doorbell issues a wmb() */
612 	if (likely(i))
613 		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
614 
615 	rx_ring->next_to_use = next_to_use;
616 
617 	return i;
618 }
619 
620 static void ena_free_rx_bufs(struct ena_adapter *adapter,
621 			     u32 qid)
622 {
623 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
624 	u32 i;
625 
626 	for (i = 0; i < rx_ring->ring_size; i++) {
627 		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
628 
629 		if (rx_info->page)
630 			ena_free_rx_page(rx_ring, rx_info);
631 	}
632 }
633 
634 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers
635  * @adapter: board private structure
636  */
637 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
638 {
639 	struct ena_ring *rx_ring;
640 	int i, rc, bufs_num;
641 
642 	for (i = 0; i < adapter->num_io_queues; i++) {
643 		rx_ring = &adapter->rx_ring[i];
644 		bufs_num = rx_ring->ring_size - 1;
645 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
646 
647 		if (unlikely(rc != bufs_num))
648 			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
649 				   "Refilling Queue %d failed. allocated %d buffers from: %d\n",
650 				   i, rc, bufs_num);
651 	}
652 }
653 
654 static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
655 {
656 	int i;
657 
658 	for (i = 0; i < adapter->num_io_queues; i++)
659 		ena_free_rx_bufs(adapter, i);
660 }
661 
662 void ena_unmap_tx_buff(struct ena_ring *tx_ring,
663 		       struct ena_tx_buffer *tx_info)
664 {
665 	struct ena_com_buf *ena_buf;
666 	u32 cnt;
667 	int i;
668 
669 	ena_buf = tx_info->bufs;
670 	cnt = tx_info->num_of_bufs;
671 
672 	if (unlikely(!cnt))
673 		return;
674 
675 	if (tx_info->map_linear_data) {
676 		dma_unmap_single(tx_ring->dev,
677 				 dma_unmap_addr(ena_buf, paddr),
678 				 dma_unmap_len(ena_buf, len),
679 				 DMA_TO_DEVICE);
680 		ena_buf++;
681 		cnt--;
682 	}
683 
684 	/* unmap remaining mapped pages */
685 	for (i = 0; i < cnt; i++) {
686 		dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
687 			       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
688 		ena_buf++;
689 	}
690 }
691 
692 /* ena_free_tx_bufs - Free Tx Buffers per Queue
693  * @tx_ring: TX ring for which buffers be freed
694  */
695 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
696 {
697 	bool print_once = true;
698 	bool is_xdp_ring;
699 	u32 i;
700 
701 	is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
702 
703 	for (i = 0; i < tx_ring->ring_size; i++) {
704 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
705 
706 		if (!tx_info->skb)
707 			continue;
708 
709 		if (print_once) {
710 			netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
711 				     "Free uncompleted tx skb qid %d idx 0x%x\n",
712 				     tx_ring->qid, i);
713 			print_once = false;
714 		} else {
715 			netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
716 				  "Free uncompleted tx skb qid %d idx 0x%x\n",
717 				  tx_ring->qid, i);
718 		}
719 
720 		ena_unmap_tx_buff(tx_ring, tx_info);
721 
722 		if (is_xdp_ring)
723 			xdp_return_frame(tx_info->xdpf);
724 		else
725 			dev_kfree_skb_any(tx_info->skb);
726 	}
727 
728 	if (!is_xdp_ring)
729 		netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
730 							  tx_ring->qid));
731 }
732 
733 static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
734 {
735 	struct ena_ring *tx_ring;
736 	int i;
737 
738 	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
739 		tx_ring = &adapter->tx_ring[i];
740 		ena_free_tx_bufs(tx_ring);
741 	}
742 }
743 
744 static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
745 {
746 	u16 ena_qid;
747 	int i;
748 
749 	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
750 		ena_qid = ENA_IO_TXQ_IDX(i);
751 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
752 	}
753 }
754 
755 static void ena_destroy_xdp_tx_queues(struct ena_adapter *adapter)
756 {
757 	u16 ena_qid;
758 	int i;
759 
760 	for (i = adapter->xdp_first_ring;
761 	     i < adapter->xdp_first_ring + adapter->xdp_num_queues; i++) {
762 		ena_qid = ENA_IO_TXQ_IDX(i);
763 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
764 	}
765 }
766 
767 static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
768 {
769 	u16 ena_qid;
770 	int i;
771 
772 	for (i = 0; i < adapter->num_io_queues; i++) {
773 		ena_qid = ENA_IO_RXQ_IDX(i);
774 		cancel_work_sync(&adapter->ena_napi[i].dim.work);
775 		ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]);
776 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
777 	}
778 }
779 
/* ena_destroy_all_io_queues - destroy every device IO queue
 * @adapter: board private structure
 *
 * TX queues are destroyed first, RX queues second.
 */
static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
{
	/* TX side (regular and XDP queues) */
	ena_destroy_all_tx_queues(adapter);

	/* RX side */
	ena_destroy_all_rx_queues(adapter);
}
785 
786 int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
787 			  struct ena_tx_buffer *tx_info, bool is_xdp)
788 {
789 	if (tx_info)
790 		netif_err(ring->adapter,
791 			  tx_done,
792 			  ring->netdev,
793 			  "tx_info doesn't have valid %s. qid %u req_id %u",
794 			   is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
795 	else
796 		netif_err(ring->adapter,
797 			  tx_done,
798 			  ring->netdev,
799 			  "Invalid req_id %u in qid %u\n",
800 			  req_id, ring->qid);
801 
802 	ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
803 	ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
804 
805 	return -EFAULT;
806 }
807 
808 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
809 {
810 	struct ena_tx_buffer *tx_info;
811 
812 	tx_info = &tx_ring->tx_buffer_info[req_id];
813 	if (likely(tx_info->skb))
814 		return 0;
815 
816 	return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
817 }
818 
819 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
820 {
821 	struct netdev_queue *txq;
822 	bool above_thresh;
823 	u32 tx_bytes = 0;
824 	u32 total_done = 0;
825 	u16 next_to_clean;
826 	u16 req_id;
827 	int tx_pkts = 0;
828 	int rc;
829 
830 	next_to_clean = tx_ring->next_to_clean;
831 	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
832 
833 	while (tx_pkts < budget) {
834 		struct ena_tx_buffer *tx_info;
835 		struct sk_buff *skb;
836 
837 		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
838 						&req_id);
839 		if (rc) {
840 			if (unlikely(rc == -EINVAL))
841 				handle_invalid_req_id(tx_ring, req_id, NULL, false);
842 			break;
843 		}
844 
845 		/* validate that the request id points to a valid skb */
846 		rc = validate_tx_req_id(tx_ring, req_id);
847 		if (rc)
848 			break;
849 
850 		tx_info = &tx_ring->tx_buffer_info[req_id];
851 		skb = tx_info->skb;
852 
853 		/* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */
854 		prefetch(&skb->end);
855 
856 		tx_info->skb = NULL;
857 		tx_info->last_jiffies = 0;
858 
859 		ena_unmap_tx_buff(tx_ring, tx_info);
860 
861 		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
862 			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
863 			  skb);
864 
865 		tx_bytes += tx_info->total_tx_size;
866 		dev_kfree_skb(skb);
867 		tx_pkts++;
868 		total_done += tx_info->tx_descs;
869 
870 		tx_ring->free_ids[next_to_clean] = req_id;
871 		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
872 						     tx_ring->ring_size);
873 	}
874 
875 	tx_ring->next_to_clean = next_to_clean;
876 	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
877 
878 	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
879 
880 	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
881 		  "tx_poll: q %d done. total pkts: %d\n",
882 		  tx_ring->qid, tx_pkts);
883 
884 	/* need to make the rings circular update visible to
885 	 * ena_start_xmit() before checking for netif_queue_stopped().
886 	 */
887 	smp_mb();
888 
889 	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
890 						    ENA_TX_WAKEUP_THRESH);
891 	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
892 		__netif_tx_lock(txq, smp_processor_id());
893 		above_thresh =
894 			ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
895 						     ENA_TX_WAKEUP_THRESH);
896 		if (netif_tx_queue_stopped(txq) && above_thresh &&
897 		    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
898 			netif_tx_wake_queue(txq);
899 			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
900 					  &tx_ring->syncp);
901 		}
902 		__netif_tx_unlock(txq);
903 	}
904 
905 	return tx_pkts;
906 }
907 
908 static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len)
909 {
910 	struct sk_buff *skb;
911 
912 	if (!first_frag)
913 		skb = napi_alloc_skb(rx_ring->napi, len);
914 	else
915 		skb = napi_build_skb(first_frag, len);
916 
917 	if (unlikely(!skb)) {
918 		ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
919 				  &rx_ring->syncp);
920 
921 		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
922 			  "Failed to allocate skb. first_frag %s\n",
923 			  first_frag ? "provided" : "not provided");
924 	}
925 
926 	return skb;
927 }
928 
929 static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len,
930 				      u16 len, int pkt_offset)
931 {
932 	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
933 
934 	/* More than ENA_MIN_RX_BUF_SIZE left in the reused buffer
935 	 * for data + headroom + tailroom.
936 	 */
937 	if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) {
938 		page_ref_inc(rx_info->page);
939 		rx_info->page_offset += buf_len;
940 		ena_buf->paddr += buf_len;
941 		ena_buf->len -= buf_len;
942 		return true;
943 	}
944 
945 	return false;
946 }
947 
/* ena_rx_skb - build an skb from the buffers of a received packet
 * @rx_ring: RX ring the packet was received on
 * @ena_bufs: per-descriptor info (len and req_id) of the packet's buffers
 * @descs: number of entries of @ena_bufs that belong to the packet
 * @next_to_clean: in/out ring index; advanced past the consumed descriptors,
 *		   with the consumed req_ids written to rx_ring->free_ids
 *
 * Packets whose first buffer is not longer than rx_copybreak are copied into
 * a newly allocated skb. Otherwise the skb is built around the first buffer
 * and every further buffer is attached as a page fragment.
 *
 * Returns the skb, or NULL if the first buffer has no page (a device reset is
 * requested in that case) or if no skb could be allocated.
 */
948 static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
949 				  struct ena_com_rx_buf_info *ena_bufs,
950 				  u32 descs,
951 				  u16 *next_to_clean)
952 {
953 	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
954 	bool is_xdp_loaded = ena_xdp_present_ring(rx_ring);
955 	struct ena_rx_buffer *rx_info;
956 	struct ena_adapter *adapter;
957 	int page_offset, pkt_offset;
958 	dma_addr_t pre_reuse_paddr;
959 	u16 len, req_id, buf = 0;
960 	bool reuse_rx_buf_page;
961 	struct sk_buff *skb;
962 	void *buf_addr;
963 	int buf_offset;
964 	u16 buf_len;
965 
966 	len = ena_bufs[buf].len;
967 	req_id = ena_bufs[buf].req_id;
968 
969 	rx_info = &rx_ring->rx_buffer_info[req_id];
970 
	/* A completion for an entry without a page is treated as an invalid
	 * req_id: count it and request a device reset.
	 */
971 	if (unlikely(!rx_info->page)) {
972 		adapter = rx_ring->adapter;
973 		netif_err(adapter, rx_err, rx_ring->netdev,
974 			  "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
975 		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
976 		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
977 		return NULL;
978 	}
979 
980 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
981 		  "rx_info %p page %p\n",
982 		  rx_info, rx_info->page);
983 
	/* pkt_offset is the part of buf_offset beyond the ring's headroom */
984 	buf_offset = rx_info->buf_offset;
985 	pkt_offset = buf_offset - rx_ring->rx_headroom;
986 	page_offset = rx_info->page_offset;
987 	buf_addr = page_address(rx_info->page) + page_offset;
988 
	/* Copybreak path: the data is copied into a new skb. rx_info->page is
	 * not cleared here, so the entry keeps its page, and *next_to_clean is
	 * advanced by descs in one step.
	 */
989 	if (len <= rx_ring->rx_copybreak) {
990 		skb = ena_alloc_skb(rx_ring, NULL, len);
991 		if (unlikely(!skb))
992 			return NULL;
993 
994 		skb_copy_to_linear_data(skb, buf_addr + buf_offset, len);
		/* Sync the copied range back for the device; the page stays
		 * with this entry.
		 */
995 		dma_sync_single_for_device(rx_ring->dev,
996 					   dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
997 					   len,
998 					   DMA_FROM_DEVICE);
999 
1000 		skb_put(skb, len);
1001 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1002 			  "RX allocated small packet. len %d.\n", skb->len);
1003 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1004 		rx_ring->free_ids[*next_to_clean] = req_id;
1005 		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
1006 						     rx_ring->ring_size);
1007 		return skb;
1008 	}
1009 
	/* Aligned size covering the offset, the data and the shared-info tailroom */
1010 	buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
1011 
1012 	/* If XDP isn't loaded try to reuse part of the RX buffer */
1013 	reuse_rx_buf_page = !is_xdp_loaded &&
1014 			    ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
1015 
	/* A page that is not kept for reuse is unmapped, with the CPU sync skipped */
1016 	if (!reuse_rx_buf_page)
1017 		ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
1018 
	/* NOTE(review): on allocation failure this returns after the page was
	 * already unmapped or had its reference/offsets advanced; how the
	 * caller recovers is not visible here - confirm.
	 */
1019 	skb = ena_alloc_skb(rx_ring, buf_addr, buf_len);
1020 	if (unlikely(!skb))
1021 		return NULL;
1022 
1023 	/* Populate skb's linear part */
1024 	skb_reserve(skb, buf_offset);
1025 	skb_put(skb, len);
1026 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1027 
	/* Each pass releases the descriptor handled so far and, while
	 * descriptors remain, attaches the next buffer as a page fragment.
	 */
1028 	do {
1029 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1030 			  "RX skb updated. len %d. data_len %d\n",
1031 			  skb->len, skb->data_len);
1032 
		/* Without reuse the entry no longer holds the page */
1033 		if (!reuse_rx_buf_page)
1034 			rx_info->page = NULL;
1035 
1036 		rx_ring->free_ids[*next_to_clean] = req_id;
1037 		*next_to_clean =
1038 			ENA_RX_RING_IDX_NEXT(*next_to_clean,
1039 					     rx_ring->ring_size);
1040 		if (likely(--descs == 0))
1041 			break;
1042 
1043 		buf++;
1044 		len = ena_bufs[buf].len;
1045 		req_id = ena_bufs[buf].req_id;
1046 
1047 		rx_info = &rx_ring->rx_buffer_info[req_id];
1048 
1049 		/* rx_info->buf_offset includes rx_ring->rx_headroom */
1050 		buf_offset = rx_info->buf_offset;
1051 		pkt_offset = buf_offset - rx_ring->rx_headroom;
1052 		buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
1053 		page_offset = rx_info->page_offset;
1054 
		/* Saved first: a successful reuse below advances ena_buf.paddr */
1055 		pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr);
1056 
1057 		reuse_rx_buf_page = !is_xdp_loaded &&
1058 				    ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
1059 
1060 		dma_sync_single_for_cpu(rx_ring->dev,
1061 					pre_reuse_paddr + pkt_offset,
1062 					len,
1063 					DMA_FROM_DEVICE);
1064 
1065 		if (!reuse_rx_buf_page)
1066 			ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
1067 
		/* page_offset was read before the reuse attempt, so it still
		 * refers to this packet's part of the page.
		 */
1068 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1069 				page_offset + buf_offset, len, buf_len);
1070 
1071 	} while (1);
1072 
1073 	return skb;
1074 }
1075 
1076 /* ena_rx_checksum - indicate in skb if hw indicated a good cksum
1077  * @adapter: structure containing adapter specific data
1078  * @ena_rx_ctx: received packet context/metadata
1079  * @skb: skb currently being received and modified
1080  */
1081 static void ena_rx_checksum(struct ena_ring *rx_ring,
1082 				   struct ena_com_rx_ctx *ena_rx_ctx,
1083 				   struct sk_buff *skb)
1084 {
1085 	/* Rx csum disabled */
1086 	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1087 		skb->ip_summed = CHECKSUM_NONE;
1088 		return;
1089 	}
1090 
1091 	/* For fragmented packets the checksum isn't valid */
1092 	if (ena_rx_ctx->frag) {
1093 		skb->ip_summed = CHECKSUM_NONE;
1094 		return;
1095 	}
1096 
1097 	/* if IP and error */
1098 	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1099 		     (ena_rx_ctx->l3_csum_err))) {
1100 		/* ipv4 checksum error */
1101 		skb->ip_summed = CHECKSUM_NONE;
1102 		ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1103 				  &rx_ring->syncp);
1104 		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1105 			  "RX IPv4 header checksum error\n");
1106 		return;
1107 	}
1108 
1109 	/* if TCP/UDP */
1110 	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1111 		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1112 		if (unlikely(ena_rx_ctx->l4_csum_err)) {
1113 			/* TCP/UDP checksum error */
1114 			ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1115 					  &rx_ring->syncp);
1116 			netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1117 				  "RX L4 checksum error\n");
1118 			skb->ip_summed = CHECKSUM_NONE;
1119 			return;
1120 		}
1121 
1122 		if (likely(ena_rx_ctx->l4_csum_checked)) {
1123 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1124 			ena_increase_stat(&rx_ring->rx_stats.csum_good, 1,
1125 					  &rx_ring->syncp);
1126 		} else {
1127 			ena_increase_stat(&rx_ring->rx_stats.csum_unchecked, 1,
1128 					  &rx_ring->syncp);
1129 			skb->ip_summed = CHECKSUM_NONE;
1130 		}
1131 	} else {
1132 		skb->ip_summed = CHECKSUM_NONE;
1133 		return;
1134 	}
1135 
1136 }
1137 
1138 static void ena_set_rx_hash(struct ena_ring *rx_ring,
1139 			    struct ena_com_rx_ctx *ena_rx_ctx,
1140 			    struct sk_buff *skb)
1141 {
1142 	enum pkt_hash_types hash_type;
1143 
1144 	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1145 		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1146 			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1147 
1148 			hash_type = PKT_HASH_TYPE_L4;
1149 		else
1150 			hash_type = PKT_HASH_TYPE_NONE;
1151 
1152 		/* Override hash type if the packet is fragmented */
1153 		if (ena_rx_ctx->frag)
1154 			hash_type = PKT_HASH_TYPE_NONE;
1155 
1156 		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1157 	}
1158 }
1159 
1160 static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs)
1161 {
1162 	struct ena_rx_buffer *rx_info;
1163 	int ret;
1164 
1165 	/* XDP multi-buffer packets not supported */
1166 	if (unlikely(num_descs > 1)) {
1167 		netdev_err_once(rx_ring->adapter->netdev,
1168 				"xdp: dropped unsupported multi-buffer packets\n");
1169 		ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp);
1170 		return ENA_XDP_DROP;
1171 	}
1172 
1173 	rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1174 	xdp_prepare_buff(xdp, page_address(rx_info->page),
1175 			 rx_info->buf_offset,
1176 			 rx_ring->ena_bufs[0].len, false);
1177 
1178 	ret = ena_xdp_execute(rx_ring, xdp);
1179 
1180 	/* The xdp program might expand the headers */
1181 	if (ret == ENA_XDP_PASS) {
1182 		rx_info->buf_offset = xdp->data - xdp->data_hard_start;
1183 		rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1184 	}
1185 
1186 	return ret;
1187 }
1188 
/* ena_clean_rx_irq - Cleanup RX irq
 * @rx_ring: RX ring to clean
 * @napi: napi handler
 * @budget: how many packets driver is allowed to clean
 *
 * Fetches received packets with ena_com_rx_pkt(), runs XDP on them when the
 * ring has XDP attached, builds an skb for packets with an ENA_XDP_PASS
 * verdict and hands it to napi_gro_receive(). Request ids of processed
 * descriptors are put back into rx_ring->free_ids, and the ring is refilled
 * when the number of free SQ entries exceeds the refill threshold.
 *
 * Returns the number of packets processed (delivered via GRO or given a
 * non-PASS XDP verdict), or 0 when ena_com_rx_pkt() failed, in which case
 * ena_reset_device() is called.
 */
static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
			    u32 budget)
{
	u16 next_to_clean = rx_ring->next_to_clean;
	struct ena_com_rx_ctx ena_rx_ctx;
	struct ena_rx_buffer *rx_info;
	struct ena_adapter *adapter;
	u32 res_budget, work_done;
	int rx_copybreak_pkt = 0;
	int refill_threshold;
	struct sk_buff *skb;
	int refill_required;
	struct xdp_buff xdp;
	int xdp_flags = 0;
	int total_len = 0;
	int xdp_verdict;
	u8 pkt_offset;
	int rc = 0;
	int i;

	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
		  "%s qid %d\n", __func__, rx_ring->qid);
	res_budget = budget;
	xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);

	do {
		/* Without XDP on the ring every packet is treated as PASS */
		xdp_verdict = ENA_XDP_PASS;
		skb = NULL;
		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
		ena_rx_ctx.descs = 0;
		ena_rx_ctx.pkt_offset = 0;
		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
				    rx_ring->ena_com_io_sq,
				    &ena_rx_ctx);
		if (unlikely(rc))
			goto error;

		/* No descriptors were returned: nothing left to process */
		if (unlikely(ena_rx_ctx.descs == 0))
			break;

		/* First descriptor might have an offset set by the device */
		rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
		pkt_offset = ena_rx_ctx.pkt_offset;
		rx_info->buf_offset += pkt_offset;

		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

		/* Sync the first buffer for the CPU before XDP or the skb
		 * code looks at the packet data.
		 */
		dma_sync_single_for_cpu(rx_ring->dev,
					dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
					rx_ring->ena_bufs[0].len,
					DMA_FROM_DEVICE);

		if (ena_xdp_present_ring(rx_ring))
			xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);

		/* allocate skb and fill it */
		if (xdp_verdict == ENA_XDP_PASS)
			skb = ena_rx_skb(rx_ring,
					 rx_ring->ena_bufs,
					 ena_rx_ctx.descs,
					 &next_to_clean);

		/* skb is NULL either because the XDP verdict was not PASS or
		 * because ena_rx_skb() returned NULL.
		 */
		if (unlikely(!skb)) {
			/* Give the request ids of all descriptors of this
			 * packet back to the free list.
			 */
			for (i = 0; i < ena_rx_ctx.descs; i++) {
				int req_id = rx_ring->ena_bufs[i].req_id;

				rx_ring->free_ids[next_to_clean] = req_id;
				next_to_clean =
					ENA_RX_RING_IDX_NEXT(next_to_clean,
							     rx_ring->ring_size);

				/* The packet was passed for transmission,
				 * unmap it from the RX side.
				 */
				if (xdp_verdict & ENA_XDP_FORWARDED) {
					ena_unmap_rx_buff_attrs(rx_ring,
								&rx_ring->rx_buffer_info[req_id],
								DMA_ATTR_SKIP_CPU_SYNC);
					rx_ring->rx_buffer_info[req_id].page = NULL;
				}
			}
			/* A non-PASS XDP verdict still counts against the
			 * budget; move on to the next packet.
			 */
			if (xdp_verdict != ENA_XDP_PASS) {
				xdp_flags |= xdp_verdict;
				total_len += ena_rx_ctx.ena_bufs[0].len;
				res_budget--;
				continue;
			}
			/* ena_rx_skb() returned NULL: stop polling for now */
			break;
		}

		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);

		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);

		skb_record_rx_queue(skb, rx_ring->qid);

		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
			rx_copybreak_pkt++;

		total_len += skb->len;

		napi_gro_receive(napi, skb);

		res_budget--;
	} while (likely(res_budget));

	/* Fold this poll's counters into the ring statistics */
	work_done = budget - res_budget;
	rx_ring->per_napi_packets += work_done;
	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->rx_stats.bytes += total_len;
	rx_ring->rx_stats.cnt += work_done;
	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
	u64_stats_update_end(&rx_ring->syncp);

	rx_ring->next_to_clean = next_to_clean;

	refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
	refill_threshold =
		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
		      ENA_RX_REFILL_THRESH_PACKET);

	/* Optimization, try to batch new rx buffers */
	if (refill_required > refill_threshold)
		ena_refill_rx_bufs(rx_ring, refill_required);

	/* Flush redirects queued by XDP during this poll */
	if (xdp_flags & ENA_XDP_REDIRECT)
		xdp_do_flush();

	return work_done;

error:
	/* Redirects queued before the failure still have to be flushed */
	if (xdp_flags & ENA_XDP_REDIRECT)
		xdp_do_flush();

	adapter = netdev_priv(rx_ring->netdev);

	/* Pick the reset reason (and statistic) matching the error code */
	if (rc == -ENOSPC) {
		ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp);
		ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
	} else if (rc == -EFAULT) {
		ena_reset_device(adapter, ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED);
	} else {
		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
				  &rx_ring->syncp);
		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
	}
	return 0;
}
1348 
1349 static void ena_dim_work(struct work_struct *w)
1350 {
1351 	struct dim *dim = container_of(w, struct dim, work);
1352 	struct dim_cq_moder cur_moder =
1353 		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1354 	struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1355 
1356 	ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1357 	dim->state = DIM_START_MEASURE;
1358 }
1359 
1360 static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1361 {
1362 	struct dim_sample dim_sample;
1363 	struct ena_ring *rx_ring = ena_napi->rx_ring;
1364 
1365 	if (!rx_ring->per_napi_packets)
1366 		return;
1367 
1368 	rx_ring->non_empty_napi_events++;
1369 
1370 	dim_update_sample(rx_ring->non_empty_napi_events,
1371 			  rx_ring->rx_stats.cnt,
1372 			  rx_ring->rx_stats.bytes,
1373 			  &dim_sample);
1374 
1375 	net_dim(&ena_napi->dim, &dim_sample);
1376 
1377 	rx_ring->per_napi_packets = 0;
1378 }
1379 
1380 void ena_unmask_interrupt(struct ena_ring *tx_ring,
1381 			  struct ena_ring *rx_ring)
1382 {
1383 	u32 rx_interval = tx_ring->smoothed_interval;
1384 	struct ena_eth_io_intr_reg intr_reg;
1385 
1386 	/* Rx ring can be NULL when for XDP tx queues which don't have an
1387 	 * accompanying rx_ring pair.
1388 	 */
1389 	if (rx_ring)
1390 		rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1391 			rx_ring->smoothed_interval :
1392 			ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1393 
1394 	/* Update intr register: rx intr delay,
1395 	 * tx intr delay and interrupt unmask
1396 	 */
1397 	ena_com_update_intr_reg(&intr_reg,
1398 				rx_interval,
1399 				tx_ring->smoothed_interval,
1400 				true);
1401 
1402 	ena_increase_stat(&tx_ring->tx_stats.unmask_interrupt, 1,
1403 			  &tx_ring->syncp);
1404 
1405 	/* It is a shared MSI-X.
1406 	 * Tx and Rx CQ have pointer to it.
1407 	 * So we use one of them to reach the intr reg
1408 	 * The Tx ring is used because the rx_ring is NULL for XDP queues
1409 	 */
1410 	ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1411 }
1412 
1413 void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1414 			       struct ena_ring *rx_ring)
1415 {
1416 	int cpu = get_cpu();
1417 	int numa_node;
1418 
1419 	/* Check only one ring since the 2 rings are running on the same cpu */
1420 	if (likely(tx_ring->cpu == cpu))
1421 		goto out;
1422 
1423 	tx_ring->cpu = cpu;
1424 	if (rx_ring)
1425 		rx_ring->cpu = cpu;
1426 
1427 	numa_node = cpu_to_node(cpu);
1428 
1429 	if (likely(tx_ring->numa_node == numa_node))
1430 		goto out;
1431 
1432 	put_cpu();
1433 
1434 	if (numa_node != NUMA_NO_NODE) {
1435 		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1436 		tx_ring->numa_node = numa_node;
1437 		if (rx_ring) {
1438 			rx_ring->numa_node = numa_node;
1439 			ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1440 						 numa_node);
1441 		}
1442 	}
1443 
1444 	return;
1445 out:
1446 	put_cpu();
1447 }
1448 
1449 static int ena_io_poll(struct napi_struct *napi, int budget)
1450 {
1451 	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1452 	struct ena_ring *tx_ring, *rx_ring;
1453 	int tx_work_done;
1454 	int rx_work_done = 0;
1455 	int tx_budget;
1456 	int napi_comp_call = 0;
1457 	int ret;
1458 
1459 	tx_ring = ena_napi->tx_ring;
1460 	rx_ring = ena_napi->rx_ring;
1461 
1462 	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1463 
1464 	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1465 	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1466 		napi_complete_done(napi, 0);
1467 		return 0;
1468 	}
1469 
1470 	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1471 	/* On netpoll the budget is zero and the handler should only clean the
1472 	 * tx completions.
1473 	 */
1474 	if (likely(budget))
1475 		rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1476 
1477 	/* If the device is about to reset or down, avoid unmask
1478 	 * the interrupt and return 0 so NAPI won't reschedule
1479 	 */
1480 	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1481 		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1482 		napi_complete_done(napi, 0);
1483 		ret = 0;
1484 
1485 	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1486 		napi_comp_call = 1;
1487 
1488 		/* Update numa and unmask the interrupt only when schedule
1489 		 * from the interrupt context (vs from sk_busy_loop)
1490 		 */
1491 		if (napi_complete_done(napi, rx_work_done) &&
1492 		    READ_ONCE(ena_napi->interrupts_masked)) {
1493 			smp_rmb(); /* make sure interrupts_masked is read */
1494 			WRITE_ONCE(ena_napi->interrupts_masked, false);
1495 			/* We apply adaptive moderation on Rx path only.
1496 			 * Tx uses static interrupt moderation.
1497 			 */
1498 			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1499 				ena_adjust_adaptive_rx_intr_moderation(ena_napi);
1500 
1501 			ena_update_ring_numa_node(tx_ring, rx_ring);
1502 			ena_unmask_interrupt(tx_ring, rx_ring);
1503 		}
1504 
1505 		ret = rx_work_done;
1506 	} else {
1507 		ret = budget;
1508 	}
1509 
1510 	u64_stats_update_begin(&tx_ring->syncp);
1511 	tx_ring->tx_stats.napi_comp += napi_comp_call;
1512 	tx_ring->tx_stats.tx_poll++;
1513 	u64_stats_update_end(&tx_ring->syncp);
1514 
1515 	tx_ring->tx_stats.last_napi_jiffies = jiffies;
1516 
1517 	return ret;
1518 }
1519 
1520 static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1521 {
1522 	struct ena_adapter *adapter = (struct ena_adapter *)data;
1523 
1524 	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1525 
1526 	/* Don't call the aenq handler before probe is done */
1527 	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1528 		ena_com_aenq_intr_handler(adapter->ena_dev, data);
1529 
1530 	return IRQ_HANDLED;
1531 }
1532 
/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
 * @irq: interrupt number
 * @data: pointer to a network interface private napi device structure
 *
 * Does no packet processing itself: it records that an interrupt arrived,
 * marks the interrupt as masked and schedules NAPI. Always returns
 * IRQ_HANDLED.
 */
static irqreturn_t ena_intr_msix_io(int irq, void *data)
{
	struct ena_napi *ena_napi = data;

	/* Used to check HW health */
	WRITE_ONCE(ena_napi->first_interrupt, true);

	/* ena_io_poll() reads this flag to decide whether to unmask the
	 * interrupt after completing NAPI.
	 */
	WRITE_ONCE(ena_napi->interrupts_masked, true);
	smp_wmb(); /* write interrupts_masked before calling napi */

	napi_schedule_irqoff(&ena_napi->napi);

	return IRQ_HANDLED;
}
1551 
1552 /* Reserve a single MSI-X vector for management (admin + aenq).
1553  * plus reserve one vector for each potential io queue.
1554  * the number of potential io queues is the minimum of what the device
1555  * supports and the number of vCPUs.
1556  */
1557 static int ena_enable_msix(struct ena_adapter *adapter)
1558 {
1559 	int msix_vecs, irq_cnt;
1560 
1561 	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1562 		netif_err(adapter, probe, adapter->netdev,
1563 			  "Error, MSI-X is already enabled\n");
1564 		return -EPERM;
1565 	}
1566 
1567 	/* Reserved the max msix vectors we might need */
1568 	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1569 	netif_dbg(adapter, probe, adapter->netdev,
1570 		  "Trying to enable MSI-X, vectors %d\n", msix_vecs);
1571 
1572 	irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1573 					msix_vecs, PCI_IRQ_MSIX);
1574 
1575 	if (irq_cnt < 0) {
1576 		netif_err(adapter, probe, adapter->netdev,
1577 			  "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1578 		return -ENOSPC;
1579 	}
1580 
1581 	if (irq_cnt != msix_vecs) {
1582 		netif_notice(adapter, probe, adapter->netdev,
1583 			     "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
1584 			     irq_cnt, msix_vecs);
1585 		adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1586 	}
1587 
1588 	if (netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues))
1589 		netif_warn(adapter, probe, adapter->netdev,
1590 			   "Failed to map IRQs to CPUs\n");
1591 
1592 	adapter->msix_vecs = irq_cnt;
1593 	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1594 
1595 	return 0;
1596 }
1597 
1598 static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1599 {
1600 	u32 cpu;
1601 
1602 	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1603 		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1604 		 pci_name(adapter->pdev));
1605 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
1606 		ena_intr_msix_mgmnt;
1607 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1608 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1609 		pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
1610 	cpu = cpumask_first(cpu_online_mask);
1611 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
1612 	cpumask_set_cpu(cpu,
1613 			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
1614 }
1615 
1616 static void ena_setup_io_intr(struct ena_adapter *adapter)
1617 {
1618 	struct net_device *netdev;
1619 	int irq_idx, i, cpu;
1620 	int io_queue_count;
1621 
1622 	netdev = adapter->netdev;
1623 	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1624 
1625 	for (i = 0; i < io_queue_count; i++) {
1626 		irq_idx = ENA_IO_IRQ_IDX(i);
1627 		cpu = i % num_online_cpus();
1628 
1629 		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1630 			 "%s-Tx-Rx-%d", netdev->name, i);
1631 		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
1632 		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
1633 		adapter->irq_tbl[irq_idx].vector =
1634 			pci_irq_vector(adapter->pdev, irq_idx);
1635 		adapter->irq_tbl[irq_idx].cpu = cpu;
1636 
1637 		cpumask_set_cpu(cpu,
1638 				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
1639 	}
1640 }
1641 
1642 static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
1643 {
1644 	unsigned long flags = 0;
1645 	struct ena_irq *irq;
1646 	int rc;
1647 
1648 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1649 	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1650 			 irq->data);
1651 	if (rc) {
1652 		netif_err(adapter, probe, adapter->netdev,
1653 			  "Failed to request admin irq\n");
1654 		return rc;
1655 	}
1656 
1657 	netif_dbg(adapter, probe, adapter->netdev,
1658 		  "Set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
1659 		  irq->affinity_hint_mask.bits[0], irq->vector);
1660 
1661 	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1662 
1663 	return rc;
1664 }
1665 
1666 static int ena_request_io_irq(struct ena_adapter *adapter)
1667 {
1668 	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1669 	int rc = 0, i, k, irq_idx;
1670 	unsigned long flags = 0;
1671 	struct ena_irq *irq;
1672 
1673 	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1674 		netif_err(adapter, ifup, adapter->netdev,
1675 			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
1676 		return -EINVAL;
1677 	}
1678 
1679 	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
1680 		irq = &adapter->irq_tbl[i];
1681 		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1682 				 irq->data);
1683 		if (rc) {
1684 			netif_err(adapter, ifup, adapter->netdev,
1685 				  "Failed to request I/O IRQ. index %d rc %d\n",
1686 				   i, rc);
1687 			goto err;
1688 		}
1689 
1690 		netif_dbg(adapter, ifup, adapter->netdev,
1691 			  "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
1692 			  i, irq->affinity_hint_mask.bits[0], irq->vector);
1693 
1694 		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1695 	}
1696 
1697 	/* Now that IO IRQs have been successfully allocated map them to the
1698 	 * corresponding IO NAPI instance. Note that the mgmnt IRQ does not
1699 	 * have a NAPI, so care must be taken to correctly map IRQs to NAPIs.
1700 	 */
1701 	for (i = 0; i < io_queue_count; i++) {
1702 		irq_idx = ENA_IO_IRQ_IDX(i);
1703 		irq = &adapter->irq_tbl[irq_idx];
1704 		netif_napi_set_irq(&adapter->ena_napi[i].napi, irq->vector);
1705 	}
1706 
1707 	return rc;
1708 
1709 err:
1710 	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
1711 		irq = &adapter->irq_tbl[k];
1712 		free_irq(irq->vector, irq->data);
1713 	}
1714 
1715 	return rc;
1716 }
1717 
1718 static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
1719 {
1720 	struct ena_irq *irq;
1721 
1722 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1723 	synchronize_irq(irq->vector);
1724 	irq_set_affinity_hint(irq->vector, NULL);
1725 	free_irq(irq->vector, irq->data);
1726 }
1727 
1728 static void ena_free_io_irq(struct ena_adapter *adapter)
1729 {
1730 	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1731 	struct ena_irq *irq;
1732 	int i;
1733 
1734 	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
1735 		struct ena_napi *ena_napi;
1736 
1737 		irq = &adapter->irq_tbl[i];
1738 		irq_set_affinity_hint(irq->vector, NULL);
1739 		ena_napi = irq->data;
1740 		netif_napi_set_irq(&ena_napi->napi, -1);
1741 		free_irq(irq->vector, irq->data);
1742 	}
1743 }
1744 
1745 static void ena_disable_msix(struct ena_adapter *adapter)
1746 {
1747 	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
1748 		pci_free_irq_vectors(adapter->pdev);
1749 }
1750 
1751 static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
1752 {
1753 	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1754 	int i;
1755 
1756 	if (!netif_running(adapter->netdev))
1757 		return;
1758 
1759 	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
1760 		synchronize_irq(adapter->irq_tbl[i].vector);
1761 }
1762 
1763 static void ena_del_napi_in_range(struct ena_adapter *adapter,
1764 				  int first_index,
1765 				  int count)
1766 {
1767 	int i;
1768 
1769 	for (i = first_index; i < first_index + count; i++) {
1770 		netif_napi_del(&adapter->ena_napi[i].napi);
1771 
1772 		WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
1773 			adapter->ena_napi[i].rx_ring);
1774 	}
1775 }
1776 
1777 static void ena_init_napi_in_range(struct ena_adapter *adapter,
1778 				   int first_index, int count)
1779 {
1780 	int (*napi_handler)(struct napi_struct *napi, int budget);
1781 	int i;
1782 
1783 	for (i = first_index; i < first_index + count; i++) {
1784 		struct ena_napi *napi = &adapter->ena_napi[i];
1785 		struct ena_ring *rx_ring, *tx_ring;
1786 
1787 		memset(napi, 0, sizeof(*napi));
1788 
1789 		rx_ring = &adapter->rx_ring[i];
1790 		tx_ring = &adapter->tx_ring[i];
1791 
1792 		napi_handler = ena_io_poll;
1793 		if (ENA_IS_XDP_INDEX(adapter, i))
1794 			napi_handler = ena_xdp_io_poll;
1795 
1796 		netif_napi_add_config(adapter->netdev, &napi->napi, napi_handler, i);
1797 
1798 		if (!ENA_IS_XDP_INDEX(adapter, i))
1799 			napi->rx_ring = rx_ring;
1800 
1801 		napi->tx_ring = tx_ring;
1802 		napi->qid = i;
1803 	}
1804 }
1805 
1806 static void ena_napi_disable_in_range(struct ena_adapter *adapter,
1807 				      int first_index,
1808 				      int count)
1809 {
1810 	struct napi_struct *napi;
1811 	int i;
1812 
1813 	for (i = first_index; i < first_index + count; i++) {
1814 		napi = &adapter->ena_napi[i].napi;
1815 		if (!ENA_IS_XDP_INDEX(adapter, i)) {
1816 			/* This API is supported for non-XDP queues only */
1817 			netif_queue_set_napi(adapter->netdev, i,
1818 					     NETDEV_QUEUE_TYPE_TX, NULL);
1819 			netif_queue_set_napi(adapter->netdev, i,
1820 					     NETDEV_QUEUE_TYPE_RX, NULL);
1821 		}
1822 		napi_disable(napi);
1823 	}
1824 }
1825 
1826 static void ena_napi_enable_in_range(struct ena_adapter *adapter,
1827 				     int first_index,
1828 				     int count)
1829 {
1830 	struct napi_struct *napi;
1831 	int i;
1832 
1833 	for (i = first_index; i < first_index + count; i++) {
1834 		napi = &adapter->ena_napi[i].napi;
1835 		napi_enable(napi);
1836 		if (!ENA_IS_XDP_INDEX(adapter, i)) {
1837 			/* This API is supported for non-XDP queues only */
1838 			netif_queue_set_napi(adapter->netdev, i,
1839 					     NETDEV_QUEUE_TYPE_RX, napi);
1840 			netif_queue_set_napi(adapter->netdev, i,
1841 					     NETDEV_QUEUE_TYPE_TX, napi);
1842 		}
1843 	}
1844 }
1845 
1846 /* Configure the Rx forwarding */
1847 static int ena_rss_configure(struct ena_adapter *adapter)
1848 {
1849 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1850 	int rc;
1851 
1852 	/* In case the RSS table wasn't initialized by probe */
1853 	if (!ena_dev->rss.tbl_log_size) {
1854 		rc = ena_rss_init_default(adapter);
1855 		if (rc && (rc != -EOPNOTSUPP)) {
1856 			netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc);
1857 			return rc;
1858 		}
1859 	}
1860 
1861 	/* Set indirect table */
1862 	rc = ena_com_indirect_table_set(ena_dev);
1863 	if (unlikely(rc && rc != -EOPNOTSUPP))
1864 		return rc;
1865 
1866 	/* Configure hash function (if supported) */
1867 	rc = ena_com_set_hash_function(ena_dev);
1868 	if (unlikely(rc && (rc != -EOPNOTSUPP)))
1869 		return rc;
1870 
1871 	/* Configure hash inputs (if supported) */
1872 	rc = ena_com_set_hash_ctrl(ena_dev);
1873 	if (unlikely(rc && (rc != -EOPNOTSUPP)))
1874 		return rc;
1875 
1876 	return 0;
1877 }
1878 
1879 static int ena_up_complete(struct ena_adapter *adapter)
1880 {
1881 	int rc;
1882 
1883 	rc = ena_rss_configure(adapter);
1884 	if (rc)
1885 		return rc;
1886 
1887 	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
1888 
1889 	ena_refill_all_rx_bufs(adapter);
1890 
1891 	/* enable transmits */
1892 	netif_tx_start_all_queues(adapter->netdev);
1893 
1894 	ena_napi_enable_in_range(adapter,
1895 				 0,
1896 				 adapter->xdp_num_queues + adapter->num_io_queues);
1897 
1898 	return 0;
1899 }
1900 
1901 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
1902 {
1903 	struct ena_com_create_io_ctx ctx;
1904 	struct ena_com_dev *ena_dev;
1905 	struct ena_ring *tx_ring;
1906 	u32 msix_vector;
1907 	u16 ena_qid;
1908 	int rc;
1909 
1910 	ena_dev = adapter->ena_dev;
1911 
1912 	tx_ring = &adapter->tx_ring[qid];
1913 	msix_vector = ENA_IO_IRQ_IDX(qid);
1914 	ena_qid = ENA_IO_TXQ_IDX(qid);
1915 
1916 	memset(&ctx, 0x0, sizeof(ctx));
1917 
1918 	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1919 	ctx.qid = ena_qid;
1920 	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1921 	ctx.msix_vector = msix_vector;
1922 	ctx.queue_size = tx_ring->ring_size;
1923 	ctx.numa_node = tx_ring->numa_node;
1924 
1925 	rc = ena_com_create_io_queue(ena_dev, &ctx);
1926 	if (rc) {
1927 		netif_err(adapter, ifup, adapter->netdev,
1928 			  "Failed to create I/O TX queue num %d rc: %d\n",
1929 			  qid, rc);
1930 		return rc;
1931 	}
1932 
1933 	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1934 				     &tx_ring->ena_com_io_sq,
1935 				     &tx_ring->ena_com_io_cq);
1936 	if (rc) {
1937 		netif_err(adapter, ifup, adapter->netdev,
1938 			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
1939 			  qid, rc);
1940 		ena_com_destroy_io_queue(ena_dev, ena_qid);
1941 		return rc;
1942 	}
1943 
1944 	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
1945 	return rc;
1946 }
1947 
1948 int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
1949 				     int first_index, int count)
1950 {
1951 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1952 	int rc, i;
1953 
1954 	for (i = first_index; i < first_index + count; i++) {
1955 		rc = ena_create_io_tx_queue(adapter, i);
1956 		if (rc)
1957 			goto create_err;
1958 	}
1959 
1960 	return 0;
1961 
1962 create_err:
1963 	while (i-- > first_index)
1964 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1965 
1966 	return rc;
1967 }
1968 
1969 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
1970 {
1971 	struct ena_com_dev *ena_dev;
1972 	struct ena_com_create_io_ctx ctx;
1973 	struct ena_ring *rx_ring;
1974 	u32 msix_vector;
1975 	u16 ena_qid;
1976 	int rc;
1977 
1978 	ena_dev = adapter->ena_dev;
1979 
1980 	rx_ring = &adapter->rx_ring[qid];
1981 	msix_vector = ENA_IO_IRQ_IDX(qid);
1982 	ena_qid = ENA_IO_RXQ_IDX(qid);
1983 
1984 	memset(&ctx, 0x0, sizeof(ctx));
1985 
1986 	ctx.qid = ena_qid;
1987 	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1988 	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1989 	ctx.msix_vector = msix_vector;
1990 	ctx.queue_size = rx_ring->ring_size;
1991 	ctx.numa_node = rx_ring->numa_node;
1992 
1993 	rc = ena_com_create_io_queue(ena_dev, &ctx);
1994 	if (rc) {
1995 		netif_err(adapter, ifup, adapter->netdev,
1996 			  "Failed to create I/O RX queue num %d rc: %d\n",
1997 			  qid, rc);
1998 		return rc;
1999 	}
2000 
2001 	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2002 				     &rx_ring->ena_com_io_sq,
2003 				     &rx_ring->ena_com_io_cq);
2004 	if (rc) {
2005 		netif_err(adapter, ifup, adapter->netdev,
2006 			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
2007 			  qid, rc);
2008 		goto err;
2009 	}
2010 
2011 	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2012 
2013 	return rc;
2014 err:
2015 	ena_com_destroy_io_queue(ena_dev, ena_qid);
2016 	return rc;
2017 }
2018 
2019 static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2020 {
2021 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2022 	int rc, i;
2023 
2024 	for (i = 0; i < adapter->num_io_queues; i++) {
2025 		rc = ena_create_io_rx_queue(adapter, i);
2026 		if (rc)
2027 			goto create_err;
2028 		INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2029 
2030 		ena_xdp_register_rxq_info(&adapter->rx_ring[i]);
2031 	}
2032 
2033 	return 0;
2034 
2035 create_err:
2036 	while (i--) {
2037 		ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]);
2038 		cancel_work_sync(&adapter->ena_napi[i].dim.work);
2039 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2040 	}
2041 
2042 	return rc;
2043 }
2044 
2045 static void set_io_rings_size(struct ena_adapter *adapter,
2046 			      int new_tx_size,
2047 			      int new_rx_size)
2048 {
2049 	int i;
2050 
2051 	for (i = 0; i < adapter->num_io_queues; i++) {
2052 		adapter->tx_ring[i].ring_size = new_tx_size;
2053 		adapter->rx_ring[i].ring_size = new_rx_size;
2054 	}
2055 }
2056 
2057 /* This function allows queue allocation to backoff when the system is
2058  * low on memory. If there is not enough memory to allocate io queues
2059  * the driver will try to allocate smaller queues.
2060  *
2061  * The backoff algorithm is as follows:
2062  *  1. Try to allocate TX and RX and if successful.
2063  *  1.1. return success
2064  *
2065  *  2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same).
2066  *
2067  *  3. If TX or RX is smaller than 256
2068  *  3.1. return failure.
2069  *  4. else
2070  *  4.1. go back to 1.
2071  */
2072 static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2073 {
2074 	int rc, cur_rx_ring_size, cur_tx_ring_size;
2075 	int new_rx_ring_size, new_tx_ring_size;
2076 
2077 	/* current queue sizes might be set to smaller than the requested
2078 	 * ones due to past queue allocation failures.
2079 	 */
2080 	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2081 			  adapter->requested_rx_ring_size);
2082 
2083 	while (1) {
2084 		if (ena_xdp_present(adapter)) {
2085 			rc = ena_setup_and_create_all_xdp_queues(adapter);
2086 
2087 			if (rc)
2088 				goto err_setup_tx;
2089 		}
2090 		rc = ena_setup_tx_resources_in_range(adapter,
2091 						     0,
2092 						     adapter->num_io_queues);
2093 		if (rc) {
2094 			ena_destroy_xdp_tx_queues(adapter);
2095 			ena_free_all_io_tx_resources_in_range(adapter,
2096 							      adapter->xdp_first_ring,
2097 							      adapter->xdp_num_queues);
2098 			goto err_setup_tx;
2099 		}
2100 
2101 		rc = ena_create_io_tx_queues_in_range(adapter,
2102 						      0,
2103 						      adapter->num_io_queues);
2104 		if (rc) {
2105 			ena_destroy_xdp_tx_queues(adapter);
2106 			goto err_create_tx_queues;
2107 		}
2108 
2109 		rc = ena_setup_all_rx_resources(adapter);
2110 		if (rc)
2111 			goto err_setup_rx;
2112 
2113 		rc = ena_create_all_io_rx_queues(adapter);
2114 		if (rc)
2115 			goto err_create_rx_queues;
2116 
2117 		return 0;
2118 
2119 err_create_rx_queues:
2120 		ena_free_all_io_rx_resources(adapter);
2121 err_setup_rx:
2122 		ena_destroy_all_tx_queues(adapter);
2123 err_create_tx_queues:
2124 		ena_free_all_io_tx_resources(adapter);
2125 err_setup_tx:
2126 		if (rc != -ENOMEM) {
2127 			netif_err(adapter, ifup, adapter->netdev,
2128 				  "Queue creation failed with error code %d\n",
2129 				  rc);
2130 			return rc;
2131 		}
2132 
2133 		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2134 		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2135 
2136 		netif_err(adapter, ifup, adapter->netdev,
2137 			  "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2138 			  cur_tx_ring_size, cur_rx_ring_size);
2139 
2140 		new_tx_ring_size = cur_tx_ring_size;
2141 		new_rx_ring_size = cur_rx_ring_size;
2142 
2143 		/* Decrease the size of the larger queue, or
2144 		 * decrease both if they are the same size.
2145 		 */
2146 		if (cur_rx_ring_size <= cur_tx_ring_size)
2147 			new_tx_ring_size = cur_tx_ring_size / 2;
2148 		if (cur_rx_ring_size >= cur_tx_ring_size)
2149 			new_rx_ring_size = cur_rx_ring_size / 2;
2150 
2151 		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2152 		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2153 			netif_err(adapter, ifup, adapter->netdev,
2154 				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2155 				  ENA_MIN_RING_SIZE);
2156 			return rc;
2157 		}
2158 
2159 		netif_err(adapter, ifup, adapter->netdev,
2160 			  "Retrying queue creation with sizes TX=%d, RX=%d\n",
2161 			  new_tx_ring_size,
2162 			  new_rx_ring_size);
2163 
2164 		set_io_rings_size(adapter, new_tx_ring_size,
2165 				  new_rx_ring_size);
2166 	}
2167 }
2168 
2169 int ena_up(struct ena_adapter *adapter)
2170 {
2171 	int io_queue_count, rc, i;
2172 
2173 	netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
2174 
2175 	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2176 	ena_setup_io_intr(adapter);
2177 
2178 	/* napi poll functions should be initialized before running
2179 	 * request_irq(), to handle a rare condition where there is a pending
2180 	 * interrupt, causing the ISR to fire immediately while the poll
2181 	 * function wasn't set yet, causing a null dereference
2182 	 */
2183 	ena_init_napi_in_range(adapter, 0, io_queue_count);
2184 
2185 	/* Enabling DIM needs to happen before enabling IRQs since DIM
2186 	 * is run from napi routine
2187 	 */
2188 	if (ena_com_interrupt_moderation_supported(adapter->ena_dev))
2189 		ena_com_enable_adaptive_moderation(adapter->ena_dev);
2190 
2191 	rc = ena_request_io_irq(adapter);
2192 	if (rc)
2193 		goto err_req_irq;
2194 
2195 	rc = create_queues_with_size_backoff(adapter);
2196 	if (rc)
2197 		goto err_create_queues_with_backoff;
2198 
2199 	rc = ena_up_complete(adapter);
2200 	if (rc)
2201 		goto err_up;
2202 
2203 	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2204 		netif_carrier_on(adapter->netdev);
2205 
2206 	ena_increase_stat(&adapter->dev_stats.interface_up, 1,
2207 			  &adapter->syncp);
2208 
2209 	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2210 
2211 	/* Enable completion queues interrupt */
2212 	for (i = 0; i < adapter->num_io_queues; i++)
2213 		ena_unmask_interrupt(&adapter->tx_ring[i],
2214 				     &adapter->rx_ring[i]);
2215 
2216 	/* schedule napi in case we had pending packets
2217 	 * from the last time we disable napi
2218 	 */
2219 	for (i = 0; i < io_queue_count; i++)
2220 		napi_schedule(&adapter->ena_napi[i].napi);
2221 
2222 	return rc;
2223 
2224 err_up:
2225 	ena_destroy_all_tx_queues(adapter);
2226 	ena_free_all_io_tx_resources(adapter);
2227 	ena_destroy_all_rx_queues(adapter);
2228 	ena_free_all_io_rx_resources(adapter);
2229 err_create_queues_with_backoff:
2230 	ena_free_io_irq(adapter);
2231 err_req_irq:
2232 	ena_del_napi_in_range(adapter, 0, io_queue_count);
2233 
2234 	return rc;
2235 }
2236 
2237 void ena_down(struct ena_adapter *adapter)
2238 {
2239 	int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2240 
2241 	netif_dbg(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2242 
2243 	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2244 
2245 	ena_increase_stat(&adapter->dev_stats.interface_down, 1,
2246 			  &adapter->syncp);
2247 
2248 	netif_carrier_off(adapter->netdev);
2249 	netif_tx_disable(adapter->netdev);
2250 
2251 	/* After this point the napi handler won't enable the tx queue */
2252 	ena_napi_disable_in_range(adapter, 0, io_queue_count);
2253 
2254 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2255 		int rc;
2256 
2257 		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2258 		if (rc)
2259 			netif_err(adapter, ifdown, adapter->netdev,
2260 				  "Device reset failed\n");
2261 		/* stop submitting admin commands on a device that was reset */
2262 		ena_com_set_admin_running_state(adapter->ena_dev, false);
2263 	}
2264 
2265 	ena_destroy_all_io_queues(adapter);
2266 
2267 	ena_disable_io_intr_sync(adapter);
2268 	ena_free_io_irq(adapter);
2269 	ena_del_napi_in_range(adapter, 0, io_queue_count);
2270 
2271 	ena_free_all_tx_bufs(adapter);
2272 	ena_free_all_rx_bufs(adapter);
2273 	ena_free_all_io_tx_resources(adapter);
2274 	ena_free_all_io_rx_resources(adapter);
2275 }
2276 
2277 /* ena_open - Called when a network interface is made active
2278  * @netdev: network interface device structure
2279  *
2280  * Returns 0 on success, negative value on failure
2281  *
2282  * The open entry point is called when a network interface is made
2283  * active by the system (IFF_UP).  At this point all resources needed
2284  * for transmit and receive operations are allocated, the interrupt
2285  * handler is registered with the OS, the watchdog timer is started,
2286  * and the stack is notified that the interface is ready.
2287  */
2288 static int ena_open(struct net_device *netdev)
2289 {
2290 	struct ena_adapter *adapter = netdev_priv(netdev);
2291 	int rc;
2292 
2293 	/* Notify the stack of the actual queue counts. */
2294 	rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2295 	if (rc) {
2296 		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2297 		return rc;
2298 	}
2299 
2300 	rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2301 	if (rc) {
2302 		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2303 		return rc;
2304 	}
2305 
2306 	rc = ena_up(adapter);
2307 	if (rc)
2308 		return rc;
2309 
2310 	return rc;
2311 }
2312 
2313 /* ena_close - Disables a network interface
2314  * @netdev: network interface device structure
2315  *
2316  * Returns 0, this is not allowed to fail
2317  *
2318  * The close entry point is called when an interface is de-activated
2319  * by the OS.  The hardware is still under the drivers control, but
2320  * needs to be disabled.  A global MAC reset is issued to stop the
2321  * hardware, and all transmit and receive resources are freed.
2322  */
2323 static int ena_close(struct net_device *netdev)
2324 {
2325 	struct ena_adapter *adapter = netdev_priv(netdev);
2326 
2327 	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2328 
2329 	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2330 		return 0;
2331 
2332 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2333 		ena_down(adapter);
2334 
2335 	/* Check for device status and issue reset if needed*/
2336 	check_for_admin_com_state(adapter);
2337 	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2338 		netif_err(adapter, ifdown, adapter->netdev,
2339 			  "Destroy failure, restarting device\n");
2340 		ena_dump_stats_to_dmesg(adapter);
2341 		/* rtnl lock already obtained in dev_ioctl() layer */
2342 		ena_destroy_device(adapter, false);
2343 		ena_restore_device(adapter);
2344 	}
2345 
2346 	return 0;
2347 }
2348 
2349 int ena_update_queue_params(struct ena_adapter *adapter,
2350 			    u32 new_tx_size,
2351 			    u32 new_rx_size,
2352 			    u32 new_llq_header_len)
2353 {
2354 	bool dev_was_up, large_llq_changed = false;
2355 	int rc = 0;
2356 
2357 	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2358 	ena_close(adapter->netdev);
2359 	adapter->requested_tx_ring_size = new_tx_size;
2360 	adapter->requested_rx_ring_size = new_rx_size;
2361 	ena_init_io_rings(adapter,
2362 			  0,
2363 			  adapter->xdp_num_queues +
2364 			  adapter->num_io_queues);
2365 
2366 	large_llq_changed = adapter->ena_dev->tx_mem_queue_type ==
2367 			    ENA_ADMIN_PLACEMENT_POLICY_DEV;
2368 	large_llq_changed &=
2369 		new_llq_header_len != adapter->ena_dev->tx_max_header_size;
2370 
2371 	/* a check that the configuration is valid is done by caller */
2372 	if (large_llq_changed) {
2373 		adapter->large_llq_header_enabled = !adapter->large_llq_header_enabled;
2374 
2375 		ena_destroy_device(adapter, false);
2376 		rc = ena_restore_device(adapter);
2377 	}
2378 
2379 	return dev_was_up && !rc ? ena_up(adapter) : rc;
2380 }
2381 
2382 int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
2383 {
2384 	struct ena_ring *rx_ring;
2385 	int i;
2386 
2387 	if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
2388 		return -EINVAL;
2389 
2390 	adapter->rx_copybreak = rx_copybreak;
2391 
2392 	for (i = 0; i < adapter->num_io_queues; i++) {
2393 		rx_ring = &adapter->rx_ring[i];
2394 		rx_ring->rx_copybreak = rx_copybreak;
2395 	}
2396 
2397 	return 0;
2398 }
2399 
2400 int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2401 {
2402 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2403 	int prev_channel_count;
2404 	bool dev_was_up;
2405 
2406 	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2407 	ena_close(adapter->netdev);
2408 	prev_channel_count = adapter->num_io_queues;
2409 	adapter->num_io_queues = new_channel_count;
2410 	if (ena_xdp_present(adapter) &&
2411 	    ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2412 		adapter->xdp_first_ring = new_channel_count;
2413 		adapter->xdp_num_queues = new_channel_count;
2414 		if (prev_channel_count > new_channel_count)
2415 			ena_xdp_exchange_program_rx_in_range(adapter,
2416 							     NULL,
2417 							     new_channel_count,
2418 							     prev_channel_count);
2419 		else
2420 			ena_xdp_exchange_program_rx_in_range(adapter,
2421 							     adapter->xdp_bpf_prog,
2422 							     prev_channel_count,
2423 							     new_channel_count);
2424 	}
2425 
2426 	/* We need to destroy the rss table so that the indirection
2427 	 * table will be reinitialized by ena_up()
2428 	 */
2429 	ena_com_rss_destroy(ena_dev);
2430 	ena_init_io_rings(adapter,
2431 			  0,
2432 			  adapter->xdp_num_queues +
2433 			  adapter->num_io_queues);
2434 	return dev_was_up ? ena_open(adapter->netdev) : 0;
2435 }
2436 
2437 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2438 			struct sk_buff *skb,
2439 			bool disable_meta_caching)
2440 {
2441 	u32 mss = skb_shinfo(skb)->gso_size;
2442 	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2443 	u8 l4_protocol = 0;
2444 
2445 	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2446 		ena_tx_ctx->l4_csum_enable = 1;
2447 		if (mss) {
2448 			ena_tx_ctx->tso_enable = 1;
2449 			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2450 			ena_tx_ctx->l4_csum_partial = 0;
2451 		} else {
2452 			ena_tx_ctx->tso_enable = 0;
2453 			ena_meta->l4_hdr_len = 0;
2454 			ena_tx_ctx->l4_csum_partial = 1;
2455 		}
2456 
2457 		switch (ip_hdr(skb)->version) {
2458 		case IPVERSION:
2459 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2460 			if (ip_hdr(skb)->frag_off & htons(IP_DF))
2461 				ena_tx_ctx->df = 1;
2462 			if (mss)
2463 				ena_tx_ctx->l3_csum_enable = 1;
2464 			l4_protocol = ip_hdr(skb)->protocol;
2465 			break;
2466 		case 6:
2467 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2468 			l4_protocol = ipv6_hdr(skb)->nexthdr;
2469 			break;
2470 		default:
2471 			break;
2472 		}
2473 
2474 		if (l4_protocol == IPPROTO_TCP)
2475 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2476 		else
2477 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2478 
2479 		ena_meta->mss = mss;
2480 		ena_meta->l3_hdr_len = skb_network_header_len(skb);
2481 		ena_meta->l3_hdr_offset = skb_network_offset(skb);
2482 		ena_tx_ctx->meta_valid = 1;
2483 	} else if (disable_meta_caching) {
2484 		memset(ena_meta, 0, sizeof(*ena_meta));
2485 		ena_tx_ctx->meta_valid = 1;
2486 	} else {
2487 		ena_tx_ctx->meta_valid = 0;
2488 	}
2489 }
2490 
2491 static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2492 				       struct sk_buff *skb)
2493 {
2494 	int num_frags, header_len, rc;
2495 
2496 	num_frags = skb_shinfo(skb)->nr_frags;
2497 	header_len = skb_headlen(skb);
2498 
2499 	if (num_frags < tx_ring->sgl_size)
2500 		return 0;
2501 
2502 	if ((num_frags == tx_ring->sgl_size) &&
2503 	    (header_len < tx_ring->tx_max_header_size))
2504 		return 0;
2505 
2506 	ena_increase_stat(&tx_ring->tx_stats.linearize, 1, &tx_ring->syncp);
2507 
2508 	rc = skb_linearize(skb);
2509 	if (unlikely(rc)) {
2510 		ena_increase_stat(&tx_ring->tx_stats.linearize_failed, 1,
2511 				  &tx_ring->syncp);
2512 	}
2513 
2514 	return rc;
2515 }
2516 
2517 static int ena_tx_map_skb(struct ena_ring *tx_ring,
2518 			  struct ena_tx_buffer *tx_info,
2519 			  struct sk_buff *skb,
2520 			  void **push_hdr,
2521 			  u16 *header_len)
2522 {
2523 	struct ena_adapter *adapter = tx_ring->adapter;
2524 	struct ena_com_buf *ena_buf;
2525 	dma_addr_t dma;
2526 	u32 skb_head_len, frag_len, last_frag;
2527 	u16 push_len = 0;
2528 	u16 delta = 0;
2529 	int i = 0;
2530 
2531 	skb_head_len = skb_headlen(skb);
2532 	tx_info->skb = skb;
2533 	ena_buf = tx_info->bufs;
2534 
2535 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2536 		/* When the device is LLQ mode, the driver will copy
2537 		 * the header into the device memory space.
2538 		 * the ena_com layer assume the header is in a linear
2539 		 * memory space.
2540 		 * This assumption might be wrong since part of the header
2541 		 * can be in the fragmented buffers.
2542 		 * Use skb_header_pointer to make sure the header is in a
2543 		 * linear memory space.
2544 		 */
2545 
2546 		push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2547 		*push_hdr = skb_header_pointer(skb, 0, push_len,
2548 					       tx_ring->push_buf_intermediate_buf);
2549 		*header_len = push_len;
2550 		if (unlikely(skb->data != *push_hdr)) {
2551 			ena_increase_stat(&tx_ring->tx_stats.llq_buffer_copy, 1,
2552 					  &tx_ring->syncp);
2553 
2554 			delta = push_len - skb_head_len;
2555 		}
2556 	} else {
2557 		*push_hdr = NULL;
2558 		*header_len = min_t(u32, skb_head_len,
2559 				    tx_ring->tx_max_header_size);
2560 	}
2561 
2562 	netif_dbg(adapter, tx_queued, adapter->netdev,
2563 		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2564 		  *push_hdr, push_len);
2565 
2566 	if (skb_head_len > push_len) {
2567 		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2568 				     skb_head_len - push_len, DMA_TO_DEVICE);
2569 		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2570 			goto error_report_dma_error;
2571 
2572 		ena_buf->paddr = dma;
2573 		ena_buf->len = skb_head_len - push_len;
2574 
2575 		ena_buf++;
2576 		tx_info->num_of_bufs++;
2577 		tx_info->map_linear_data = 1;
2578 	} else {
2579 		tx_info->map_linear_data = 0;
2580 	}
2581 
2582 	last_frag = skb_shinfo(skb)->nr_frags;
2583 
2584 	for (i = 0; i < last_frag; i++) {
2585 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2586 
2587 		frag_len = skb_frag_size(frag);
2588 
2589 		if (unlikely(delta >= frag_len)) {
2590 			delta -= frag_len;
2591 			continue;
2592 		}
2593 
2594 		dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2595 				       frag_len - delta, DMA_TO_DEVICE);
2596 		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2597 			goto error_report_dma_error;
2598 
2599 		ena_buf->paddr = dma;
2600 		ena_buf->len = frag_len - delta;
2601 		ena_buf++;
2602 		tx_info->num_of_bufs++;
2603 		delta = 0;
2604 	}
2605 
2606 	return 0;
2607 
2608 error_report_dma_error:
2609 	ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
2610 			  &tx_ring->syncp);
2611 	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
2612 
2613 	tx_info->skb = NULL;
2614 
2615 	tx_info->num_of_bufs += i;
2616 	ena_unmap_tx_buff(tx_ring, tx_info);
2617 
2618 	return -EINVAL;
2619 }
2620 
/* ndo_start_xmit handler: queue one skb on the TX ring selected by the skb's
 * queue mapping.
 *
 * The skb is linearized if needed, DMA-mapped, described in a TX context and
 * handed to ena_xmit_common(). The queue is stopped when the submission
 * queue runs low on space, and the doorbell is rung when the queue is
 * stopped or no further packets are announced.
 *
 * Always returns NETDEV_TX_OK; on any failure the skb is freed (dropped).
 *
 * Called with netif_tx_lock.
 */
static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ena_adapter *adapter = netdev_priv(dev);
	struct ena_tx_buffer *tx_info;
	struct ena_com_tx_ctx ena_tx_ctx;
	struct ena_ring *tx_ring;
	struct netdev_queue *txq;
	void *push_hdr;
	u16 next_to_use, req_id, header_len;
	int qid, rc;

	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
	/* Determine which tx ring we will be placed on */
	qid = skb_get_queue_mapping(skb);
	tx_ring = &adapter->tx_ring[qid];
	txq = netdev_get_tx_queue(dev, qid);

	rc = ena_check_and_linearize_skb(tx_ring, skb);
	if (unlikely(rc))
		goto error_drop_packet;

	/* Pick the request id and TX buffer slot for this packet */
	next_to_use = tx_ring->next_to_use;
	req_id = tx_ring->free_ids[next_to_use];
	tx_info = &tx_ring->tx_buffer_info[req_id];
	tx_info->num_of_bufs = 0;

	/* A non-NULL skb means the slot is unexpectedly still in use */
	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);

	/* On failure ena_tx_map_skb() has already undone its own mappings */
	rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
	if (unlikely(rc))
		goto error_drop_packet;

	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
	ena_tx_ctx.ena_bufs = tx_info->bufs;
	ena_tx_ctx.push_header = push_hdr;
	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
	ena_tx_ctx.req_id = req_id;
	ena_tx_ctx.header_len = header_len;

	/* set flags and meta data */
	ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);

	rc = ena_xmit_common(adapter,
			     tx_ring,
			     tx_info,
			     &ena_tx_ctx,
			     next_to_use,
			     skb->len);
	if (rc)
		goto error_unmap_dma;

	netdev_tx_sent_queue(txq, skb->len);

	/* stop the queue when no more space available, the packet can have up
	 * to sgl_size + 2. one for the meta descriptor and one for header
	 * (if the header is larger than tx_max_header_size).
	 */
	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
						   tx_ring->sgl_size + 2))) {
		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
			  __func__, qid);

		netif_tx_stop_queue(txq);
		ena_increase_stat(&tx_ring->tx_stats.queue_stop, 1,
				  &tx_ring->syncp);

		/* There is a rare condition where this function decides to
		 * stop the queue but meanwhile clean_tx_irq updates
		 * next_to_completion and terminates.
		 * The queue would then remain stopped forever.
		 * To solve this issue add a mb() to make sure that the
		 * netif_tx_stop_queue() write is visible before checking if
		 * there is additional space in the queue.
		 */
		smp_mb();

		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
						 ENA_TX_WAKEUP_THRESH)) {
			netif_tx_wake_queue(txq);
			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
					  &tx_ring->syncp);
		}
	}

	skb_tx_timestamp(skb);

	if (netif_xmit_stopped(txq) || !netdev_xmit_more())
		/* trigger the dma engine. ena_ring_tx_doorbell()
		 * calls a memory barrier inside it.
		 */
		ena_ring_tx_doorbell(tx_ring);

	return NETDEV_TX_OK;

error_unmap_dma:
	/* Submission failed after a successful mapping: undo the mapping
	 * and release the TX buffer slot.
	 */
	ena_unmap_tx_buff(tx_ring, tx_info);
	tx_info->skb = NULL;

error_drop_packet:
	/* The packet is dropped, but still reported as consumed */
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
2724 
2725 static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
2726 {
2727 	struct device *dev = &pdev->dev;
2728 	struct ena_admin_host_info *host_info;
2729 	ssize_t ret;
2730 	int rc;
2731 
2732 	/* Allocate only the host info */
2733 	rc = ena_com_allocate_host_info(ena_dev);
2734 	if (rc) {
2735 		dev_err(dev, "Cannot allocate host info\n");
2736 		return;
2737 	}
2738 
2739 	host_info = ena_dev->host_attr.host_info;
2740 
2741 	host_info->bdf = pci_dev_id(pdev);
2742 	host_info->os_type = ENA_ADMIN_OS_LINUX;
2743 	host_info->kernel_ver = LINUX_VERSION_CODE;
2744 	ret = strscpy(host_info->kernel_ver_str, utsname()->version,
2745 		      sizeof(host_info->kernel_ver_str));
2746 	if (ret < 0)
2747 		dev_dbg(dev,
2748 			"kernel version string will be truncated, status = %zd\n", ret);
2749 
2750 	host_info->os_dist = 0;
2751 	ret = strscpy(host_info->os_dist_str, utsname()->release,
2752 		      sizeof(host_info->os_dist_str));
2753 	if (ret < 0)
2754 		dev_dbg(dev,
2755 			"OS distribution string will be truncated, status = %zd\n", ret);
2756 
2757 	host_info->driver_version =
2758 		(DRV_MODULE_GEN_MAJOR) |
2759 		(DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2760 		(DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
2761 		("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
2762 	host_info->num_cpus = num_online_cpus();
2763 
2764 	host_info->driver_supported_features =
2765 		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2766 		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
2767 		ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
2768 		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK |
2769 		ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK |
2770 		ENA_ADMIN_HOST_INFO_PHC_MASK;
2771 
2772 	rc = ena_com_set_host_attributes(ena_dev);
2773 	if (rc) {
2774 		if (rc == -EOPNOTSUPP)
2775 			dev_warn(dev, "Cannot set host attributes\n");
2776 		else
2777 			dev_err(dev, "Cannot set host attributes\n");
2778 
2779 		goto err;
2780 	}
2781 
2782 	return;
2783 
2784 err:
2785 	ena_com_delete_host_info(ena_dev);
2786 }
2787 
2788 static void ena_config_debug_area(struct ena_adapter *adapter)
2789 {
2790 	u32 debug_area_size;
2791 	int rc, ss_count;
2792 
2793 	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
2794 	if (ss_count <= 0) {
2795 		netif_err(adapter, drv, adapter->netdev,
2796 			  "SS count is negative\n");
2797 		return;
2798 	}
2799 
2800 	/* allocate 32 bytes for each string and 64bit for the value */
2801 	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
2802 
2803 	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
2804 	if (rc) {
2805 		netif_err(adapter, drv, adapter->netdev,
2806 			  "Cannot allocate debug area\n");
2807 		return;
2808 	}
2809 
2810 	rc = ena_com_set_host_attributes(adapter->ena_dev);
2811 	if (rc) {
2812 		if (rc == -EOPNOTSUPP)
2813 			netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n");
2814 		else
2815 			netif_err(adapter, drv, adapter->netdev,
2816 				  "Cannot set host attributes\n");
2817 		goto err;
2818 	}
2819 
2820 	return;
2821 err:
2822 	ena_com_delete_debug_area(adapter->ena_dev);
2823 }
2824 
2825 static void ena_get_stats64(struct net_device *netdev,
2826 			    struct rtnl_link_stats64 *stats)
2827 {
2828 	struct ena_adapter *adapter = netdev_priv(netdev);
2829 	struct ena_ring *rx_ring, *tx_ring;
2830 	u64 total_xdp_rx_drops = 0;
2831 	unsigned int start;
2832 	u64 rx_drops;
2833 	u64 tx_drops;
2834 	int i;
2835 
2836 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2837 		return;
2838 
2839 	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
2840 		u64 bytes, packets, xdp_rx_drops;
2841 
2842 		tx_ring = &adapter->tx_ring[i];
2843 
2844 		do {
2845 			start = u64_stats_fetch_begin(&tx_ring->syncp);
2846 			packets = tx_ring->tx_stats.cnt;
2847 			bytes = tx_ring->tx_stats.bytes;
2848 		} while (u64_stats_fetch_retry(&tx_ring->syncp, start));
2849 
2850 		stats->tx_packets += packets;
2851 		stats->tx_bytes += bytes;
2852 
2853 		/* In XDP there isn't an RX queue counterpart */
2854 		if (ENA_IS_XDP_INDEX(adapter, i))
2855 			continue;
2856 
2857 		rx_ring = &adapter->rx_ring[i];
2858 
2859 		do {
2860 			start = u64_stats_fetch_begin(&rx_ring->syncp);
2861 			packets = rx_ring->rx_stats.cnt;
2862 			bytes = rx_ring->rx_stats.bytes;
2863 			xdp_rx_drops = rx_ring->rx_stats.xdp_drop;
2864 		} while (u64_stats_fetch_retry(&rx_ring->syncp, start));
2865 
2866 		stats->rx_packets += packets;
2867 		stats->rx_bytes += bytes;
2868 		total_xdp_rx_drops += xdp_rx_drops;
2869 	}
2870 
2871 	do {
2872 		start = u64_stats_fetch_begin(&adapter->syncp);
2873 		rx_drops = adapter->dev_stats.rx_drops;
2874 		tx_drops = adapter->dev_stats.tx_drops;
2875 	} while (u64_stats_fetch_retry(&adapter->syncp, start));
2876 
2877 	stats->rx_dropped = rx_drops + total_xdp_rx_drops;
2878 	stats->tx_dropped = tx_drops;
2879 
2880 	stats->multicast = 0;
2881 	stats->collisions = 0;
2882 
2883 	stats->rx_length_errors = 0;
2884 	stats->rx_crc_errors = 0;
2885 	stats->rx_frame_errors = 0;
2886 	stats->rx_fifo_errors = 0;
2887 	stats->rx_missed_errors = 0;
2888 	stats->tx_window_errors = 0;
2889 
2890 	stats->rx_errors = 0;
2891 	stats->tx_errors = 0;
2892 }
2893 
/* net_device callbacks implemented by the driver. ena_change_mtu, ena_xdp
 * and ena_xdp_xmit are not defined in this part of the file.
 */
static const struct net_device_ops ena_netdev_ops = {
	.ndo_open		= ena_open,
	.ndo_stop		= ena_close,
	.ndo_start_xmit		= ena_start_xmit,
	.ndo_get_stats64	= ena_get_stats64,
	.ndo_tx_timeout		= ena_tx_timeout,
	.ndo_change_mtu		= ena_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_bpf		= ena_xdp,
	.ndo_xdp_xmit		= ena_xdp_xmit,
};
2905 
2906 static int ena_calc_io_queue_size(struct ena_adapter *adapter,
2907 				  struct ena_com_dev_get_features_ctx *get_feat_ctx)
2908 {
2909 	struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
2910 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2911 	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
2912 	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
2913 	u32 max_tx_queue_size;
2914 	u32 max_rx_queue_size;
2915 
2916 	/* If this function is called after driver load, the ring sizes have already
2917 	 * been configured. Take it into account when recalculating ring size.
2918 	 */
2919 	if (adapter->tx_ring->ring_size)
2920 		tx_queue_size = adapter->tx_ring->ring_size;
2921 
2922 	if (adapter->rx_ring->ring_size)
2923 		rx_queue_size = adapter->rx_ring->ring_size;
2924 
2925 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2926 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2927 			&get_feat_ctx->max_queue_ext.max_queue_ext;
2928 		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
2929 					  max_queue_ext->max_rx_sq_depth);
2930 		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2931 
2932 		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2933 			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2934 						  llq->max_llq_depth);
2935 		else
2936 			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2937 						  max_queue_ext->max_tx_sq_depth);
2938 
2939 		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2940 						 max_queue_ext->max_per_packet_tx_descs);
2941 		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2942 						 max_queue_ext->max_per_packet_rx_descs);
2943 	} else {
2944 		struct ena_admin_queue_feature_desc *max_queues =
2945 			&get_feat_ctx->max_queues;
2946 		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
2947 					  max_queues->max_sq_depth);
2948 		max_tx_queue_size = max_queues->max_cq_depth;
2949 
2950 		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2951 			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2952 						  llq->max_llq_depth);
2953 		else
2954 			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2955 						  max_queues->max_sq_depth);
2956 
2957 		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2958 						 max_queues->max_packet_tx_descs);
2959 		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2960 						 max_queues->max_packet_rx_descs);
2961 	}
2962 
2963 	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
2964 	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
2965 
2966 	if (max_tx_queue_size < ENA_MIN_RING_SIZE) {
2967 		netdev_err(adapter->netdev, "Device max TX queue size: %d < minimum: %d\n",
2968 			   max_tx_queue_size, ENA_MIN_RING_SIZE);
2969 		return -EINVAL;
2970 	}
2971 
2972 	if (max_rx_queue_size < ENA_MIN_RING_SIZE) {
2973 		netdev_err(adapter->netdev, "Device max RX queue size: %d < minimum: %d\n",
2974 			   max_rx_queue_size, ENA_MIN_RING_SIZE);
2975 		return -EINVAL;
2976 	}
2977 
2978 	/* When forcing large headers, we multiply the entry size by 2, and therefore divide
2979 	 * the queue size by 2, leaving the amount of memory used by the queues unchanged.
2980 	 */
2981 	if (adapter->large_llq_header_enabled) {
2982 		if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
2983 		    ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2984 			max_tx_queue_size /= 2;
2985 			dev_info(&adapter->pdev->dev,
2986 				 "Forcing large headers and decreasing maximum TX queue size to %d\n",
2987 				 max_tx_queue_size);
2988 		} else {
2989 			dev_err(&adapter->pdev->dev,
2990 				"Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2991 
2992 			adapter->large_llq_header_enabled = false;
2993 		}
2994 	}
2995 
2996 	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2997 				  max_tx_queue_size);
2998 	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2999 				  max_rx_queue_size);
3000 
3001 	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
3002 	rx_queue_size = rounddown_pow_of_two(rx_queue_size);
3003 
3004 	adapter->max_tx_ring_size  = max_tx_queue_size;
3005 	adapter->max_rx_ring_size = max_rx_queue_size;
3006 	adapter->requested_tx_ring_size = tx_queue_size;
3007 	adapter->requested_rx_ring_size = rx_queue_size;
3008 
3009 	return 0;
3010 }
3011 
3012 static int ena_device_validate_params(struct ena_adapter *adapter,
3013 				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
3014 {
3015 	struct net_device *netdev = adapter->netdev;
3016 	int rc;
3017 
3018 	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3019 			      adapter->mac_addr);
3020 	if (!rc) {
3021 		netif_err(adapter, drv, netdev,
3022 			  "Error, mac address are different\n");
3023 		return -EINVAL;
3024 	}
3025 
3026 	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3027 		netif_err(adapter, drv, netdev,
3028 			  "Error, device max mtu is smaller than netdev MTU\n");
3029 		return -EINVAL;
3030 	}
3031 
3032 	return 0;
3033 }
3034 
3035 static void set_default_llq_configurations(struct ena_adapter *adapter,
3036 					   struct ena_llq_configurations *llq_config,
3037 					   struct ena_admin_feature_llq_desc *llq)
3038 {
3039 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3040 
3041 	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3042 	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3043 	llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3044 
3045 	adapter->large_llq_header_supported =
3046 		!!(ena_dev->supported_features & BIT(ENA_ADMIN_LLQ));
3047 	adapter->large_llq_header_supported &=
3048 		!!(llq->entry_size_ctrl_supported &
3049 			ENA_ADMIN_LIST_ENTRY_SIZE_256B);
3050 
3051 	if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
3052 	    adapter->large_llq_header_enabled) {
3053 		llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
3054 		llq_config->llq_ring_entry_size_value = 256;
3055 	} else {
3056 		llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3057 		llq_config->llq_ring_entry_size_value = 128;
3058 	}
3059 }
3060 
3061 static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3062 					   struct ena_com_dev *ena_dev,
3063 					   struct ena_admin_feature_llq_desc *llq,
3064 					   struct ena_llq_configurations *llq_default_configurations)
3065 {
3066 	int rc;
3067 	u32 llq_feature_mask;
3068 
3069 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3070 	if (!(ena_dev->supported_features & llq_feature_mask)) {
3071 		dev_warn(&pdev->dev,
3072 			"LLQ is not supported Fallback to host mode policy.\n");
3073 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3074 		return 0;
3075 	}
3076 
3077 	if (!ena_dev->mem_bar) {
3078 		netdev_err(ena_dev->net_device,
3079 			   "LLQ is advertised as supported but device doesn't expose mem bar\n");
3080 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3081 		return 0;
3082 	}
3083 
3084 	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3085 	if (unlikely(rc)) {
3086 		dev_err(&pdev->dev,
3087 			"Failed to configure the device mode.  Fallback to host mode policy.\n");
3088 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3089 	}
3090 
3091 	return 0;
3092 }
3093 
3094 static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3095 			       int bars)
3096 {
3097 	bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3098 
3099 	if (!has_mem_bar)
3100 		return 0;
3101 
3102 	ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3103 					   pci_resource_start(pdev, ENA_MEM_BAR),
3104 					   pci_resource_len(pdev, ENA_MEM_BAR));
3105 
3106 	if (!ena_dev->mem_bar)
3107 		return -EFAULT;
3108 
3109 	return 0;
3110 }
3111 
3112 static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev,
3113 			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
3114 			   bool *wd_state)
3115 {
3116 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3117 	struct net_device *netdev = adapter->netdev;
3118 	struct ena_llq_configurations llq_config;
3119 	struct device *dev = &pdev->dev;
3120 	bool readless_supported;
3121 	u32 aenq_groups;
3122 	int dma_width;
3123 	int rc;
3124 
3125 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3126 	if (rc) {
3127 		dev_err(dev, "Failed to init mmio read less\n");
3128 		return rc;
3129 	}
3130 
3131 	/* The PCIe configuration space revision id indicate if mmio reg
3132 	 * read is disabled
3133 	 */
3134 	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3135 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3136 
3137 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3138 	if (rc) {
3139 		dev_err(dev, "Can not reset device\n");
3140 		goto err_mmio_read_less;
3141 	}
3142 
3143 	rc = ena_com_validate_version(ena_dev);
3144 	if (rc) {
3145 		dev_err(dev, "Device version is too low\n");
3146 		goto err_mmio_read_less;
3147 	}
3148 
3149 	dma_width = ena_com_get_dma_width(ena_dev);
3150 	if (dma_width < 0) {
3151 		dev_err(dev, "Invalid dma width value %d", dma_width);
3152 		rc = dma_width;
3153 		goto err_mmio_read_less;
3154 	}
3155 
3156 	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
3157 	if (rc) {
3158 		dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
3159 		goto err_mmio_read_less;
3160 	}
3161 
3162 	ena_devlink_params_get(adapter->devlink);
3163 
3164 	/* ENA admin level init */
3165 	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3166 	if (rc) {
3167 		dev_err(dev,
3168 			"Can not initialize ena admin queue with device\n");
3169 		goto err_mmio_read_less;
3170 	}
3171 
3172 	/* To enable the msix interrupts the driver needs to know the number
3173 	 * of queues. So the driver uses polling mode to retrieve this
3174 	 * information
3175 	 */
3176 	ena_com_set_admin_polling_mode(ena_dev, true);
3177 
3178 	ena_config_host_info(ena_dev, pdev);
3179 
3180 	/* Get Device Attributes*/
3181 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3182 	if (rc) {
3183 		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3184 		goto err_admin_init;
3185 	}
3186 
3187 	/* Try to turn all the available aenq groups */
3188 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3189 		BIT(ENA_ADMIN_FATAL_ERROR) |
3190 		BIT(ENA_ADMIN_WARNING) |
3191 		BIT(ENA_ADMIN_NOTIFICATION) |
3192 		BIT(ENA_ADMIN_KEEP_ALIVE);
3193 
3194 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3195 
3196 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3197 	if (rc) {
3198 		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3199 		goto err_admin_init;
3200 	}
3201 
3202 	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3203 
3204 	set_default_llq_configurations(adapter, &llq_config, &get_feat_ctx->llq);
3205 
3206 	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3207 					     &llq_config);
3208 	if (rc) {
3209 		netdev_err(netdev, "Cannot set queues placement policy rc= %d\n", rc);
3210 		goto err_admin_init;
3211 	}
3212 
3213 	rc = ena_calc_io_queue_size(adapter, get_feat_ctx);
3214 	if (unlikely(rc))
3215 		goto err_admin_init;
3216 
3217 	rc = ena_phc_init(adapter);
3218 	if (unlikely(rc && (rc != -EOPNOTSUPP)))
3219 		netdev_err(netdev, "Failed initializing PHC, error: %d\n", rc);
3220 
3221 	return 0;
3222 
3223 err_admin_init:
3224 	ena_com_abort_admin_commands(ena_dev);
3225 	ena_com_wait_for_abort_completion(ena_dev);
3226 	ena_com_delete_host_info(ena_dev);
3227 	ena_com_admin_destroy(ena_dev);
3228 err_mmio_read_less:
3229 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3230 
3231 	return rc;
3232 }
3233 
3234 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3235 {
3236 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3237 	struct device *dev = &adapter->pdev->dev;
3238 	int rc;
3239 
3240 	rc = ena_enable_msix(adapter);
3241 	if (rc) {
3242 		dev_err(dev, "Can not reserve msix vectors\n");
3243 		return rc;
3244 	}
3245 
3246 	ena_setup_mgmnt_intr(adapter);
3247 
3248 	rc = ena_request_mgmnt_irq(adapter);
3249 	if (rc) {
3250 		dev_err(dev, "Can not setup management interrupts\n");
3251 		goto err_disable_msix;
3252 	}
3253 
3254 	ena_com_set_admin_polling_mode(ena_dev, false);
3255 
3256 	ena_com_admin_aenq_enable(ena_dev);
3257 
3258 	return 0;
3259 
3260 err_disable_msix:
3261 	ena_disable_msix(adapter);
3262 
3263 	return rc;
3264 }
3265 
3266 int ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3267 {
3268 	struct net_device *netdev = adapter->netdev;
3269 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3270 	bool dev_up;
3271 	int rc = 0;
3272 
3273 	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3274 		return 0;
3275 
3276 	netif_carrier_off(netdev);
3277 
3278 	timer_delete_sync(&adapter->timer_service);
3279 
3280 	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3281 	adapter->dev_up_before_reset = dev_up;
3282 	if (!graceful)
3283 		ena_com_set_admin_running_state(ena_dev, false);
3284 
3285 	if (dev_up)
3286 		ena_down(adapter);
3287 
3288 	/* Stop the device from sending AENQ events (in case reset flag is set
3289 	 *  and device is up, ena_down() already reset the device.
3290 	 */
3291 	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3292 		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3293 
3294 	ena_free_mgmnt_irq(adapter);
3295 
3296 	ena_disable_msix(adapter);
3297 
3298 	ena_com_abort_admin_commands(ena_dev);
3299 
3300 	ena_com_wait_for_abort_completion(ena_dev);
3301 
3302 	ena_com_admin_destroy(ena_dev);
3303 
3304 	ena_phc_destroy(adapter);
3305 
3306 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3307 
3308 	/* return reset reason to default value */
3309 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3310 
3311 	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3312 	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3313 
3314 	return rc;
3315 }
3316 
3317 int ena_restore_device(struct ena_adapter *adapter)
3318 {
3319 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3320 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3321 	struct pci_dev *pdev = adapter->pdev;
3322 	struct ena_ring *txr;
3323 	int rc, count, i;
3324 	bool wd_state;
3325 
3326 	set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3327 	rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx, &wd_state);
3328 	if (rc) {
3329 		dev_err(&pdev->dev, "Can not initialize device\n");
3330 		goto err;
3331 	}
3332 	adapter->wd_state = wd_state;
3333 
3334 	count =  adapter->xdp_num_queues + adapter->num_io_queues;
3335 	for (i = 0 ; i < count; i++) {
3336 		txr = &adapter->tx_ring[i];
3337 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
3338 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
3339 	}
3340 
3341 	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3342 	if (rc) {
3343 		dev_err(&pdev->dev, "Validation of device parameters failed\n");
3344 		goto err_device_destroy;
3345 	}
3346 
3347 	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3348 	if (rc) {
3349 		dev_err(&pdev->dev, "Enable MSI-X failed\n");
3350 		goto err_device_destroy;
3351 	}
3352 	/* If the interface was up before the reset bring it up */
3353 	if (adapter->dev_up_before_reset) {
3354 		rc = ena_up(adapter);
3355 		if (rc) {
3356 			dev_err(&pdev->dev, "Failed to create I/O queues\n");
3357 			goto err_disable_msix;
3358 		}
3359 	}
3360 
3361 	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3362 
3363 	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3364 	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3365 		netif_carrier_on(adapter->netdev);
3366 
3367 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3368 	adapter->last_keep_alive_jiffies = jiffies;
3369 
3370 	return rc;
3371 err_disable_msix:
3372 	ena_free_mgmnt_irq(adapter);
3373 	ena_disable_msix(adapter);
3374 err_device_destroy:
3375 	ena_com_abort_admin_commands(ena_dev);
3376 	ena_com_wait_for_abort_completion(ena_dev);
3377 	ena_com_admin_destroy(ena_dev);
3378 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3379 	ena_phc_destroy(adapter);
3380 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3381 err:
3382 	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3383 	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3384 	dev_err(&pdev->dev,
3385 		"Reset attempt failed. Can not reset the device\n");
3386 
3387 	return rc;
3388 }
3389 
3390 static void ena_fw_reset_device(struct work_struct *work)
3391 {
3392 	int rc = 0;
3393 
3394 	struct ena_adapter *adapter =
3395 		container_of(work, struct ena_adapter, reset_task);
3396 
3397 	rtnl_lock();
3398 
3399 	if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3400 		rc |= ena_destroy_device(adapter, false);
3401 		rc |= ena_restore_device(adapter);
3402 		adapter->dev_stats.reset_fail += !!rc;
3403 
3404 		dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
3405 	}
3406 
3407 	rtnl_unlock();
3408 }
3409 
3410 static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3411 					struct ena_ring *rx_ring)
3412 {
3413 	struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
3414 
3415 	if (likely(READ_ONCE(ena_napi->first_interrupt)))
3416 		return 0;
3417 
3418 	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3419 		return 0;
3420 
3421 	rx_ring->no_interrupt_event_cnt++;
3422 
3423 	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3424 		netif_err(adapter, rx_err, adapter->netdev,
3425 			  "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3426 			  rx_ring->qid);
3427 
3428 		ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3429 		return -EIO;
3430 	}
3431 
3432 	return 0;
3433 }
3434 
3435 static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3436 					  struct ena_ring *tx_ring)
3437 {
3438 	struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
3439 	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
3440 	unsigned int time_since_last_napi;
3441 	unsigned int missing_tx_comp_to;
3442 	bool is_tx_comp_time_expired;
3443 	struct ena_tx_buffer *tx_buf;
3444 	unsigned long last_jiffies;
3445 	int napi_scheduled;
3446 	u32 missed_tx = 0;
3447 	int i, rc = 0;
3448 
3449 	missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
3450 
3451 	for (i = 0; i < tx_ring->ring_size; i++) {
3452 		tx_buf = &tx_ring->tx_buffer_info[i];
3453 		last_jiffies = tx_buf->last_jiffies;
3454 
3455 		if (last_jiffies == 0)
3456 			/* no pending Tx at this location */
3457 			continue;
3458 
3459 		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3460 			 2 * adapter->missing_tx_completion_to);
3461 
3462 		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
3463 			/* If after graceful period interrupt is still not
3464 			 * received, we schedule a reset
3465 			 */
3466 			netif_err(adapter, tx_err, adapter->netdev,
3467 				  "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3468 				  tx_ring->qid);
3469 			ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3470 			return -EIO;
3471 		}
3472 
3473 		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3474 			adapter->missing_tx_completion_to);
3475 
3476 		if (unlikely(is_tx_comp_time_expired)) {
3477 			time_since_last_napi =
3478 				jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
3479 			napi_scheduled = !!(ena_napi->napi.state & NAPIF_STATE_SCHED);
3480 
3481 			if (missing_tx_comp_to < time_since_last_napi && napi_scheduled) {
3482 				/* We suspect napi isn't called because the
3483 				 * bottom half is not run. Require a bigger
3484 				 * timeout for these cases
3485 				 */
3486 				if (!time_is_before_jiffies(last_jiffies +
3487 					2 * adapter->missing_tx_completion_to))
3488 					continue;
3489 
3490 				reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
3491 			}
3492 
3493 			missed_tx++;
3494 
3495 			if (tx_buf->print_once)
3496 				continue;
3497 
3498 			netif_notice(adapter, tx_err, adapter->netdev,
3499 				     "TX hasn't completed, qid %d, index %d. %u usecs from last napi execution, napi scheduled: %d\n",
3500 				     tx_ring->qid, i, time_since_last_napi, napi_scheduled);
3501 
3502 			tx_buf->print_once = 1;
3503 		}
3504 	}
3505 
3506 	if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3507 		netif_err(adapter, tx_err, adapter->netdev,
3508 			  "Lost TX completions are above the threshold (%d > %d). Completion transmission timeout: %u.\n",
3509 			  missed_tx,
3510 			  adapter->missing_tx_completion_threshold,
3511 			  missing_tx_comp_to);
3512 		netif_err(adapter, tx_err, adapter->netdev,
3513 			  "Resetting the device\n");
3514 
3515 		ena_reset_device(adapter, reset_reason);
3516 		rc = -EIO;
3517 	}
3518 
3519 	ena_increase_stat(&tx_ring->tx_stats.missed_tx, missed_tx,
3520 			  &tx_ring->syncp);
3521 
3522 	return rc;
3523 }
3524 
3525 static void check_for_missing_completions(struct ena_adapter *adapter)
3526 {
3527 	struct ena_ring *tx_ring;
3528 	struct ena_ring *rx_ring;
3529 	int qid, budget, rc;
3530 	int io_queue_count;
3531 
3532 	io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
3533 
3534 	/* Make sure the driver doesn't turn the device in other process */
3535 	smp_rmb();
3536 
3537 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3538 		return;
3539 
3540 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3541 		return;
3542 
3543 	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3544 		return;
3545 
3546 	budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
3547 
3548 	qid = adapter->last_monitored_tx_qid;
3549 
3550 	while (budget) {
3551 		qid = (qid + 1) % io_queue_count;
3552 
3553 		tx_ring = &adapter->tx_ring[qid];
3554 		rx_ring = &adapter->rx_ring[qid];
3555 
3556 		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3557 		if (unlikely(rc))
3558 			return;
3559 
3560 		rc =  !ENA_IS_XDP_INDEX(adapter, qid) ?
3561 			check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3562 		if (unlikely(rc))
3563 			return;
3564 
3565 		budget--;
3566 	}
3567 
3568 	adapter->last_monitored_tx_qid = qid;
3569 }
3570 
3571 /* trigger napi schedule after 2 consecutive detections */
3572 #define EMPTY_RX_REFILL 2
/* For the rare case where the device runs out of Rx descriptors and the
 * napi handler failed to refill new Rx descriptors (due to a lack of memory
 * for example).
 * This case will lead to a deadlock:
 * The device won't send interrupts since all the new Rx packets will be
 * dropped.
 * The napi handler, not being triggered, won't allocate the new Rx
 * descriptors that would let the device deliver new packets.
 *
 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
 * It is recommended to have at least 512MB, with a minimum of 128MB for
 * constrained environments.
 *
 * When such a situation is detected - reschedule napi
 */
3587 static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3588 {
3589 	struct ena_ring *rx_ring;
3590 	int i, refill_required;
3591 
3592 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3593 		return;
3594 
3595 	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3596 		return;
3597 
3598 	for (i = 0; i < adapter->num_io_queues; i++) {
3599 		rx_ring = &adapter->rx_ring[i];
3600 
3601 		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3602 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3603 			rx_ring->empty_rx_queue++;
3604 
3605 			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3606 				ena_increase_stat(&rx_ring->rx_stats.empty_rx_ring, 1,
3607 						  &rx_ring->syncp);
3608 
3609 				netif_err(adapter, drv, adapter->netdev,
3610 					  "Trigger refill for ring %d\n", i);
3611 
3612 				napi_schedule(rx_ring->napi);
3613 				rx_ring->empty_rx_queue = 0;
3614 			}
3615 		} else {
3616 			rx_ring->empty_rx_queue = 0;
3617 		}
3618 	}
3619 }
3620 
3621 /* Check for keep alive expiration */
3622 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3623 {
3624 	unsigned long keep_alive_expired;
3625 
3626 	if (!adapter->wd_state)
3627 		return;
3628 
3629 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3630 		return;
3631 
3632 	keep_alive_expired = adapter->last_keep_alive_jiffies +
3633 			     adapter->keep_alive_timeout;
3634 	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3635 		netif_err(adapter, drv, adapter->netdev,
3636 			  "Keep alive watchdog timeout.\n");
3637 		ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
3638 				  &adapter->syncp);
3639 		ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3640 	}
3641 }
3642 
3643 static void check_for_admin_com_state(struct ena_adapter *adapter)
3644 {
3645 	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3646 		netif_err(adapter, drv, adapter->netdev,
3647 			  "ENA admin queue is not in running state!\n");
3648 		ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
3649 				  &adapter->syncp);
3650 		ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
3651 	}
3652 }
3653 
3654 static void ena_update_hints(struct ena_adapter *adapter,
3655 			     struct ena_admin_ena_hw_hints *hints)
3656 {
3657 	struct net_device *netdev = adapter->netdev;
3658 
3659 	if (hints->admin_completion_tx_timeout)
3660 		adapter->ena_dev->admin_queue.completion_timeout =
3661 			hints->admin_completion_tx_timeout * 1000;
3662 
3663 	if (hints->mmio_read_timeout)
3664 		/* convert to usec */
3665 		adapter->ena_dev->mmio_read.reg_read_to =
3666 			hints->mmio_read_timeout * 1000;
3667 
3668 	if (hints->missed_tx_completion_count_threshold_to_reset)
3669 		adapter->missing_tx_completion_threshold =
3670 			hints->missed_tx_completion_count_threshold_to_reset;
3671 
3672 	if (hints->missing_tx_completion_timeout) {
3673 		if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3674 			adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3675 		else
3676 			adapter->missing_tx_completion_to =
3677 				msecs_to_jiffies(hints->missing_tx_completion_timeout);
3678 	}
3679 
3680 	if (hints->netdev_wd_timeout)
3681 		netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3682 
3683 	if (hints->driver_watchdog_timeout) {
3684 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3685 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3686 		else
3687 			adapter->keep_alive_timeout =
3688 				msecs_to_jiffies(hints->driver_watchdog_timeout);
3689 	}
3690 }
3691 
3692 static void ena_update_host_info(struct ena_admin_host_info *host_info,
3693 				 struct net_device *netdev)
3694 {
3695 	host_info->supported_network_features[0] =
3696 		netdev->features & GENMASK_ULL(31, 0);
3697 	host_info->supported_network_features[1] =
3698 		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
3699 }
3700 
3701 static void ena_timer_service(struct timer_list *t)
3702 {
3703 	struct ena_adapter *adapter = timer_container_of(adapter, t,
3704 							 timer_service);
3705 	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3706 	struct ena_admin_host_info *host_info =
3707 		adapter->ena_dev->host_attr.host_info;
3708 
3709 	check_for_missing_keep_alive(adapter);
3710 
3711 	check_for_admin_com_state(adapter);
3712 
3713 	check_for_missing_completions(adapter);
3714 
3715 	check_for_empty_rx_ring(adapter);
3716 
3717 	if (debug_area)
3718 		ena_dump_stats_to_buf(adapter, debug_area);
3719 
3720 	if (host_info)
3721 		ena_update_host_info(host_info, adapter->netdev);
3722 
3723 	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3724 		netif_err(adapter, drv, adapter->netdev,
3725 			  "Trigger reset is on\n");
3726 		ena_dump_stats_to_dmesg(adapter);
3727 		queue_work(ena_wq, &adapter->reset_task);
3728 		return;
3729 	}
3730 
3731 	/* Reset the timer */
3732 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3733 }
3734 
3735 static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3736 				     struct ena_com_dev *ena_dev,
3737 				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3738 {
3739 	u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
3740 
3741 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3742 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3743 			&get_feat_ctx->max_queue_ext.max_queue_ext;
3744 		io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3745 				  max_queue_ext->max_rx_cq_num);
3746 
3747 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3748 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3749 	} else {
3750 		struct ena_admin_queue_feature_desc *max_queues =
3751 			&get_feat_ctx->max_queues;
3752 		io_tx_sq_num = max_queues->max_sq_num;
3753 		io_tx_cq_num = max_queues->max_cq_num;
3754 		io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
3755 	}
3756 
3757 	/* In case of LLQ use the llq fields for the tx SQ/CQ */
3758 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3759 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3760 
3761 	max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3762 	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3763 	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3764 	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
3765 	/* 1 IRQ for mgmnt and 1 IRQs for each IO direction */
3766 	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
3767 
3768 	return max_num_io_queues;
3769 }
3770 
3771 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3772 				 struct net_device *netdev)
3773 {
3774 	netdev_features_t dev_features = 0;
3775 
3776 	/* Set offload features */
3777 	if (feat->offload.tx &
3778 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3779 		dev_features |= NETIF_F_IP_CSUM;
3780 
3781 	if (feat->offload.tx &
3782 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3783 		dev_features |= NETIF_F_IPV6_CSUM;
3784 
3785 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3786 		dev_features |= NETIF_F_TSO;
3787 
3788 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3789 		dev_features |= NETIF_F_TSO6;
3790 
3791 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3792 		dev_features |= NETIF_F_TSO_ECN;
3793 
3794 	if (feat->offload.rx_supported &
3795 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3796 		dev_features |= NETIF_F_RXCSUM;
3797 
3798 	if (feat->offload.rx_supported &
3799 		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3800 		dev_features |= NETIF_F_RXCSUM;
3801 
3802 	netdev->features =
3803 		dev_features |
3804 		NETIF_F_SG |
3805 		NETIF_F_RXHASH |
3806 		NETIF_F_HIGHDMA;
3807 
3808 	netdev->hw_features |= netdev->features;
3809 	netdev->vlan_features |= netdev->features;
3810 }
3811 
3812 static void ena_set_conf_feat_params(struct ena_adapter *adapter,
3813 				     struct ena_com_dev_get_features_ctx *feat)
3814 {
3815 	struct net_device *netdev = adapter->netdev;
3816 
3817 	/* Copy mac address */
3818 	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3819 		eth_hw_addr_random(netdev);
3820 		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3821 	} else {
3822 		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3823 		eth_hw_addr_set(netdev, adapter->mac_addr);
3824 	}
3825 
3826 	/* Set offload features */
3827 	ena_set_dev_offloads(feat, netdev);
3828 
3829 	adapter->max_mtu = feat->dev_attr.max_mtu;
3830 	netdev->max_mtu = adapter->max_mtu;
3831 	netdev->min_mtu = ENA_MIN_MTU;
3832 }
3833 
3834 static int ena_rss_init_default(struct ena_adapter *adapter)
3835 {
3836 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3837 	struct device *dev = &adapter->pdev->dev;
3838 	int rc, i;
3839 	u32 val;
3840 
3841 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3842 	if (unlikely(rc)) {
3843 		dev_err(dev, "Cannot init indirect table\n");
3844 		goto err_rss_init;
3845 	}
3846 
3847 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3848 		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
3849 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3850 						       ENA_IO_RXQ_IDX(val));
3851 		if (unlikely(rc)) {
3852 			dev_err(dev, "Cannot fill indirect table\n");
3853 			goto err_fill_indir;
3854 		}
3855 	}
3856 
3857 	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE,
3858 					0xFFFFFFFF);
3859 	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3860 		dev_err(dev, "Cannot fill hash function\n");
3861 		goto err_fill_indir;
3862 	}
3863 
3864 	rc = ena_com_set_default_hash_ctrl(ena_dev);
3865 	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3866 		dev_err(dev, "Cannot fill hash control\n");
3867 		goto err_fill_indir;
3868 	}
3869 
3870 	return 0;
3871 
3872 err_fill_indir:
3873 	ena_com_rss_destroy(ena_dev);
3874 err_rss_init:
3875 
3876 	return rc;
3877 }
3878 
3879 static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3880 {
3881 	int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3882 
3883 	pci_release_selected_regions(pdev, release_bars);
3884 }
3885 
3886 /* ena_probe - Device Initialization Routine
3887  * @pdev: PCI device information struct
3888  * @ent: entry in ena_pci_tbl
3889  *
3890  * Returns 0 on success, negative on failure
3891  *
3892  * ena_probe initializes an adapter identified by a pci_dev structure.
3893  * The OS initialization, configuring of the adapter private structure,
3894  * and a hardware reset occur.
3895  */
3896 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3897 {
3898 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3899 	struct ena_com_dev *ena_dev = NULL;
3900 	struct ena_adapter *adapter;
3901 	struct net_device *netdev;
3902 	static int adapters_found;
3903 	struct devlink *devlink;
3904 	u32 max_num_io_queues;
3905 	bool wd_state;
3906 	int bars, rc;
3907 
3908 	dev_dbg(&pdev->dev, "%s\n", __func__);
3909 
3910 	rc = pci_enable_device_mem(pdev);
3911 	if (rc) {
3912 		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
3913 		return rc;
3914 	}
3915 
3916 	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
3917 	if (rc) {
3918 		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
3919 		goto err_disable_device;
3920 	}
3921 
3922 	pci_set_master(pdev);
3923 
3924 	ena_dev = vzalloc(sizeof(*ena_dev));
3925 	if (!ena_dev) {
3926 		rc = -ENOMEM;
3927 		goto err_disable_device;
3928 	}
3929 
3930 	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3931 	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
3932 	if (rc) {
3933 		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
3934 			rc);
3935 		goto err_free_ena_dev;
3936 	}
3937 
3938 	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
3939 					pci_resource_start(pdev, ENA_REG_BAR),
3940 					pci_resource_len(pdev, ENA_REG_BAR));
3941 	if (!ena_dev->reg_bar) {
3942 		dev_err(&pdev->dev, "Failed to remap regs bar\n");
3943 		rc = -EFAULT;
3944 		goto err_free_region;
3945 	}
3946 
3947 	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
3948 
3949 	ena_dev->dmadev = &pdev->dev;
3950 
3951 	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), ENA_MAX_RINGS);
3952 	if (!netdev) {
3953 		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
3954 		rc = -ENOMEM;
3955 		goto err_free_region;
3956 	}
3957 
3958 	SET_NETDEV_DEV(netdev, &pdev->dev);
3959 	adapter = netdev_priv(netdev);
3960 	adapter->ena_dev = ena_dev;
3961 	adapter->netdev = netdev;
3962 	adapter->pdev = pdev;
3963 	adapter->msg_enable = DEFAULT_MSG_ENABLE;
3964 
3965 	ena_dev->net_device = netdev;
3966 
3967 	pci_set_drvdata(pdev, adapter);
3968 
3969 	rc = ena_phc_alloc(adapter);
3970 	if (rc) {
3971 		netdev_err(netdev, "ena_phc_alloc failed\n");
3972 		goto err_netdev_destroy;
3973 	}
3974 
3975 	rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
3976 	if (rc) {
3977 		netdev_err(netdev, "ena_com_allocate_customer_metrics_buffer failed\n");
3978 		goto err_free_phc;
3979 	}
3980 
3981 	rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
3982 	if (rc) {
3983 		dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n");
3984 		goto err_metrics_destroy;
3985 	}
3986 
3987 	/* Need to do this before ena_device_init */
3988 	devlink = ena_devlink_alloc(adapter);
3989 	if (!devlink) {
3990 		netdev_err(netdev, "ena_devlink_alloc failed\n");
3991 		rc = -ENOMEM;
3992 		goto err_metrics_destroy;
3993 	}
3994 
3995 	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state);
3996 	if (rc) {
3997 		dev_err(&pdev->dev, "ENA device init failed\n");
3998 		if (rc == -ETIME)
3999 			rc = -EPROBE_DEFER;
4000 		goto ena_devlink_destroy;
4001 	}
4002 
4003 	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
4004 	 * Updated during device initialization with the real granularity
4005 	 */
4006 	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
4007 	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
4008 	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
4009 	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
4010 	if (unlikely(!max_num_io_queues)) {
4011 		rc = -EFAULT;
4012 		goto err_device_destroy;
4013 	}
4014 
4015 	ena_set_conf_feat_params(adapter, &get_feat_ctx);
4016 
4017 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4018 
4019 	adapter->num_io_queues = max_num_io_queues;
4020 	adapter->max_num_io_queues = max_num_io_queues;
4021 	adapter->last_monitored_tx_qid = 0;
4022 
4023 	adapter->xdp_first_ring = 0;
4024 	adapter->xdp_num_queues = 0;
4025 
4026 	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4027 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4028 		adapter->disable_meta_caching =
4029 			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4030 			   BIT(ENA_ADMIN_DISABLE_META_CACHING));
4031 
4032 	adapter->wd_state = wd_state;
4033 
4034 	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4035 
4036 	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4037 	if (rc) {
4038 		dev_err(&pdev->dev,
4039 			"Failed to query interrupt moderation feature\n");
4040 		goto err_device_destroy;
4041 	}
4042 
4043 	ena_init_io_rings(adapter,
4044 			  0,
4045 			  adapter->xdp_num_queues +
4046 			  adapter->num_io_queues);
4047 
4048 	netdev->netdev_ops = &ena_netdev_ops;
4049 	netdev->watchdog_timeo = TX_TIMEOUT;
4050 	ena_set_ethtool_ops(netdev);
4051 
4052 	netdev->priv_flags |= IFF_UNICAST_FLT;
4053 
4054 	u64_stats_init(&adapter->syncp);
4055 
4056 	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4057 	if (rc) {
4058 		dev_err(&pdev->dev,
4059 			"Failed to enable and set the admin interrupts\n");
4060 		goto err_worker_destroy;
4061 	}
4062 	rc = ena_rss_init_default(adapter);
4063 	if (rc && (rc != -EOPNOTSUPP)) {
4064 		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4065 		goto err_free_msix;
4066 	}
4067 
4068 	ena_config_debug_area(adapter);
4069 
4070 	if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
4071 		netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
4072 				       NETDEV_XDP_ACT_REDIRECT;
4073 
4074 	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4075 
4076 	netif_carrier_off(netdev);
4077 
4078 	rc = register_netdev(netdev);
4079 	if (rc) {
4080 		dev_err(&pdev->dev, "Cannot register net device\n");
4081 		goto err_rss;
4082 	}
4083 
4084 	ena_debugfs_init(netdev);
4085 
4086 	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4087 
4088 	adapter->last_keep_alive_jiffies = jiffies;
4089 	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4090 	adapter->missing_tx_completion_to = TX_TIMEOUT;
4091 	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4092 
4093 	ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4094 
4095 	timer_setup(&adapter->timer_service, ena_timer_service, 0);
4096 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4097 
4098 	dev_info(&pdev->dev,
4099 		 "%s found at mem %lx, mac addr %pM\n",
4100 		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4101 		 netdev->dev_addr);
4102 
4103 	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4104 
4105 	adapters_found++;
4106 
4107 	/* From this point, the devlink device is visible to users.
4108 	 * Perform the registration last to ensure that all the resources
4109 	 * are available and that the netdevice is registered.
4110 	 */
4111 	ena_devlink_register(devlink, &pdev->dev);
4112 
4113 	return 0;
4114 
4115 err_rss:
4116 	ena_com_delete_debug_area(ena_dev);
4117 	ena_com_rss_destroy(ena_dev);
4118 err_free_msix:
4119 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4120 	/* stop submitting admin commands on a device that was reset */
4121 	ena_com_set_admin_running_state(ena_dev, false);
4122 	ena_free_mgmnt_irq(adapter);
4123 	ena_disable_msix(adapter);
4124 err_worker_destroy:
4125 	timer_delete(&adapter->timer_service);
4126 err_device_destroy:
4127 	ena_com_delete_host_info(ena_dev);
4128 	ena_com_admin_destroy(ena_dev);
4129 ena_devlink_destroy:
4130 	ena_devlink_free(devlink);
4131 err_metrics_destroy:
4132 	ena_com_delete_customer_metrics_buffer(ena_dev);
4133 err_free_phc:
4134 	ena_phc_free(adapter);
4135 err_netdev_destroy:
4136 	free_netdev(netdev);
4137 err_free_region:
4138 	ena_release_bars(ena_dev, pdev);
4139 err_free_ena_dev:
4140 	vfree(ena_dev);
4141 err_disable_device:
4142 	pci_disable_device(pdev);
4143 	return rc;
4144 }
4145 
4146 /*****************************************************************************/
4147 
4148 /* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4149  * @pdev: PCI device information struct
4150  * @shutdown: Is it a shutdown operation? If false, means it is a removal
4151  *
4152  * __ena_shutoff is a helper routine that does the real work on shutdown and
4153  * removal paths; the difference between those paths is with regards to whether
4154  * dettach or unregister the netdevice.
4155  */
4156 static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4157 {
4158 	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4159 	struct ena_com_dev *ena_dev;
4160 	struct net_device *netdev;
4161 
4162 	ena_dev = adapter->ena_dev;
4163 	netdev = adapter->netdev;
4164 
4165 	ena_debugfs_terminate(netdev);
4166 
4167 	/* Make sure timer and reset routine won't be called after
4168 	 * freeing device resources.
4169 	 */
4170 	timer_delete_sync(&adapter->timer_service);
4171 	cancel_work_sync(&adapter->reset_task);
4172 
4173 	rtnl_lock(); /* lock released inside the below if-else block */
4174 	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4175 	ena_destroy_device(adapter, true);
4176 
4177 	ena_phc_free(adapter);
4178 
4179 	ena_devlink_unregister(adapter->devlink);
4180 	ena_devlink_free(adapter->devlink);
4181 
4182 	if (shutdown) {
4183 		netif_device_detach(netdev);
4184 		dev_close(netdev);
4185 		rtnl_unlock();
4186 	} else {
4187 		rtnl_unlock();
4188 		unregister_netdev(netdev);
4189 		free_netdev(netdev);
4190 	}
4191 
4192 	ena_com_rss_destroy(ena_dev);
4193 
4194 	ena_com_delete_debug_area(ena_dev);
4195 
4196 	ena_com_delete_host_info(ena_dev);
4197 
4198 	ena_com_delete_customer_metrics_buffer(ena_dev);
4199 
4200 	ena_release_bars(ena_dev, pdev);
4201 
4202 	pci_disable_device(pdev);
4203 
4204 	vfree(ena_dev);
4205 }
4206 
4207 /* ena_remove - Device Removal Routine
4208  * @pdev: PCI device information struct
4209  *
4210  * ena_remove is called by the PCI subsystem to alert the driver
4211  * that it should release a PCI device.
4212  */
4213 
4214 static void ena_remove(struct pci_dev *pdev)
4215 {
4216 	__ena_shutoff(pdev, false);
4217 }
4218 
4219 /* ena_shutdown - Device Shutdown Routine
4220  * @pdev: PCI device information struct
4221  *
4222  * ena_shutdown is called by the PCI subsystem to alert the driver that
4223  * a shutdown/reboot (or kexec) is happening and device must be disabled.
4224  */
4225 
4226 static void ena_shutdown(struct pci_dev *pdev)
4227 {
4228 	__ena_shutoff(pdev, true);
4229 }
4230 
4231 /* ena_suspend - PM suspend callback
4232  * @dev_d: Device information struct
4233  */
4234 static int __maybe_unused ena_suspend(struct device *dev_d)
4235 {
4236 	struct pci_dev *pdev = to_pci_dev(dev_d);
4237 	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4238 
4239 	ena_increase_stat(&adapter->dev_stats.suspend, 1, &adapter->syncp);
4240 
4241 	rtnl_lock();
4242 	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4243 		dev_err(&pdev->dev,
4244 			"Ignoring device reset request as the device is being suspended\n");
4245 		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4246 	}
4247 	ena_destroy_device(adapter, true);
4248 	rtnl_unlock();
4249 	return 0;
4250 }
4251 
4252 /* ena_resume - PM resume callback
4253  * @dev_d: Device information struct
4254  */
4255 static int __maybe_unused ena_resume(struct device *dev_d)
4256 {
4257 	struct ena_adapter *adapter = dev_get_drvdata(dev_d);
4258 	int rc;
4259 
4260 	ena_increase_stat(&adapter->dev_stats.resume, 1, &adapter->syncp);
4261 
4262 	rtnl_lock();
4263 	rc = ena_restore_device(adapter);
4264 	rtnl_unlock();
4265 	return rc;
4266 }
4267 
/* PM operations for the driver: ena_suspend() and ena_resume() */
static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
4269 
4270 static struct pci_driver ena_pci_driver = {
4271 	.name		= DRV_MODULE_NAME,
4272 	.id_table	= ena_pci_tbl,
4273 	.probe		= ena_probe,
4274 	.remove		= ena_remove,
4275 	.shutdown	= ena_shutdown,
4276 	.driver.pm	= &ena_pm_ops,
4277 	.sriov_configure = pci_sriov_configure_simple,
4278 };
4279 
4280 static int __init ena_init(void)
4281 {
4282 	int ret;
4283 
4284 	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4285 	if (!ena_wq) {
4286 		pr_err("Failed to create workqueue\n");
4287 		return -ENOMEM;
4288 	}
4289 
4290 	ret = pci_register_driver(&ena_pci_driver);
4291 	if (ret)
4292 		destroy_workqueue(ena_wq);
4293 
4294 	return ret;
4295 }
4296 
4297 static void __exit ena_cleanup(void)
4298 {
4299 	pci_unregister_driver(&ena_pci_driver);
4300 
4301 	if (ena_wq) {
4302 		destroy_workqueue(ena_wq);
4303 		ena_wq = NULL;
4304 	}
4305 }
4306 
4307 /******************************************************************************
4308  ******************************** AENQ Handlers *******************************
4309  *****************************************************************************/
4310 /* ena_update_on_link_change:
4311  * Notify the network interface about the change in link status
4312  */
4313 static void ena_update_on_link_change(void *adapter_data,
4314 				      struct ena_admin_aenq_entry *aenq_e)
4315 {
4316 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4317 	struct ena_admin_aenq_link_change_desc *aenq_desc =
4318 		(struct ena_admin_aenq_link_change_desc *)aenq_e;
4319 	int status = aenq_desc->flags &
4320 		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4321 
4322 	if (status) {
4323 		netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
4324 		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4325 		if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4326 			netif_carrier_on(adapter->netdev);
4327 	} else {
4328 		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4329 		netif_carrier_off(adapter->netdev);
4330 	}
4331 }
4332 
4333 static void ena_keep_alive_wd(void *adapter_data,
4334 			      struct ena_admin_aenq_entry *aenq_e)
4335 {
4336 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4337 	struct ena_admin_aenq_keep_alive_desc *desc;
4338 	u64 rx_drops;
4339 	u64 tx_drops;
4340 
4341 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4342 	adapter->last_keep_alive_jiffies = jiffies;
4343 
4344 	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4345 	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4346 
4347 	u64_stats_update_begin(&adapter->syncp);
4348 	/* These stats are accumulated by the device, so the counters indicate
4349 	 * all drops since last reset.
4350 	 */
4351 	adapter->dev_stats.rx_drops = rx_drops;
4352 	adapter->dev_stats.tx_drops = tx_drops;
4353 	u64_stats_update_end(&adapter->syncp);
4354 }
4355 
4356 static void ena_notification(void *adapter_data,
4357 			     struct ena_admin_aenq_entry *aenq_e)
4358 {
4359 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4360 	struct ena_admin_ena_hw_hints *hints;
4361 
4362 	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4363 	     "Invalid group(%x) expected %x\n",
4364 	     aenq_e->aenq_common_desc.group,
4365 	     ENA_ADMIN_NOTIFICATION);
4366 
4367 	switch (aenq_e->aenq_common_desc.syndrome) {
4368 	case ENA_ADMIN_UPDATE_HINTS:
4369 		hints = (struct ena_admin_ena_hw_hints *)
4370 			(&aenq_e->inline_data_w4);
4371 		ena_update_hints(adapter, hints);
4372 		break;
4373 	default:
4374 		netif_err(adapter, drv, adapter->netdev,
4375 			  "Invalid aenq notification link state %d\n",
4376 			  aenq_e->aenq_common_desc.syndrome);
4377 	}
4378 }
4379 
4380 /* This handler will called for unknown event group or unimplemented handlers*/
4381 static void unimplemented_aenq_handler(void *data,
4382 				       struct ena_admin_aenq_entry *aenq_e)
4383 {
4384 	struct ena_adapter *adapter = (struct ena_adapter *)data;
4385 
4386 	netif_err(adapter, drv, adapter->netdev,
4387 		  "Unknown event was received or event with unimplemented handler\n");
4388 }
4389 
4390 static struct ena_aenq_handlers aenq_handlers = {
4391 	.handlers = {
4392 		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4393 		[ENA_ADMIN_NOTIFICATION] = ena_notification,
4394 		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4395 	},
4396 	.unimplemented_handler = unimplemented_aenq_handler
4397 };
4398 
/* Module entry points: ena_init() and ena_cleanup() */
module_init(ena_init);
module_exit(ena_cleanup);
4401