xref: /linux/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c (revision a8a6531164e54cea6df4d82f1770451f68945972)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3 
4 #include <linux/bitfield.h>
5 #include <linux/iopoll.h>
6 #include <linux/pci.h>
7 #include <net/netdev_queues.h>
8 #include <net/page_pool/helpers.h>
9 
10 #include "fbnic.h"
11 #include "fbnic_csr.h"
12 #include "fbnic_netdev.h"
13 #include "fbnic_txrx.h"
14 
/* Flag bits kept in fbnic_xmit_cb::flags */
enum {
	/* Set by fbnic_tx_tstamp() when a HW Tx timestamp is requested for
	 * the skb, cleared by fbnic_clean_tsq() once it has been delivered.
	 */
	FBNIC_XMIT_CB_TS	= 0x01,
};
18 
/* Driver-private per-skb Tx state, overlaid on skb->cb */
struct fbnic_xmit_cb {
	/* skb->len captured at xmit time, used for BQL and byte stats */
	u32 bytecount;
	/* Descriptors used by the skb, counted from its metadata descriptor */
	u8 desc_count;
	/* FBNIC_XMIT_CB_* flag bits */
	u8 flags;
	/* -1 when a timestamp is requested; later overwritten with the HW
	 * head seen by fbnic_clean_twq0() while the timestamp was pending.
	 */
	int hw_head;
};
25 
/* Access the skb control buffer as a struct fbnic_xmit_cb */
#define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb))
27 
28 static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring)
29 {
30 	unsigned long csr_base = (unsigned long)ring->doorbell;
31 
32 	csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1);
33 
34 	return (u32 __iomem *)csr_base;
35 }
36 
37 static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr)
38 {
39 	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
40 
41 	return readl(csr_base + csr);
42 }
43 
44 static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
45 {
46 	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
47 
48 	writel(val, csr_base + csr);
49 }
50 
51 /**
52  * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
53  * @fbn: netdev priv of the FB NIC
54  * @ts40: timestamp read from a descriptor
55  *
56  * Return: u64 value of PHC time in nanoseconds
57  *
58  * Convert truncated 40 bit device timestamp as read from a descriptor
59  * to the full PHC time in nanoseconds.
60  */
61 static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
62 {
63 	unsigned int s;
64 	u64 time_ns;
65 	s64 offset;
66 	u8 ts_top;
67 	u32 high;
68 
69 	do {
70 		s = u64_stats_fetch_begin(&fbn->time_seq);
71 		offset = READ_ONCE(fbn->time_offset);
72 	} while (u64_stats_fetch_retry(&fbn->time_seq, s));
73 
74 	high = READ_ONCE(fbn->time_high);
75 
76 	/* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
77 	time_ns = (u64)(high >> 8) << 40 | ts40;
78 
79 	/* Compare bits 32-39 between periodic reads and ts40,
80 	 * see if HW clock may have wrapped since last read. We are sure
81 	 * that periodic reads are always at least ~1 minute behind, so
82 	 * this logic works perfectly fine.
83 	 */
84 	ts_top = ts40 >> 32;
85 	if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
86 		time_ns += 1ULL << 40;
87 
88 	return time_ns + offset;
89 }
90 
91 static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
92 {
93 	return (ring->head - ring->tail - 1) & ring->size_mask;
94 }
95 
96 static unsigned int fbnic_desc_used(struct fbnic_ring *ring)
97 {
98 	return (ring->tail - ring->head) & ring->size_mask;
99 }
100 
101 static struct netdev_queue *txring_txq(const struct net_device *dev,
102 				       const struct fbnic_ring *ring)
103 {
104 	return netdev_get_tx_queue(dev, ring->q_idx);
105 }
106 
107 static int fbnic_maybe_stop_tx(const struct net_device *dev,
108 			       struct fbnic_ring *ring,
109 			       const unsigned int size)
110 {
111 	struct netdev_queue *txq = txring_txq(dev, ring);
112 	int res;
113 
114 	res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
115 				   FBNIC_TX_DESC_WAKEUP);
116 
117 	return !res;
118 }
119 
120 static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring)
121 {
122 	struct netdev_queue *dev_queue = txring_txq(skb->dev, ring);
123 	unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount;
124 	bool xmit_more = netdev_xmit_more();
125 
126 	/* TBD: Request completion more often if xmit_more becomes large */
127 
128 	return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more);
129 }
130 
131 static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd)
132 {
133 	u64 raw_twd = le64_to_cpu(*twd);
134 	unsigned int len;
135 	dma_addr_t dma;
136 
137 	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
138 	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);
139 
140 	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
141 }
142 
143 static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
144 {
145 	u64 raw_twd = le64_to_cpu(*twd);
146 	unsigned int len;
147 	dma_addr_t dma;
148 
149 	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
150 	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);
151 
152 	dma_unmap_page(dev, dma, len, DMA_TO_DEVICE);
153 }
154 
/* Build the little-endian descriptor type field for FBNIC_TWD_TYPE_<_type>,
 * ready to be OR'ed into or tested against a raw Tx descriptor.
 */
#define FBNIC_TWD_TYPE(_type) \
	cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))
157 
158 static bool fbnic_tx_tstamp(struct sk_buff *skb)
159 {
160 	struct fbnic_net *fbn;
161 
162 	if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
163 		return false;
164 
165 	fbn = netdev_priv(skb->dev);
166 	if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF)
167 		return false;
168 
169 	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
170 	FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS;
171 	FBNIC_XMIT_CB(skb)->hw_head = -1;
172 
173 	return true;
174 }
175 
176 static bool
177 fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
178 {
179 	unsigned int l2len, i3len;
180 
181 	if (fbnic_tx_tstamp(skb))
182 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS);
183 
184 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
185 		return false;
186 
187 	l2len = skb_mac_header_len(skb);
188 	i3len = skb_checksum_start(skb) - skb_network_header(skb);
189 
190 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
191 					skb->csum_offset / 2));
192 
193 	*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
194 
195 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
196 			     FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
197 	return false;
198 }
199 
200 static void
201 fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq)
202 {
203 	skb_checksum_none_assert(skb);
204 
205 	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM)))
206 		return;
207 
208 	if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
209 		skb->ip_summed = CHECKSUM_UNNECESSARY;
210 	} else {
211 		u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd);
212 
213 		skb->ip_summed = CHECKSUM_COMPLETE;
214 		skb->csum = (__force __wsum)csum;
215 	}
216 }
217 
/* DMA map @skb and write its buffer descriptors to @ring.
 *
 * @meta is the metadata descriptor sitting at ring->tail; the skb pointer
 * is stored in the tx_buf slot of that same index and the buffer
 * descriptors are written to the slots that follow. On success the ring
 * tail is advanced and, if __netdev_tx_sent_queue() asks for it, the
 * doorbell is written.
 *
 * Return: false on success, true if the skb could not be mapped. On
 * failure every mapping made so far is undone and ring->tail is left
 * untouched; the skb itself is not freed here.
 */
static bool
fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	struct device *dev = skb->dev->dev.parent;
	unsigned int tail = ring->tail, first;
	unsigned int size, data_len;
	skb_frag_t *frag;
	dma_addr_t dma;
	__le64 *twd;

	ring->tx_buf[tail] = skb;

	/* Step over the metadata descriptor; "first" marks the slot of the
	 * linear (skb head) buffer, which the error path must unmap with
	 * the "single" variant.
	 */
	tail++;
	tail &= ring->size_mask;
	first = tail;

	size = skb_headlen(skb);
	data_len = skb->data_len;

	/* The length must fit in the descriptor's length field */
	if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
		goto dma_error;

	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);

	/* Each pass writes the descriptor for the mapping made just before
	 * it (head buffer first, then one page fragment per iteration).
	 */
	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		twd = &ring->desc[tail];

		if (dma_mapping_error(dev, dma))
			goto dma_error;

		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
					      FBNIC_TWD_TYPE_AL));

		tail++;
		tail &= ring->size_mask;

		/* No paged data left, twd is the final descriptor */
		if (!data_len)
			break;

		size = skb_frag_size(frag);
		data_len -= size;

		if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
			goto dma_error;

		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
	}

	/* Tag the final buffer descriptor of the frame */
	*twd |= FBNIC_TWD_TYPE(LAST_AL);

	/* Descriptor count spans from the metadata descriptor to twd */
	FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask;

	ring->tail = tail;

	/* Record SW timestamp */
	skb_tx_timestamp(skb);

	/* Verify there is room for another packet */
	fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);

	if (fbnic_tx_sent_queue(skb, ring)) {
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION);

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(tail, ring->doorbell);
	}

	return false;
dma_error:
	if (net_ratelimit())
		netdev_err(skb->dev, "TX DMA map failed\n");

	/* Walk back over the descriptors already written, unmapping each;
	 * the slot at "first" holds the skb head, the rest hold fragments.
	 */
	while (tail != first) {
		tail--;
		tail &= ring->size_mask;
		twd = &ring->desc[tail];
		if (tail == first)
			fbnic_unmap_single_twd(dev, twd);
		else
			fbnic_unmap_page_twd(dev, twd);
	}

	return true;
}
306 
307 #define FBNIC_MIN_FRAME_LEN	60
308 
309 static netdev_tx_t
310 fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
311 {
312 	__le64 *meta = &ring->desc[ring->tail];
313 	u16 desc_needed;
314 
315 	if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN))
316 		goto err_count;
317 
318 	/* Need: 1 descriptor per page,
319 	 *       + 1 desc for skb_head,
320 	 *       + 2 desc for metadata and timestamp metadata
321 	 *       + 7 desc gap to keep tail from touching head
322 	 * otherwise try next time
323 	 */
324 	desc_needed = skb_shinfo(skb)->nr_frags + 10;
325 	if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed))
326 		return NETDEV_TX_BUSY;
327 
328 	*meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC);
329 
330 	/* Write all members within DWORD to condense this into 2 4B writes */
331 	FBNIC_XMIT_CB(skb)->bytecount = skb->len;
332 	FBNIC_XMIT_CB(skb)->desc_count = 0;
333 
334 	if (fbnic_tx_offloads(ring, skb, meta))
335 		goto err_free;
336 
337 	if (fbnic_tx_map(ring, skb, meta))
338 		goto err_free;
339 
340 	return NETDEV_TX_OK;
341 
342 err_free:
343 	dev_kfree_skb_any(skb);
344 err_count:
345 	u64_stats_update_begin(&ring->stats.syncp);
346 	ring->stats.dropped++;
347 	u64_stats_update_end(&ring->stats.syncp);
348 	return NETDEV_TX_OK;
349 }
350 
351 netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
352 {
353 	struct fbnic_net *fbn = netdev_priv(dev);
354 	unsigned int q_map = skb->queue_mapping;
355 
356 	return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
357 }
358 
359 netdev_features_t
360 fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
361 		     netdev_features_t features)
362 {
363 	unsigned int l2len, l3len;
364 
365 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
366 		return features;
367 
368 	l2len = skb_mac_header_len(skb);
369 	l3len = skb_checksum_start(skb) - skb_network_header(skb);
370 
371 	/* Check header lengths are multiple of 2.
372 	 * In case of 6in6 we support longer headers (IHLEN + OHLEN)
373 	 * but keep things simple for now, 512B is plenty.
374 	 */
375 	if ((l2len | l3len | skb->csum_offset) % 2 ||
376 	    !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
377 	    !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
378 	    !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
379 		return features & ~NETIF_F_CSUM_MASK;
380 
381 	return features;
382 }
383 
/* Reclaim completed skbs from Tx work queue @ring up to @hw_head.
 *
 * @discard: when true the skbs are accounted as dropped (and pending
 *           timestamps as lost) instead of as transmitted, and the queue
 *           is not woken.
 *
 * Cleaning stops early at an skb that is only partially covered by
 * @hw_head, or - unless @discard is set - at an skb still waiting for its
 * Tx timestamp; in the latter case @hw_head is remembered in the skb's
 * control block for fbnic_clean_tsq() to pick up.
 */
static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
			     struct fbnic_ring *ring, bool discard,
			     unsigned int hw_head)
{
	u64 total_bytes = 0, total_packets = 0, ts_lost = 0;
	unsigned int head = ring->head;
	struct netdev_queue *txq;
	unsigned int clean_desc;

	/* Number of descriptors the hardware has finished with */
	clean_desc = (hw_head - head) & ring->size_mask;

	while (clean_desc) {
		struct sk_buff *skb = ring->tx_buf[head];
		unsigned int desc_cnt;

		/* Only reclaim skbs whose descriptors are all completed */
		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
		if (desc_cnt > clean_desc)
			break;

		if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) {
			FBNIC_XMIT_CB(skb)->hw_head = hw_head;
			if (likely(!discard))
				break;
			ts_lost++;
		}

		ring->tx_buf[head] = NULL;

		clean_desc -= desc_cnt;

		/* Skip leading descriptors that are not of the AL type
		 * (presumably the metadata descriptors) - nothing to unmap.
		 */
		while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) {
			head++;
			head &= ring->size_mask;
			desc_cnt--;
		}

		/* First buffer descriptor maps the skb head */
		fbnic_unmap_single_twd(nv->dev, &ring->desc[head]);
		head++;
		head &= ring->size_mask;
		desc_cnt--;

		/* Remaining descriptors map page fragments */
		while (desc_cnt--) {
			fbnic_unmap_page_twd(nv->dev, &ring->desc[head]);
			head++;
			head &= ring->size_mask;
		}

		total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
		total_packets += 1;

		napi_consume_skb(skb, napi_budget);
	}

	/* Nothing was reclaimed, leave ring and stats untouched */
	if (!total_bytes)
		return;

	ring->head = head;

	txq = txring_txq(nv->napi.dev, ring);

	if (unlikely(discard)) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped += total_packets;
		ring->stats.ts_lost += ts_lost;
		u64_stats_update_end(&ring->stats.syncp);

		netdev_tx_completed_queue(txq, total_packets, total_bytes);
		return;
	}

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.bytes += total_bytes;
	ring->stats.packets += total_packets;
	u64_stats_update_end(&ring->stats.syncp);

	/* Report completion and wake the queue if enough room opened up */
	netif_txq_completed_wake(txq, total_packets, total_bytes,
				 fbnic_desc_unused(ring),
				 FBNIC_TX_DESC_WAKEUP);
}
463 
/* Deliver one Tx hardware timestamp completion (@tcd) to the skb that
 * requested it.
 *
 * @ts_head: in/out, ring position just past the last skb a timestamp was
 *           matched to during this poll (negative if none yet); the search
 *           resumes from there, or from ring->head.
 * @head0:   in/out, HW head to clean up to; if still negative it is set
 *           from the hw_head recorded in the matched skb, if any.
 */
static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
			    struct fbnic_ring *ring,
			    u64 tcd, int *ts_head, int *head0)
{
	struct skb_shared_hwtstamps hwtstamp;
	struct fbnic_net *fbn;
	struct sk_buff *skb;
	int head;
	u64 ns;

	head = (*ts_head < 0) ? ring->head : *ts_head;

	/* Hop from skb to skb until one waiting for a timestamp is found */
	do {
		unsigned int desc_cnt;

		/* Ran out of posted skbs without finding a match */
		if (head == ring->tail) {
			if (unlikely(net_ratelimit()))
				netdev_err(nv->napi.dev,
					   "Tx timestamp without matching packet\n");
			return;
		}

		skb = ring->tx_buf[head];
		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;

		head += desc_cnt;
		head &= ring->size_mask;
	} while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS));

	fbn = netdev_priv(nv->napi.dev);
	ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd));

	memset(&hwtstamp, 0, sizeof(hwtstamp));
	hwtstamp.hwtstamp = ns_to_ktime(ns);

	*ts_head = head;

	/* Timestamp received, the skb no longer blocks Tx cleaning */
	FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS;
	if (*head0 < 0) {
		head = FBNIC_XMIT_CB(skb)->hw_head;
		if (head >= 0)
			*head0 = head;
	}

	skb_tstamp_tx(skb, &hwtstamp);
	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.ts_packets++;
	u64_stats_update_end(&ring->stats.syncp);
}
513 
514 static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
515 				 struct page *page)
516 {
517 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
518 
519 	page_pool_fragment_page(page, PAGECNT_BIAS_MAX);
520 	rx_buf->pagecnt_bias = PAGECNT_BIAS_MAX;
521 	rx_buf->page = page;
522 }
523 
524 static struct page *fbnic_page_pool_get(struct fbnic_ring *ring,
525 					unsigned int idx)
526 {
527 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
528 
529 	rx_buf->pagecnt_bias--;
530 
531 	return rx_buf->page;
532 }
533 
534 static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
535 				  struct fbnic_napi_vector *nv, int budget)
536 {
537 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
538 	struct page *page = rx_buf->page;
539 
540 	if (!page_pool_unref_page(page, rx_buf->pagecnt_bias))
541 		page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget);
542 
543 	rx_buf->page = NULL;
544 }
545 
546 static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
547 			    struct fbnic_q_triad *qt, s32 ts_head, s32 head0)
548 {
549 	if (head0 >= 0)
550 		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
551 	else if (ts_head >= 0)
552 		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
553 }
554 
/* Process the Tx completion queue of @qt: consume every completion
 * descriptor the hardware has written, tell the hardware how far we got,
 * and reclaim the Tx buffers those completions cover.
 */
static void
fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
		int napi_budget)
{
	struct fbnic_ring *cmpl = &qt->cmpl;
	s32 head0 = -1, ts_head = -1;
	__le64 *raw_tcd, done;
	u32 head = cmpl->head;

	/* head carries one extra bit above size_mask which flips on every
	 * pass over the ring; it selects the DONE bit value that marks a
	 * descriptor as freshly written for the current pass.
	 */
	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
	raw_tcd = &cmpl->desc[head & cmpl->size_mask];

	/* Walk the completion queue collecting the heads reported by NIC */
	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
		u64 tcd;

		/* Read the rest of the descriptor only after the DONE check */
		dma_rmb();

		tcd = le64_to_cpu(*raw_tcd);

		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
		case FBNIC_TCD_TYPE_0:
			/* Only completions without the TWQ1 bit are tracked */
			if (!(tcd & FBNIC_TCD_TWQ1))
				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
						  tcd);
			/* Currently all err status bits are related to
			 * timestamps and as those have yet to be added
			 * they are skipped for now.
			 */
			break;
		case FBNIC_TCD_TYPE_1:
			/* Timestamp completion; not expected with TWQ1 set */
			if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
				break;

			fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
			break;
		default:
			break;
		}

		raw_tcd++;
		head++;
		/* Wrapped: expect the opposite DONE polarity from here on */
		if (!(head & cmpl->size_mask)) {
			done ^= cpu_to_le64(FBNIC_TCD_DONE);
			raw_tcd = &cmpl->desc[0];
		}
	}

	/* Record the current head/tail of the queue */
	if (cmpl->head != head) {
		cmpl->head = head;
		writel(head & cmpl->size_mask, cmpl->doorbell);
	}

	/* Unmap and free processed buffers */
	fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0);
}
612 
613 static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
614 			    struct fbnic_ring *ring, unsigned int hw_head)
615 {
616 	unsigned int head = ring->head;
617 
618 	if (head == hw_head)
619 		return;
620 
621 	do {
622 		fbnic_page_pool_drain(ring, head, nv, napi_budget);
623 
624 		head++;
625 		head &= ring->size_mask;
626 	} while (head != hw_head);
627 
628 	ring->head = head;
629 }
630 
631 static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page)
632 {
633 	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
634 	dma_addr_t dma = page_pool_get_dma_addr(page);
635 	u64 bd, i = FBNIC_BD_FRAG_COUNT;
636 
637 	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
638 	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);
639 
640 	/* In the case that a page size is larger than 4K we will map a
641 	 * single page to multiple fragments. The fragments will be
642 	 * FBNIC_BD_FRAG_COUNT in size and the lower n bits will be use
643 	 * to indicate the individual fragment IDs.
644 	 */
645 	do {
646 		*bdq_desc = cpu_to_le64(bd);
647 		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
648 		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
649 	} while (--i);
650 }
651 
652 static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
653 {
654 	unsigned int count = fbnic_desc_unused(bdq);
655 	unsigned int i = bdq->tail;
656 
657 	if (!count)
658 		return;
659 
660 	do {
661 		struct page *page;
662 
663 		page = page_pool_dev_alloc_pages(nv->page_pool);
664 		if (!page)
665 			break;
666 
667 		fbnic_page_pool_init(bdq, i, page);
668 		fbnic_bd_prep(bdq, i, page);
669 
670 		i++;
671 		i &= bdq->size_mask;
672 
673 		count--;
674 	} while (count);
675 
676 	if (bdq->tail != i) {
677 		bdq->tail = i;
678 
679 		/* Force DMA writes to flush before writing to tail */
680 		dma_wmb();
681 
682 		writel(i, bdq->doorbell);
683 	}
684 }
685 
686 static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
687 {
688 	/* The headroom of the first header may be larger than FBNIC_RX_HROOM
689 	 * due to alignment. So account for that by just making the page
690 	 * offset 0 if we are starting at the first header.
691 	 */
692 	if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
693 	    pg_off == ALIGN(FBNIC_RX_HROOM, 128))
694 		return 0;
695 
696 	return pg_off - FBNIC_RX_HROOM;
697 }
698 
699 static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
700 {
701 	/* Determine the end of the buffer by finding the start of the next
702 	 * and then subtracting the headroom from that frame.
703 	 */
704 	pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;
705 
706 	return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
707 }
708 
709 static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
710 			      struct fbnic_pkt_buff *pkt,
711 			      struct fbnic_q_triad *qt)
712 {
713 	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
714 	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
715 	struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx);
716 	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
717 	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
718 	unsigned char *hdr_start;
719 
720 	/* data_hard_start should always be NULL when this is called */
721 	WARN_ON_ONCE(pkt->buff.data_hard_start);
722 
723 	/* Short-cut the end calculation if we know page is fully consumed */
724 	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
725 		     FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len);
726 	hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off);
727 
728 	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
729 	frame_sz = hdr_pg_end - hdr_pg_start;
730 	xdp_init_buff(&pkt->buff, frame_sz, NULL);
731 	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
732 			FBNIC_BD_FRAG_SIZE;
733 
734 	/* Sync DMA buffer */
735 	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
736 				      hdr_pg_start, frame_sz,
737 				      DMA_BIDIRECTIONAL);
738 
739 	/* Build frame around buffer */
740 	hdr_start = page_address(page) + hdr_pg_start;
741 
742 	xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
743 			 len - FBNIC_RX_PAD, true);
744 
745 	pkt->data_truesize = 0;
746 	pkt->data_len = 0;
747 	pkt->nr_frags = 0;
748 }
749 
750 static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
751 			      struct fbnic_pkt_buff *pkt,
752 			      struct fbnic_q_triad *qt)
753 {
754 	unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
755 	unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
756 	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
757 	struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx);
758 	struct skb_shared_info *shinfo;
759 	unsigned int truesize;
760 
761 	truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
762 		   FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);
763 
764 	pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
765 		  FBNIC_BD_FRAG_SIZE;
766 
767 	/* Sync DMA buffer */
768 	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
769 				      pg_off, truesize, DMA_BIDIRECTIONAL);
770 
771 	/* Add page to xdp shared info */
772 	shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
773 
774 	/* We use gso_segs to store truesize */
775 	pkt->data_truesize += truesize;
776 
777 	__skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len);
778 
779 	/* Store data_len in gso_size */
780 	pkt->data_len += len;
781 }
782 
783 static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv,
784 			       struct fbnic_pkt_buff *pkt, int budget)
785 {
786 	struct skb_shared_info *shinfo;
787 	struct page *page;
788 	int nr_frags;
789 
790 	if (!pkt->buff.data_hard_start)
791 		return;
792 
793 	shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
794 	nr_frags = pkt->nr_frags;
795 
796 	while (nr_frags--) {
797 		page = skb_frag_page(&shinfo->frags[nr_frags]);
798 		page_pool_put_full_page(nv->page_pool, page, !!budget);
799 	}
800 
801 	page = virt_to_page(pkt->buff.data_hard_start);
802 	page_pool_put_full_page(nv->page_pool, page, !!budget);
803 }
804 
805 static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
806 				       struct fbnic_pkt_buff *pkt)
807 {
808 	unsigned int nr_frags = pkt->nr_frags;
809 	struct skb_shared_info *shinfo;
810 	unsigned int truesize;
811 	struct sk_buff *skb;
812 
813 	truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM -
814 		   pkt->buff.data_hard_start;
815 
816 	/* Build frame around buffer */
817 	skb = napi_build_skb(pkt->buff.data_hard_start, truesize);
818 	if (unlikely(!skb))
819 		return NULL;
820 
821 	/* Push data pointer to start of data, put tail to end of data */
822 	skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start);
823 	__skb_put(skb, pkt->buff.data_end - pkt->buff.data);
824 
825 	/* Add tracking for metadata at the start of the frame */
826 	skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta);
827 
828 	/* Add Rx frags */
829 	if (nr_frags) {
830 		/* Verify that shared info didn't move */
831 		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
832 		WARN_ON(skb_shinfo(skb) != shinfo);
833 
834 		skb->truesize += pkt->data_truesize;
835 		skb->data_len += pkt->data_len;
836 		shinfo->nr_frags = nr_frags;
837 		skb->len += pkt->data_len;
838 	}
839 
840 	skb_mark_for_recycle(skb);
841 
842 	/* Set MAC header specific fields */
843 	skb->protocol = eth_type_trans(skb, nv->napi.dev);
844 
845 	/* Add timestamp if present */
846 	if (pkt->hwtstamp)
847 		skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;
848 
849 	return skb;
850 }
851 
852 static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
853 {
854 	return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
855 	       (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L3 :
856 						     PKT_HASH_TYPE_L2;
857 }
858 
859 static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
860 			    struct fbnic_pkt_buff *pkt)
861 {
862 	struct fbnic_net *fbn;
863 	u64 ns, ts;
864 
865 	if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd))
866 		return;
867 
868 	fbn = netdev_priv(nv->napi.dev);
869 	ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd);
870 	ns = fbnic_ts40_to_ns(fbn, ts);
871 
872 	/* Add timestamp to shared info */
873 	pkt->hwtstamp = ns_to_ktime(ns);
874 }
875 
876 static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
877 				      u64 rcd, struct sk_buff *skb,
878 				      struct fbnic_q_triad *qt)
879 {
880 	struct net_device *netdev = nv->napi.dev;
881 	struct fbnic_ring *rcq = &qt->cmpl;
882 
883 	fbnic_rx_csum(rcd, skb, rcq);
884 
885 	if (netdev->features & NETIF_F_RXHASH)
886 		skb_set_hash(skb,
887 			     FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd),
888 			     fbnic_skb_hash_type(rcd));
889 
890 	skb_record_rx_queue(skb, rcq->q_idx);
891 }
892 
893 static bool fbnic_rcd_metadata_err(u64 rcd)
894 {
895 	return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd);
896 }
897 
/* Process the Rx completion queue of @qt.
 *
 * Consumes completion descriptors until @budget packets were delivered or
 * the hardware has nothing more to report. Header and payload descriptors
 * build up the packet in rcq->pkt, the metadata descriptor finishes it and
 * passes it to GRO (or drops it on error). Afterwards consumed buffers
 * are released, both buffer queues are refilled and the new completion
 * head is reported to the hardware.
 *
 * Return: number of packets passed up the stack.
 */
static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
			   struct fbnic_q_triad *qt, int budget)
{
	unsigned int packets = 0, bytes = 0, dropped = 0;
	struct fbnic_ring *rcq = &qt->cmpl;
	struct fbnic_pkt_buff *pkt;
	s32 head0 = -1, head1 = -1;
	__le64 *raw_rcd, done;
	u32 head = rcq->head;

	/* head carries one extra bit above size_mask which flips on every
	 * pass over the ring; "done" is the DONE bit value of a descriptor
	 * that has NOT yet been written for the current pass.
	 */
	done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0;
	raw_rcd = &rcq->desc[head & rcq->size_mask];
	pkt = rcq->pkt;

	/* Walk the completion queue collecting the heads reported by NIC */
	while (likely(packets < budget)) {
		/* Error pointer by default so that a metadata error takes
		 * the drop path below without building an skb.
		 */
		struct sk_buff *skb = ERR_PTR(-EINVAL);
		u64 rcd;

		if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done)
			break;

		/* Read the rest of the descriptor only after the DONE check */
		dma_rmb();

		rcd = le64_to_cpu(*raw_rcd);

		switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) {
		case FBNIC_RCD_TYPE_HDR_AL:
			/* Header buffer: starts a new packet */
			head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_pkt_prepare(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_PAY_AL:
			/* Payload buffer: appended as a page fragment */
			head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_add_rx_frag(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_OPT_META:
			/* Only type 0 is currently supported */
			if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
				break;

			fbnic_rx_tstamp(nv, rcd, pkt);

			/* We currently ignore the action table index */
			break;
		case FBNIC_RCD_TYPE_META:
			/* Metadata: the packet is complete */
			if (likely(!fbnic_rcd_metadata_err(rcd)))
				skb = fbnic_build_skb(nv, pkt);

			/* Populate skb and invalidate XDP */
			if (!IS_ERR_OR_NULL(skb)) {
				fbnic_populate_skb_fields(nv, rcd, skb, qt);

				packets++;
				bytes += skb->len;

				napi_gro_receive(&nv->napi, skb);
			} else {
				dropped++;
				fbnic_put_pkt_buff(nv, pkt, 1);
			}

			/* Mark the packet buffer as idle for the next header */
			pkt->buff.data_hard_start = NULL;

			break;
		}

		raw_rcd++;
		head++;
		/* Wrapped: expect the opposite DONE polarity from here on */
		if (!(head & rcq->size_mask)) {
			done ^= cpu_to_le64(FBNIC_RCD_DONE);
			raw_rcd = &rcq->desc[0];
		}
	}

	u64_stats_update_begin(&rcq->stats.syncp);
	rcq->stats.packets += packets;
	rcq->stats.bytes += bytes;
	/* Re-add ethernet header length (removed in fbnic_build_skb) */
	rcq->stats.bytes += ETH_HLEN * packets;
	rcq->stats.dropped += dropped;
	u64_stats_update_end(&rcq->stats.syncp);

	/* Unmap and free processed buffers */
	if (head0 >= 0)
		fbnic_clean_bdq(nv, budget, &qt->sub0, head0);
	fbnic_fill_bdq(nv, &qt->sub0);

	if (head1 >= 0)
		fbnic_clean_bdq(nv, budget, &qt->sub1, head1);
	fbnic_fill_bdq(nv, &qt->sub1);

	/* Record the current head/tail of the queue */
	if (rcq->head != head) {
		rcq->head = head;
		writel(head & rcq->size_mask, rcq->doorbell);
	}

	return packets;
}
999 
1000 static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
1001 {
1002 	struct fbnic_dev *fbd = nv->fbd;
1003 	u32 v_idx = nv->v_idx;
1004 
1005 	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
1006 }
1007 
1008 static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
1009 {
1010 	struct fbnic_dev *fbd = nv->fbd;
1011 	u32 v_idx = nv->v_idx;
1012 
1013 	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
1014 		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
1015 }
1016 
1017 static int fbnic_poll(struct napi_struct *napi, int budget)
1018 {
1019 	struct fbnic_napi_vector *nv = container_of(napi,
1020 						    struct fbnic_napi_vector,
1021 						    napi);
1022 	int i, j, work_done = 0;
1023 
1024 	for (i = 0; i < nv->txt_count; i++)
1025 		fbnic_clean_tcq(nv, &nv->qt[i], budget);
1026 
1027 	for (j = 0; j < nv->rxt_count; j++, i++)
1028 		work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);
1029 
1030 	if (work_done >= budget)
1031 		return budget;
1032 
1033 	if (likely(napi_complete_done(napi, work_done)))
1034 		fbnic_nv_irq_rearm(nv);
1035 
1036 	return work_done;
1037 }
1038 
1039 irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
1040 {
1041 	struct fbnic_napi_vector *nv = *(void **)data;
1042 
1043 	napi_schedule_irqoff(&nv->napi);
1044 
1045 	return IRQ_HANDLED;
1046 }
1047 
1048 void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
1049 				      struct fbnic_ring *rxr)
1050 {
1051 	struct fbnic_queue_stats *stats = &rxr->stats;
1052 
1053 	/* Capture stats from queues before dissasociating them */
1054 	fbn->rx_stats.bytes += stats->bytes;
1055 	fbn->rx_stats.packets += stats->packets;
1056 	fbn->rx_stats.dropped += stats->dropped;
1057 }
1058 
1059 void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
1060 				      struct fbnic_ring *txr)
1061 {
1062 	struct fbnic_queue_stats *stats = &txr->stats;
1063 
1064 	/* Capture stats from queues before dissasociating them */
1065 	fbn->tx_stats.bytes += stats->bytes;
1066 	fbn->tx_stats.packets += stats->packets;
1067 	fbn->tx_stats.dropped += stats->dropped;
1068 	fbn->tx_stats.ts_lost += stats->ts_lost;
1069 	fbn->tx_stats.ts_packets += stats->ts_packets;
1070 }
1071 
1072 static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
1073 				 struct fbnic_ring *txr)
1074 {
1075 	if (!(txr->flags & FBNIC_RING_F_STATS))
1076 		return;
1077 
1078 	fbnic_aggregate_ring_tx_counters(fbn, txr);
1079 
1080 	/* Remove pointer to the Tx ring */
1081 	WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
1082 	fbn->tx[txr->q_idx] = NULL;
1083 }
1084 
1085 static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
1086 				 struct fbnic_ring *rxr)
1087 {
1088 	if (!(rxr->flags & FBNIC_RING_F_STATS))
1089 		return;
1090 
1091 	fbnic_aggregate_ring_rx_counters(fbn, rxr);
1092 
1093 	/* Remove pointer to the Rx ring */
1094 	WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
1095 	fbn->rx[rxr->q_idx] = NULL;
1096 }
1097 
1098 static void fbnic_free_napi_vector(struct fbnic_net *fbn,
1099 				   struct fbnic_napi_vector *nv)
1100 {
1101 	struct fbnic_dev *fbd = nv->fbd;
1102 	int i, j;
1103 
1104 	for (i = 0; i < nv->txt_count; i++) {
1105 		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
1106 		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
1107 	}
1108 
1109 	for (j = 0; j < nv->rxt_count; j++, i++) {
1110 		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
1111 		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
1112 		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
1113 	}
1114 
1115 	fbnic_napi_free_irq(fbd, nv);
1116 	page_pool_destroy(nv->page_pool);
1117 	netif_napi_del(&nv->napi);
1118 	fbn->napi[fbnic_napi_idx(nv)] = NULL;
1119 	kfree(nv);
1120 }
1121 
1122 void fbnic_free_napi_vectors(struct fbnic_net *fbn)
1123 {
1124 	int i;
1125 
1126 	for (i = 0; i < fbn->num_napi; i++)
1127 		if (fbn->napi[i])
1128 			fbnic_free_napi_vector(fbn, fbn->napi[i]);
1129 }
1130 
1131 #define FBNIC_PAGE_POOL_FLAGS \
1132 	(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
1133 
1134 static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn,
1135 				    struct fbnic_napi_vector *nv)
1136 {
1137 	struct page_pool_params pp_params = {
1138 		.order = 0,
1139 		.flags = FBNIC_PAGE_POOL_FLAGS,
1140 		.pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count,
1141 		.nid = NUMA_NO_NODE,
1142 		.dev = nv->dev,
1143 		.dma_dir = DMA_BIDIRECTIONAL,
1144 		.offset = 0,
1145 		.max_len = PAGE_SIZE
1146 	};
1147 	struct page_pool *pp;
1148 
1149 	/* Page pool cannot exceed a size of 32768. This doesn't limit the
1150 	 * pages on the ring but the number we can have cached waiting on
1151 	 * the next use.
1152 	 *
1153 	 * TBD: Can this be reduced further? Would a multiple of
1154 	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
1155 	 * may pages do we need to hold in reserve to get the best return
1156 	 * without hogging too much system memory.
1157 	 */
1158 	if (pp_params.pool_size > 32768)
1159 		pp_params.pool_size = 32768;
1160 
1161 	pp = page_pool_create(&pp_params);
1162 	if (IS_ERR(pp))
1163 		return PTR_ERR(pp);
1164 
1165 	nv->page_pool = pp;
1166 
1167 	return 0;
1168 }
1169 
1170 static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
1171 			    int q_idx, u8 flags)
1172 {
1173 	u64_stats_init(&ring->stats.syncp);
1174 	ring->doorbell = doorbell;
1175 	ring->q_idx = q_idx;
1176 	ring->flags = flags;
1177 }
1178 
/**
 * fbnic_alloc_napi_vector() - allocate one NAPI vector and set up its rings
 * @fbd: device the vector belongs to
 * @fbn: netdev priv that tracks the vector and its Tx/Rx rings
 * @v_count: stride added to the queue index after each triad is set up
 * @v_idx: MSI-X vector index recorded for this NAPI vector
 * @txq_count: number of Tx queue triads to create
 * @txq_idx: index of the first Tx queue
 * @rxq_count: number of Rx queue triads to create
 * @rxq_idx: index of the first Rx queue
 *
 * Allocates the vector together with its queue triads, registers it with
 * NAPI, creates a page pool when Rx queues are requested, requests the IRQ
 * and finally initializes every ring with its doorbell address.
 *
 * Return: 0 on success, -EINVAL if no queues were requested, -EIO if
 * fbd->uc_addr0 is NULL, -ENOMEM if the vector cannot be allocated, or the
 * error returned by the page pool or IRQ setup.
 */
static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
				   unsigned int v_count, unsigned int v_idx,
				   unsigned int txq_count, unsigned int txq_idx,
				   unsigned int rxq_count, unsigned int rxq_idx)
{
	int txt_count = txq_count, rxt_count = rxq_count;
	u32 __iomem *uc_addr = fbd->uc_addr0;
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *qt;
	int qt_count, err;
	u32 __iomem *db;

	qt_count = txt_count + rxq_count;
	if (!qt_count)
		return -EINVAL;

	/* If MMIO has already failed there are no rings to initialize */
	if (!uc_addr)
		return -EIO;

	/* Allocate NAPI vector and queue triads */
	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
	if (!nv)
		return -ENOMEM;

	/* Record queue triad counts */
	nv->txt_count = txt_count;
	nv->rxt_count = rxt_count;

	/* Provide pointer back to fbnic and MSI-X vectors */
	nv->fbd = fbd;
	nv->v_idx = v_idx;

	/* Tie napi to netdev */
	fbn->napi[fbnic_napi_idx(nv)] = nv;
	netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll);

	/* Record IRQ to NAPI struct */
	netif_napi_set_irq(&nv->napi,
			   pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));

	/* Tie nv back to PCIe dev */
	nv->dev = fbd->dev;

	/* Allocate page pool; only needed when the vector services Rx */
	if (rxq_count) {
		err = fbnic_alloc_nv_page_pool(fbn, nv);
		if (err)
			goto napi_del;
	}

	/* Request the IRQ for napi vector */
	err = fbnic_napi_request_irq(fbd, nv);
	if (err)
		goto pp_destroy;

	/* Initialize queue triads; Tx triads come first, then Rx triads */
	qt = nv->qt;

	while (txt_count) {
		/* Configure Tx queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];

		/* Assign Tx queue to netdev if applicable */
		if (txq_count > 0) {
			u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;

			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
			fbn->tx[txq_idx] = &qt->sub0;
			txq_count--;
		} else {
			fbnic_ring_init(&qt->sub0, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure Tx completion queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, 0, 0);

		/* Update Tx queue index */
		txt_count--;
		txq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	while (rxt_count) {
		/* Configure header queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
		fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX);

		/* Configure payload queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
		fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX);

		/* Configure Rx completion queue; this is the ring that is
		 * published to the netdev and carries the Rx statistics
		 */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
		fbn->rx[rxq_idx] = &qt->cmpl;

		/* Update Rx queue index */
		rxt_count--;
		rxq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	return 0;

	/* Unwind in reverse order of setup */
pp_destroy:
	page_pool_destroy(nv->page_pool);
napi_del:
	netif_napi_del(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
	return err;
}
1298 
1299 int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
1300 {
1301 	unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
1302 	unsigned int num_tx = fbn->num_tx_queues;
1303 	unsigned int num_rx = fbn->num_rx_queues;
1304 	unsigned int num_napi = fbn->num_napi;
1305 	struct fbnic_dev *fbd = fbn->fbd;
1306 	int err;
1307 
1308 	/* Allocate 1 Tx queue per napi vector */
1309 	if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
1310 		while (num_tx) {
1311 			err = fbnic_alloc_napi_vector(fbd, fbn,
1312 						      num_napi, v_idx,
1313 						      1, txq_idx, 0, 0);
1314 			if (err)
1315 				goto free_vectors;
1316 
1317 			/* Update counts and index */
1318 			num_tx--;
1319 			txq_idx++;
1320 
1321 			v_idx++;
1322 		}
1323 	}
1324 
1325 	/* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
1326 	while (num_rx | num_tx) {
1327 		int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
1328 		int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx);
1329 
1330 		err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx,
1331 					      tqpv, txq_idx, rqpv, rxq_idx);
1332 		if (err)
1333 			goto free_vectors;
1334 
1335 		/* Update counts and index */
1336 		num_tx -= tqpv;
1337 		txq_idx++;
1338 
1339 		num_rx -= rqpv;
1340 		rxq_idx++;
1341 
1342 		v_idx++;
1343 	}
1344 
1345 	return 0;
1346 
1347 free_vectors:
1348 	fbnic_free_napi_vectors(fbn);
1349 
1350 	return -ENOMEM;
1351 }
1352 
1353 static void fbnic_free_ring_resources(struct device *dev,
1354 				      struct fbnic_ring *ring)
1355 {
1356 	kvfree(ring->buffer);
1357 	ring->buffer = NULL;
1358 
1359 	/* If size is not set there are no descriptors present */
1360 	if (!ring->size)
1361 		return;
1362 
1363 	dma_free_coherent(dev, ring->size, ring->desc, ring->dma);
1364 	ring->size_mask = 0;
1365 	ring->size = 0;
1366 }
1367 
1368 static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn,
1369 				    struct fbnic_ring *txr)
1370 {
1371 	struct device *dev = fbn->netdev->dev.parent;
1372 	size_t size;
1373 
1374 	/* Round size up to nearest 4K */
1375 	size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096);
1376 
1377 	txr->desc = dma_alloc_coherent(dev, size, &txr->dma,
1378 				       GFP_KERNEL | __GFP_NOWARN);
1379 	if (!txr->desc)
1380 		return -ENOMEM;
1381 
1382 	/* txq_size should be a power of 2, so mask is just that -1 */
1383 	txr->size_mask = fbn->txq_size - 1;
1384 	txr->size = size;
1385 
1386 	return 0;
1387 }
1388 
1389 static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr)
1390 {
1391 	size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1);
1392 
1393 	txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1394 
1395 	return txr->tx_buf ? 0 : -ENOMEM;
1396 }
1397 
1398 static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn,
1399 					 struct fbnic_ring *txr)
1400 {
1401 	struct device *dev = fbn->netdev->dev.parent;
1402 	int err;
1403 
1404 	if (txr->flags & FBNIC_RING_F_DISABLED)
1405 		return 0;
1406 
1407 	err = fbnic_alloc_tx_ring_desc(fbn, txr);
1408 	if (err)
1409 		return err;
1410 
1411 	if (!(txr->flags & FBNIC_RING_F_CTX))
1412 		return 0;
1413 
1414 	err = fbnic_alloc_tx_ring_buffer(txr);
1415 	if (err)
1416 		goto free_desc;
1417 
1418 	return 0;
1419 
1420 free_desc:
1421 	fbnic_free_ring_resources(dev, txr);
1422 	return err;
1423 }
1424 
1425 static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn,
1426 				    struct fbnic_ring *rxr)
1427 {
1428 	struct device *dev = fbn->netdev->dev.parent;
1429 	size_t desc_size = sizeof(*rxr->desc);
1430 	u32 rxq_size;
1431 	size_t size;
1432 
1433 	switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) {
1434 	case FBNIC_QUEUE_BDQ_HPQ_TAIL:
1435 		rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT;
1436 		desc_size *= FBNIC_BD_FRAG_COUNT;
1437 		break;
1438 	case FBNIC_QUEUE_BDQ_PPQ_TAIL:
1439 		rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT;
1440 		desc_size *= FBNIC_BD_FRAG_COUNT;
1441 		break;
1442 	case FBNIC_QUEUE_RCQ_HEAD:
1443 		rxq_size = fbn->rcq_size;
1444 		break;
1445 	default:
1446 		return -EINVAL;
1447 	}
1448 
1449 	/* Round size up to nearest 4K */
1450 	size = ALIGN(array_size(desc_size, rxq_size), 4096);
1451 
1452 	rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma,
1453 				       GFP_KERNEL | __GFP_NOWARN);
1454 	if (!rxr->desc)
1455 		return -ENOMEM;
1456 
1457 	/* rxq_size should be a power of 2, so mask is just that -1 */
1458 	rxr->size_mask = rxq_size - 1;
1459 	rxr->size = size;
1460 
1461 	return 0;
1462 }
1463 
1464 static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr)
1465 {
1466 	size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1);
1467 
1468 	if (rxr->flags & FBNIC_RING_F_CTX)
1469 		size = sizeof(*rxr->rx_buf) * (rxr->size_mask + 1);
1470 	else
1471 		size = sizeof(*rxr->pkt);
1472 
1473 	rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1474 
1475 	return rxr->rx_buf ? 0 : -ENOMEM;
1476 }
1477 
1478 static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn,
1479 					 struct fbnic_ring *rxr)
1480 {
1481 	struct device *dev = fbn->netdev->dev.parent;
1482 	int err;
1483 
1484 	err = fbnic_alloc_rx_ring_desc(fbn, rxr);
1485 	if (err)
1486 		return err;
1487 
1488 	err = fbnic_alloc_rx_ring_buffer(rxr);
1489 	if (err)
1490 		goto free_desc;
1491 
1492 	return 0;
1493 
1494 free_desc:
1495 	fbnic_free_ring_resources(dev, rxr);
1496 	return err;
1497 }
1498 
1499 static void fbnic_free_qt_resources(struct fbnic_net *fbn,
1500 				    struct fbnic_q_triad *qt)
1501 {
1502 	struct device *dev = fbn->netdev->dev.parent;
1503 
1504 	fbnic_free_ring_resources(dev, &qt->cmpl);
1505 	fbnic_free_ring_resources(dev, &qt->sub1);
1506 	fbnic_free_ring_resources(dev, &qt->sub0);
1507 }
1508 
1509 static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
1510 				       struct fbnic_q_triad *qt)
1511 {
1512 	struct device *dev = fbn->netdev->dev.parent;
1513 	int err;
1514 
1515 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0);
1516 	if (err)
1517 		return err;
1518 
1519 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
1520 	if (err)
1521 		goto free_sub1;
1522 
1523 	return 0;
1524 
1525 free_sub1:
1526 	fbnic_free_ring_resources(dev, &qt->sub0);
1527 	return err;
1528 }
1529 
1530 static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
1531 				       struct fbnic_q_triad *qt)
1532 {
1533 	struct device *dev = fbn->netdev->dev.parent;
1534 	int err;
1535 
1536 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
1537 	if (err)
1538 		return err;
1539 
1540 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
1541 	if (err)
1542 		goto free_sub0;
1543 
1544 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl);
1545 	if (err)
1546 		goto free_sub1;
1547 
1548 	return 0;
1549 
1550 free_sub1:
1551 	fbnic_free_ring_resources(dev, &qt->sub1);
1552 free_sub0:
1553 	fbnic_free_ring_resources(dev, &qt->sub0);
1554 	return err;
1555 }
1556 
1557 static void fbnic_free_nv_resources(struct fbnic_net *fbn,
1558 				    struct fbnic_napi_vector *nv)
1559 {
1560 	int i, j;
1561 
1562 	/* Free Tx Resources  */
1563 	for (i = 0; i < nv->txt_count; i++)
1564 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
1565 
1566 	for (j = 0; j < nv->rxt_count; j++, i++)
1567 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
1568 }
1569 
1570 static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
1571 				    struct fbnic_napi_vector *nv)
1572 {
1573 	int i, j, err;
1574 
1575 	/* Allocate Tx Resources */
1576 	for (i = 0; i < nv->txt_count; i++) {
1577 		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
1578 		if (err)
1579 			goto free_resources;
1580 	}
1581 
1582 	/* Allocate Rx Resources */
1583 	for (j = 0; j < nv->rxt_count; j++, i++) {
1584 		err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
1585 		if (err)
1586 			goto free_resources;
1587 	}
1588 
1589 	return 0;
1590 
1591 free_resources:
1592 	while (i--)
1593 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
1594 	return err;
1595 }
1596 
1597 void fbnic_free_resources(struct fbnic_net *fbn)
1598 {
1599 	int i;
1600 
1601 	for (i = 0; i < fbn->num_napi; i++)
1602 		fbnic_free_nv_resources(fbn, fbn->napi[i]);
1603 }
1604 
1605 int fbnic_alloc_resources(struct fbnic_net *fbn)
1606 {
1607 	int i, err = -ENODEV;
1608 
1609 	for (i = 0; i < fbn->num_napi; i++) {
1610 		err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]);
1611 		if (err)
1612 			goto free_resources;
1613 	}
1614 
1615 	return 0;
1616 
1617 free_resources:
1618 	while (i--)
1619 		fbnic_free_nv_resources(fbn, fbn->napi[i]);
1620 
1621 	return err;
1622 }
1623 
1624 static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv)
1625 {
1626 	int i, j;
1627 
1628 	/* Associate Tx queue with NAPI */
1629 	for (i = 0; i < nv->txt_count; i++) {
1630 		struct fbnic_q_triad *qt = &nv->qt[i];
1631 
1632 		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
1633 				     NETDEV_QUEUE_TYPE_TX, &nv->napi);
1634 	}
1635 
1636 	/* Associate Rx queue with NAPI */
1637 	for (j = 0; j < nv->rxt_count; j++, i++) {
1638 		struct fbnic_q_triad *qt = &nv->qt[i];
1639 
1640 		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
1641 				     NETDEV_QUEUE_TYPE_RX, &nv->napi);
1642 	}
1643 }
1644 
1645 static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv)
1646 {
1647 	int i, j;
1648 
1649 	/* Disassociate Tx queue from NAPI */
1650 	for (i = 0; i < nv->txt_count; i++) {
1651 		struct fbnic_q_triad *qt = &nv->qt[i];
1652 
1653 		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
1654 				     NETDEV_QUEUE_TYPE_TX, NULL);
1655 	}
1656 
1657 	/* Disassociate Rx queue from NAPI */
1658 	for (j = 0; j < nv->rxt_count; j++, i++) {
1659 		struct fbnic_q_triad *qt = &nv->qt[i];
1660 
1661 		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
1662 				     NETDEV_QUEUE_TYPE_RX, NULL);
1663 	}
1664 }
1665 
1666 int fbnic_set_netif_queues(struct fbnic_net *fbn)
1667 {
1668 	int i, err;
1669 
1670 	err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues,
1671 					fbn->num_rx_queues);
1672 	if (err)
1673 		return err;
1674 
1675 	for (i = 0; i < fbn->num_napi; i++)
1676 		fbnic_set_netif_napi(fbn->napi[i]);
1677 
1678 	return 0;
1679 }
1680 
1681 void fbnic_reset_netif_queues(struct fbnic_net *fbn)
1682 {
1683 	int i;
1684 
1685 	for (i = 0; i < fbn->num_napi; i++)
1686 		fbnic_reset_netif_napi(fbn->napi[i]);
1687 }
1688 
1689 static void fbnic_disable_twq0(struct fbnic_ring *txr)
1690 {
1691 	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL);
1692 
1693 	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;
1694 
1695 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
1696 }
1697 
/**
 * fbnic_disable_tcq() - disable a Tx completion queue and mask its interrupt
 * @txr: ring whose TCQ control and TIM mask registers are written
 */
static void fbnic_disable_tcq(struct fbnic_ring *txr)
{
	/* Clear the whole control register, then set the interrupt mask */
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK);
}
1703 
1704 static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
1705 {
1706 	u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL);
1707 
1708 	bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE;
1709 
1710 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
1711 }
1712 
/**
 * fbnic_disable_rcq() - disable an Rx completion queue and mask its interrupt
 * @rxr: ring whose RCQ control and RIM mask registers are written
 */
static void fbnic_disable_rcq(struct fbnic_ring *rxr)
{
	/* Clear the whole control register, then set the interrupt mask */
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0);
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK);
}
1718 
1719 void fbnic_napi_disable(struct fbnic_net *fbn)
1720 {
1721 	int i;
1722 
1723 	for (i = 0; i < fbn->num_napi; i++) {
1724 		napi_disable(&fbn->napi[i]->napi);
1725 
1726 		fbnic_nv_irq_disable(fbn->napi[i]);
1727 	}
1728 }
1729 
1730 void fbnic_disable(struct fbnic_net *fbn)
1731 {
1732 	struct fbnic_dev *fbd = fbn->fbd;
1733 	int i, j, t;
1734 
1735 	for (i = 0; i < fbn->num_napi; i++) {
1736 		struct fbnic_napi_vector *nv = fbn->napi[i];
1737 
1738 		/* Disable Tx queue triads */
1739 		for (t = 0; t < nv->txt_count; t++) {
1740 			struct fbnic_q_triad *qt = &nv->qt[t];
1741 
1742 			fbnic_disable_twq0(&qt->sub0);
1743 			fbnic_disable_tcq(&qt->cmpl);
1744 		}
1745 
1746 		/* Disable Rx queue triads */
1747 		for (j = 0; j < nv->rxt_count; j++, t++) {
1748 			struct fbnic_q_triad *qt = &nv->qt[t];
1749 
1750 			fbnic_disable_bdq(&qt->sub0, &qt->sub1);
1751 			fbnic_disable_rcq(&qt->cmpl);
1752 		}
1753 	}
1754 
1755 	fbnic_wrfl(fbd);
1756 }
1757 
/**
 * fbnic_tx_flush() - turn on the Tx drop control bit
 * @fbd: device to operate on
 *
 * Logs a warning and sets FBNIC_TMI_DROP_CTRL_EN in FBNIC_TMI_DROP_CTRL.
 */
static void fbnic_tx_flush(struct fbnic_dev *fbd)
{
	netdev_warn(fbd->netdev, "triggering Tx flush\n");

	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN,
		    FBNIC_TMI_DROP_CTRL_EN);
}
1765 
/**
 * fbnic_tx_flush_off() - turn off the Tx drop control bit
 * @fbd: device to operate on
 *
 * Clears FBNIC_TMI_DROP_CTRL_EN in FBNIC_TMI_DROP_CTRL.
 */
static void fbnic_tx_flush_off(struct fbnic_dev *fbd)
{
	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0);
}
1770 
/* Describes a run of consecutive idle-status registers */
struct fbnic_idle_regs {
	u32 reg_base;	/* offset of the first register in the run */
	u8 reg_cnt;	/* number of registers in the run */
};
1775 
1776 static bool fbnic_all_idle(struct fbnic_dev *fbd,
1777 			   const struct fbnic_idle_regs *regs,
1778 			   unsigned int nregs)
1779 {
1780 	unsigned int i, j;
1781 
1782 	for (i = 0; i < nregs; i++) {
1783 		for (j = 0; j < regs[i].reg_cnt; j++) {
1784 			if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U)
1785 				return false;
1786 		}
1787 	}
1788 	return true;
1789 }
1790 
1791 static void fbnic_idle_dump(struct fbnic_dev *fbd,
1792 			    const struct fbnic_idle_regs *regs,
1793 			    unsigned int nregs, const char *dir, int err)
1794 {
1795 	unsigned int i, j;
1796 
1797 	netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err);
1798 	for (i = 0; i < nregs; i++)
1799 		for (j = 0; j < regs[i].reg_cnt; j++)
1800 			netdev_err(fbd->netdev, "0x%04x: %08x\n",
1801 				   regs[i].reg_base + j,
1802 				   fbnic_rd32(fbd, regs[i].reg_base + j));
1803 }
1804 
/**
 * fbnic_wait_all_queues_idle() - wait for the Tx and Rx queues to go idle
 * @fbd: device to poll
 * @may_fail: when true, return immediately if Tx does not become idle
 *
 * Polls the Tx idle registers first. If that times out a Tx flush is
 * triggered and the poll is repeated once. The Rx idle registers are polled
 * afterwards. Registers are dumped to the log whenever a wait fails.
 *
 * Note that when @may_fail is false a Tx failure is only logged; the value
 * returned is then the result of the Rx wait.
 *
 * Return: 0 on success, otherwise the error from the failed poll
 */
int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
{
	static const struct fbnic_idle_regs tx[] = {
		{ FBNIC_QM_TWQ_IDLE(0),	FBNIC_QM_TWQ_IDLE_CNT, },
		{ FBNIC_QM_TQS_IDLE(0),	FBNIC_QM_TQS_IDLE_CNT, },
		{ FBNIC_QM_TDE_IDLE(0),	FBNIC_QM_TDE_IDLE_CNT, },
		{ FBNIC_QM_TCQ_IDLE(0),	FBNIC_QM_TCQ_IDLE_CNT, },
	}, rx[] = {
		{ FBNIC_QM_HPQ_IDLE(0),	FBNIC_QM_HPQ_IDLE_CNT, },
		{ FBNIC_QM_PPQ_IDLE(0),	FBNIC_QM_PPQ_IDLE_CNT, },
		{ FBNIC_QM_RCQ_IDLE(0),	FBNIC_QM_RCQ_IDLE_CNT, },
	};
	bool idle;
	int err;

	/* First attempt: wait for Tx to drain on its own */
	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, tx, ARRAY_SIZE(tx));
	if (err == -ETIMEDOUT) {
		/* Second attempt: enable Tx drop, wait again, then turn the
		 * drop back off regardless of the outcome
		 */
		fbnic_tx_flush(fbd);
		err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle,
					       2, 500000, false,
					       fbd, tx, ARRAY_SIZE(tx));
		fbnic_tx_flush_off(fbd);
	}
	if (err) {
		fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err);
		if (may_fail)
			return err;
	}

	/* Rx is checked even if Tx failed and the caller does not allow
	 * failure; err is overwritten with the Rx result here
	 */
	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, rx, ARRAY_SIZE(rx));
	if (err)
		fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err);
	return err;
}
1841 
/**
 * fbnic_flush() - drop all pending work on every Tx and Rx queue
 * @fbn: netdev priv that owns the vectors and queues
 *
 * For each Tx triad the work queue is cleaned, the completion descriptors
 * are zeroed and, unless the queue is disabled, the BQL state of the
 * matching netdev Tx queue is reset. For each Rx triad both buffer queues
 * are cleaned, the completion descriptors are zeroed and the packet buffer
 * tracked by the completion ring is released.
 */
void fbnic_flush(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];
		int j, t;

		/* Flush any processed Tx Queue Triads and drop the rest */
		for (t = 0; t < nv->txt_count; t++) {
			struct fbnic_q_triad *qt = &nv->qt[t];
			struct netdev_queue *tx_queue;

			/* Clean the work queues of unprocessed work */
			fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);

			/* Reset completion queue descriptor ring */
			memset(qt->cmpl.desc, 0, qt->cmpl.size);

			/* Nothing else to do if Tx queue is disabled */
			if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
				continue;

			/* Reset BQL associated with Tx queue */
			tx_queue = netdev_get_tx_queue(nv->napi.dev,
						       qt->sub0.q_idx);
			netdev_tx_reset_queue(tx_queue);
		}

		/* Flush any processed Rx Queue Triads and drop the rest;
		 * t continues from the Tx loop so it indexes the Rx triads
		 */
		for (j = 0; j < nv->rxt_count; j++, t++) {
			struct fbnic_q_triad *qt = &nv->qt[t];

			/* Clean the work queues of unprocessed work */
			fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail);
			fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail);

			/* Reset completion queue descriptor ring */
			memset(qt->cmpl.desc, 0, qt->cmpl.size);

			/* Release the in-progress packet and mark it empty */
			fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
			qt->cmpl.pkt->buff.data_hard_start = NULL;
		}
	}
}
1887 
1888 void fbnic_fill(struct fbnic_net *fbn)
1889 {
1890 	int i;
1891 
1892 	for (i = 0; i < fbn->num_napi; i++) {
1893 		struct fbnic_napi_vector *nv = fbn->napi[i];
1894 		int j, t;
1895 
1896 		/* Configure NAPI mapping and populate pages
1897 		 * in the BDQ rings to use for Rx
1898 		 */
1899 		for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
1900 			struct fbnic_q_triad *qt = &nv->qt[t];
1901 
1902 			/* Populate the header and payload BDQs */
1903 			fbnic_fill_bdq(nv, &qt->sub0);
1904 			fbnic_fill_bdq(nv, &qt->sub1);
1905 		}
1906 	}
1907 }
1908 
1909 static void fbnic_enable_twq0(struct fbnic_ring *twq)
1910 {
1911 	u32 log_size = fls(twq->size_mask);
1912 
1913 	if (!twq->size_mask)
1914 		return;
1915 
1916 	/* Reset head/tail */
1917 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
1918 	twq->tail = 0;
1919 	twq->head = 0;
1920 
1921 	/* Store descriptor ring address and size */
1922 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
1923 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));
1924 
1925 	/* Write lower 4 bits of log size as 64K ring size is 0 */
1926 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);
1927 
1928 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
1929 }
1930 
1931 static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
1932 			     struct fbnic_ring *tcq)
1933 {
1934 	u32 log_size = fls(tcq->size_mask);
1935 
1936 	if (!tcq->size_mask)
1937 		return;
1938 
1939 	/* Reset head/tail */
1940 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
1941 	tcq->tail = 0;
1942 	tcq->head = 0;
1943 
1944 	/* Store descriptor ring address and size */
1945 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
1946 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));
1947 
1948 	/* Write lower 4 bits of log size as 64K ring size is 0 */
1949 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);
1950 
1951 	/* Store interrupt information for the completion queue */
1952 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
1953 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
1954 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);
1955 
1956 	/* Enable queue */
1957 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
1958 }
1959 
1960 static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
1961 {
1962 	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
1963 	u32 log_size;
1964 
1965 	/* Reset head/tail */
1966 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
1967 	ppq->tail = 0;
1968 	ppq->head = 0;
1969 	hpq->tail = 0;
1970 	hpq->head = 0;
1971 
1972 	log_size = fls(hpq->size_mask);
1973 
1974 	/* Store descriptor ring address and size */
1975 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
1976 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));
1977 
1978 	/* Write lower 4 bits of log size as 64K ring size is 0 */
1979 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);
1980 
1981 	if (!ppq->size_mask)
1982 		goto write_ctl;
1983 
1984 	log_size = fls(ppq->size_mask);
1985 
1986 	/* Add enabling of PPQ to BDQ control */
1987 	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;
1988 
1989 	/* Store descriptor ring address and size */
1990 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
1991 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));
1992 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);
1993 
1994 write_ctl:
1995 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
1996 }
1997 
1998 static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
1999 				       struct fbnic_ring *rcq)
2000 {
2001 	u32 drop_mode, rcq_ctl;
2002 
2003 	drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;
2004 
2005 	/* Specify packet layout */
2006 	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
2007 	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
2008 	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);
2009 
2010 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
2011 }
2012 
2013 static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
2014 			     struct fbnic_ring *rcq)
2015 {
2016 	u32 log_size = fls(rcq->size_mask);
2017 	u32 rcq_ctl;
2018 
2019 	fbnic_config_drop_mode_rcq(nv, rcq);
2020 
2021 	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
2022 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK,
2023 			      FBNIC_RX_MAX_HDR) |
2024 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
2025 			      FBNIC_RX_PAYLD_OFFSET) |
2026 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
2027 			      FBNIC_RX_PAYLD_PG_CL);
2028 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);
2029 
2030 	/* Reset head/tail */
2031 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
2032 	rcq->head = 0;
2033 	rcq->tail = 0;
2034 
2035 	/* Store descriptor ring address and size */
2036 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
2037 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));
2038 
2039 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2040 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);
2041 
2042 	/* Store interrupt information for the completion queue */
2043 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv->v_idx);
2044 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, rcq->size_mask / 2);
2045 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);
2046 
2047 	/* Enable queue */
2048 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
2049 }
2050 
2051 void fbnic_enable(struct fbnic_net *fbn)
2052 {
2053 	struct fbnic_dev *fbd = fbn->fbd;
2054 	int i;
2055 
2056 	for (i = 0; i < fbn->num_napi; i++) {
2057 		struct fbnic_napi_vector *nv = fbn->napi[i];
2058 		int j, t;
2059 
2060 		/* Setup Tx Queue Triads */
2061 		for (t = 0; t < nv->txt_count; t++) {
2062 			struct fbnic_q_triad *qt = &nv->qt[t];
2063 
2064 			fbnic_enable_twq0(&qt->sub0);
2065 			fbnic_enable_tcq(nv, &qt->cmpl);
2066 		}
2067 
2068 		/* Setup Rx Queue Triads */
2069 		for (j = 0; j < nv->rxt_count; j++, t++) {
2070 			struct fbnic_q_triad *qt = &nv->qt[t];
2071 
2072 			fbnic_enable_bdq(&qt->sub0, &qt->sub1);
2073 			fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
2074 			fbnic_enable_rcq(nv, &qt->cmpl);
2075 		}
2076 	}
2077 
2078 	fbnic_wrfl(fbd);
2079 }
2080 
2081 static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
2082 {
2083 	struct fbnic_dev *fbd = nv->fbd;
2084 	u32 val;
2085 
2086 	val = FBNIC_INTR_CQ_REARM_INTR_UNMASK;
2087 
2088 	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
2089 }
2090 
2091 void fbnic_napi_enable(struct fbnic_net *fbn)
2092 {
2093 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
2094 	struct fbnic_dev *fbd = fbn->fbd;
2095 	int i;
2096 
2097 	for (i = 0; i < fbn->num_napi; i++) {
2098 		struct fbnic_napi_vector *nv = fbn->napi[i];
2099 
2100 		napi_enable(&nv->napi);
2101 
2102 		fbnic_nv_irq_enable(nv);
2103 
2104 		/* Record bit used for NAPI IRQs so we can
2105 		 * set the mask appropriately
2106 		 */
2107 		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
2108 	}
2109 
2110 	/* Force the first interrupt on the device to guarantee
2111 	 * that any packets that may have been enqueued during the
2112 	 * bringup are processed.
2113 	 */
2114 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
2115 		if (!irqs[i])
2116 			continue;
2117 		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
2118 	}
2119 
2120 	fbnic_wrfl(fbd);
2121 }
2122 
2123 void fbnic_napi_depletion_check(struct net_device *netdev)
2124 {
2125 	struct fbnic_net *fbn = netdev_priv(netdev);
2126 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
2127 	struct fbnic_dev *fbd = fbn->fbd;
2128 	int i, j, t;
2129 
2130 	for (i = 0; i < fbn->num_napi; i++) {
2131 		struct fbnic_napi_vector *nv = fbn->napi[i];
2132 
2133 		/* Find RQs which are completely out of pages */
2134 		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
2135 			/* Assume 4 pages is always enough to fit a packet
2136 			 * and therefore generate a completion and an IRQ.
2137 			 */
2138 			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
2139 			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
2140 				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
2141 		}
2142 	}
2143 
2144 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
2145 		if (!irqs[i])
2146 			continue;
2147 		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
2148 		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
2149 	}
2150 
2151 	fbnic_wrfl(fbd);
2152 }
2153