xref: /linux/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c (revision fc3a2810412c163b5df1b377d332e048860f45db)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3 
4 #include <linux/bitfield.h>
5 #include <linux/bpf.h>
6 #include <linux/bpf_trace.h>
7 #include <linux/iopoll.h>
8 #include <linux/pci.h>
9 #include <net/netdev_queues.h>
10 #include <net/page_pool/helpers.h>
11 #include <net/tcp.h>
12 #include <net/xdp.h>
13 
14 #include "fbnic.h"
15 #include "fbnic_csr.h"
16 #include "fbnic_netdev.h"
17 #include "fbnic_txrx.h"
18 
19 enum {
20 	FBNIC_XDP_PASS = 0,
21 	FBNIC_XDP_CONSUME,
22 	FBNIC_XDP_TX,
23 	FBNIC_XDP_LEN_ERR,
24 };
25 
26 enum {
27 	FBNIC_XMIT_CB_TS	= 0x01,
28 };
29 
30 struct fbnic_xmit_cb {
31 	u32 bytecount;
32 	u16 gso_segs;
33 	u8 desc_count;
34 	u8 flags;
35 	int hw_head;
36 };
37 
38 #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb))
39 
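/* The doorbell register lives inside the queue's CSR block; masking its
 * address down to a FBNIC_QUEUE_STRIDE * sizeof(u32) boundary recovers the
 * base of that block for the relative ring_rd32()/ring_wr32() accessors
 * below.
 */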
40 static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring)
41 {
42 	unsigned long csr_base = (unsigned long)ring->doorbell;
43 
44 	csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1);
45 
46 	return (u32 __iomem *)csr_base;
47 }
48 
49 static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr)
50 {
51 	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
52 
53 	return readl(csr_base + csr);
54 }
55 
56 static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
57 {
58 	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
59 
60 	writel(val, csr_base + csr);
61 }
62 
63 /**
64  * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
65  * @fbn: netdev priv of the FB NIC
66  * @ts40: timestamp read from a descriptor
67  *
68  * Return: u64 value of PHC time in nanoseconds
69  *
70  * Convert truncated 40 bit device timestamp as read from a descriptor
71  * to the full PHC time in nanoseconds.
72  */
73 static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
74 {
75 	unsigned int s;
76 	u64 time_ns;
77 	s64 offset;
78 	u8 ts_top;
79 	u32 high;
80 
81 	do {
82 		s = u64_stats_fetch_begin(&fbn->time_seq);
83 		offset = READ_ONCE(fbn->time_offset);
84 	} while (u64_stats_fetch_retry(&fbn->time_seq, s));
85 
86 	high = READ_ONCE(fbn->time_high);
87 
88 	/* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
89 	time_ns = (u64)(high >> 8) << 40 | ts40;
90 
91 	/* Compare bits 39..32 of the periodic reads against those of ts40
92 	 * to see if the HW clock wrapped since the last read. Periodic
93 	 * reads are always at least ~1 minute behind, so this check is
94 	 * sufficient.
95 	 */
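	/* Illustration (hypothetical values): if the periodic read has
	 * bits 39..32 == 0xfe while the descriptor's ts_top == 0x01, the
	 * gap (0xfd) exceeds U8_MAX / 2, so the 40-bit timestamp wrapped
	 * after the periodic read and 1 << 40 is added below.
	 */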
96 	ts_top = ts40 >> 32;
97 	if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
98 		time_ns += 1ULL << 40;
99 
100 	return time_ns + offset;
101 }
102 
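/* Ring occupancy helpers: head and tail index a power-of-two sized ring,
 * so masking their difference with size_mask yields the free/used counts
 * even across wraparound. The "- 1" in fbnic_desc_unused() keeps one slot
 * open so a completely full ring is never mistaken for an empty one.
 */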
103 static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
104 {
105 	return (ring->head - ring->tail - 1) & ring->size_mask;
106 }
107 
108 static unsigned int fbnic_desc_used(struct fbnic_ring *ring)
109 {
110 	return (ring->tail - ring->head) & ring->size_mask;
111 }
112 
113 static struct netdev_queue *txring_txq(const struct net_device *dev,
114 				       const struct fbnic_ring *ring)
115 {
116 	return netdev_get_tx_queue(dev, ring->q_idx);
117 }
118 
119 static int fbnic_maybe_stop_tx(const struct net_device *dev,
120 			       struct fbnic_ring *ring,
121 			       const unsigned int size)
122 {
123 	struct netdev_queue *txq = txring_txq(dev, ring);
124 	int res;
125 
126 	res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
127 				   FBNIC_TX_DESC_WAKEUP);
128 	if (!res) {
129 		u64_stats_update_begin(&ring->stats.syncp);
130 		ring->stats.twq.stop++;
131 		u64_stats_update_end(&ring->stats.syncp);
132 	}
133 
134 	return !res;
135 }
136 
137 static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring)
138 {
139 	struct netdev_queue *dev_queue = txring_txq(skb->dev, ring);
140 	unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount;
141 	bool xmit_more = netdev_xmit_more();
142 
143 	/* TBD: Request completion more often if xmit_more becomes large */
144 
145 	return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more);
146 }
147 
148 static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd)
149 {
150 	u64 raw_twd = le64_to_cpu(*twd);
151 	unsigned int len;
152 	dma_addr_t dma;
153 
154 	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
155 	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);
156 
157 	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
158 }
159 
160 static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
161 {
162 	u64 raw_twd = le64_to_cpu(*twd);
163 	unsigned int len;
164 	dma_addr_t dma;
165 
166 	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
167 	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);
168 
169 	dma_unmap_page(dev, dma, len, DMA_TO_DEVICE);
170 }
171 
172 #define FBNIC_TWD_TYPE(_type) \
173 	cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))
174 
175 static bool fbnic_tx_tstamp(struct sk_buff *skb)
176 {
177 	struct fbnic_net *fbn;
178 
179 	if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
180 		return false;
181 
182 	fbn = netdev_priv(skb->dev);
183 	if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF)
184 		return false;
185 
186 	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
187 	FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS;
188 	FBNIC_XMIT_CB(skb)->hw_head = -1;
189 
190 	return true;
191 }
192 
193 static bool
194 fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb,
195 	     struct skb_shared_info *shinfo, __le64 *meta,
196 	     unsigned int *l2len, unsigned int *i3len)
197 {
198 	unsigned int l3_type, l4_type, l4len, hdrlen;
199 	unsigned char *l4hdr;
200 	__be16 payload_len;
201 
202 	if (unlikely(skb_cow_head(skb, 0)))
203 		return true;
204 
205 	if (shinfo->gso_type & SKB_GSO_PARTIAL) {
206 		l3_type = FBNIC_TWD_L3_TYPE_OTHER;
207 	} else if (!skb->encapsulation) {
208 		if (ip_hdr(skb)->version == 4)
209 			l3_type = FBNIC_TWD_L3_TYPE_IPV4;
210 		else
211 			l3_type = FBNIC_TWD_L3_TYPE_IPV6;
212 	} else {
213 		unsigned int o3len;
214 
215 		o3len = skb_inner_network_header(skb) - skb_network_header(skb);
216 		*i3len -= o3len;
217 		*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK,
218 						o3len / 2));
219 		l3_type = FBNIC_TWD_L3_TYPE_V6V6;
220 	}
221 
222 	l4hdr = skb_checksum_start(skb);
223 	payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data));
224 
225 	if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
226 		struct tcphdr *tcph = (struct tcphdr *)l4hdr;
227 
228 		l4_type = FBNIC_TWD_L4_TYPE_TCP;
229 		l4len = __tcp_hdrlen((struct tcphdr *)l4hdr);
230 		csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len);
231 	} else {
232 		struct udphdr *udph = (struct udphdr *)l4hdr;
233 
234 		l4_type = FBNIC_TWD_L4_TYPE_UDP;
235 		l4len = sizeof(struct udphdr);
236 		csum_replace_by_diff(&udph->check, (__force __wsum)payload_len);
237 	}
238 
239 	hdrlen = (l4hdr - skb->data) + l4len;
240 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) |
241 			     FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) |
242 			     FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) |
243 			     FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) |
244 			     FBNIC_TWD_FLAG_REQ_LSO);
245 
246 	FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen;
247 	FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs;
248 
249 	u64_stats_update_begin(&ring->stats.syncp);
250 	ring->stats.twq.lso += shinfo->gso_segs;
251 	u64_stats_update_end(&ring->stats.syncp);
252 
253 	return false;
254 }
255 
256 static bool
257 fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
258 {
259 	struct skb_shared_info *shinfo = skb_shinfo(skb);
260 	unsigned int l2len, i3len;
261 
262 	if (fbnic_tx_tstamp(skb))
263 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS);
264 
265 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
266 		return false;
267 
268 	l2len = skb_mac_header_len(skb);
269 	i3len = skb_checksum_start(skb) - skb_network_header(skb);
270 
271 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
272 					skb->csum_offset / 2));
273 
274 	if (shinfo->gso_size) {
275 		if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len))
276 			return true;
277 	} else {
278 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
279 		u64_stats_update_begin(&ring->stats.syncp);
280 		ring->stats.twq.csum_partial++;
281 		u64_stats_update_end(&ring->stats.syncp);
282 	}
283 
284 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
285 			     FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
286 	return false;
287 }
288 
289 static void
290 fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq,
291 	      u64 *csum_cmpl, u64 *csum_none)
292 {
293 	skb_checksum_none_assert(skb);
294 
295 	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
296 		(*csum_none)++;
297 		return;
298 	}
299 
300 	if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
301 		skb->ip_summed = CHECKSUM_UNNECESSARY;
302 	} else {
303 		u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd);
304 
305 		skb->ip_summed = CHECKSUM_COMPLETE;
306 		skb->csum = (__force __wsum)csum;
307 		(*csum_cmpl)++;
308 	}
309 }
310 
311 static bool
312 fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
313 {
314 	struct device *dev = skb->dev->dev.parent;
315 	unsigned int tail = ring->tail, first;
316 	unsigned int size, data_len;
317 	skb_frag_t *frag;
318 	dma_addr_t dma;
319 	__le64 *twd;
320 
321 	ring->tx_buf[tail] = skb;
322 
323 	tail++;
324 	tail &= ring->size_mask;
325 	first = tail;
326 
327 	size = skb_headlen(skb);
328 	data_len = skb->data_len;
329 
330 	if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
331 		goto dma_error;
332 
333 	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
334 
335 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
336 		twd = &ring->desc[tail];
337 
338 		if (dma_mapping_error(dev, dma))
339 			goto dma_error;
340 
341 		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
342 				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
343 				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
344 					      FBNIC_TWD_TYPE_AL));
345 
346 		tail++;
347 		tail &= ring->size_mask;
348 
349 		if (!data_len)
350 			break;
351 
352 		size = skb_frag_size(frag);
353 		data_len -= size;
354 
355 		if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
356 			goto dma_error;
357 
358 		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
359 	}
360 
361 	*twd |= FBNIC_TWD_TYPE(LAST_AL);
362 
363 	FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask;
364 
365 	ring->tail = tail;
366 
367 	/* Record SW timestamp */
368 	skb_tx_timestamp(skb);
369 
370 	/* Verify there is room for another packet */
371 	fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);
372 
373 	if (fbnic_tx_sent_queue(skb, ring)) {
374 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION);
375 
376 		/* Force DMA writes to flush before writing to tail */
377 		dma_wmb();
378 
379 		writel(tail, ring->doorbell);
380 	}
381 
382 	return false;
383 dma_error:
384 	if (net_ratelimit())
385 		netdev_err(skb->dev, "TX DMA map failed\n");
386 
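	/* Unwind the descriptors already filled: everything after the first
	 * data descriptor maps a frag page, while the descriptor at "first"
	 * maps the linear skb data and needs dma_unmap_single().
	 */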
387 	while (tail != first) {
388 		tail--;
389 		tail &= ring->size_mask;
390 		twd = &ring->desc[tail];
391 		if (tail == first)
392 			fbnic_unmap_single_twd(dev, twd);
393 		else
394 			fbnic_unmap_page_twd(dev, twd);
395 	}
396 
397 	return true;
398 }
399 
400 #define FBNIC_MIN_FRAME_LEN	60
401 
402 static netdev_tx_t
403 fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
404 {
405 	__le64 *meta = &ring->desc[ring->tail];
406 	u16 desc_needed;
407 
408 	if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN))
409 		goto err_count;
410 
411 	/* Need: 1 descriptor per page,
412 	 *       + 1 desc for skb_head,
413 	 *       + 2 desc for metadata and timestamp metadata
414 	 *       + 7 desc gap to keep tail from touching head
415 	 * otherwise try next time
416 	 */
417 	desc_needed = skb_shinfo(skb)->nr_frags + 10;
418 	if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed))
419 		return NETDEV_TX_BUSY;
420 
421 	*meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC);
422 
423 	/* Write all members within DWORD to condense this into 2 4B writes */
424 	FBNIC_XMIT_CB(skb)->bytecount = skb->len;
425 	FBNIC_XMIT_CB(skb)->gso_segs = 1;
426 	FBNIC_XMIT_CB(skb)->desc_count = 0;
427 	FBNIC_XMIT_CB(skb)->flags = 0;
428 
429 	if (fbnic_tx_offloads(ring, skb, meta))
430 		goto err_free;
431 
432 	if (fbnic_tx_map(ring, skb, meta))
433 		goto err_free;
434 
435 	return NETDEV_TX_OK;
436 
437 err_free:
438 	dev_kfree_skb_any(skb);
439 err_count:
440 	u64_stats_update_begin(&ring->stats.syncp);
441 	ring->stats.dropped++;
442 	u64_stats_update_end(&ring->stats.syncp);
443 	return NETDEV_TX_OK;
444 }
445 
446 netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
447 {
448 	struct fbnic_net *fbn = netdev_priv(dev);
449 	unsigned int q_map = skb->queue_mapping;
450 
451 	return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
452 }
453 
454 static netdev_features_t
455 fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev,
456 			       netdev_features_t features, unsigned int l3len)
457 {
458 	netdev_features_t skb_gso_features;
459 	struct ipv6hdr *ip6_hdr;
460 	unsigned char l4_hdr;
461 	unsigned int start;
462 	__be16 frag_off;
463 
464 	/* Require MANGLEID for GSO_PARTIAL of IPv4.
465 	 * In theory we could support TSO with a single, innermost v4 header
466 	 * by pretending everything before it is L2, but that needs to be
467 	 * parsed case by case, so leave it for when the need arises.
468 	 */
469 	if (!(features & NETIF_F_TSO_MANGLEID))
470 		features &= ~NETIF_F_TSO;
471 
472 	skb_gso_features = skb_shinfo(skb)->gso_type;
473 	skb_gso_features <<= NETIF_F_GSO_SHIFT;
474 
475 	/* We'd only clear the native GSO features, so don't bother validating
476 	 * if the match can only be on those supported through GSO_PARTIAL.
477 	 */
478 	if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES))
479 		return features;
480 
481 	/* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice
482 	 * to fall back to partial for this, or any failure below.
483 	 * This is just an optimization, UDPv4 will be caught later on.
484 	 */
485 	if (skb_gso_features & NETIF_F_TSO)
486 		return features & ~FBNIC_TUN_GSO_FEATURES;
487 
488 	/* Inner headers multiple of 2 */
489 	if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2)
490 		return features & ~FBNIC_TUN_GSO_FEATURES;
491 
492 	/* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */
493 	ip6_hdr = ipv6_hdr(skb);
494 	if (ip6_hdr->version != 6)
495 		return features & ~FBNIC_TUN_GSO_FEATURES;
496 
497 	l4_hdr = ip6_hdr->nexthdr;
498 	start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr);
499 	start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off);
500 	if (frag_off || l4_hdr != IPPROTO_IPV6 ||
501 	    skb->data + start != skb_inner_network_header(skb))
502 		return features & ~FBNIC_TUN_GSO_FEATURES;
503 
504 	return features;
505 }
506 
507 netdev_features_t
508 fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
509 		     netdev_features_t features)
510 {
511 	unsigned int l2len, l3len;
512 
513 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
514 		return features;
515 
516 	l2len = skb_mac_header_len(skb);
517 	l3len = skb_checksum_start(skb) - skb_network_header(skb);
518 
519 	/* Check header lengths are a multiple of 2 (the TWD fields carry
520 	 * them in 2-byte units). In case of 6in6 we support longer headers
521 	 * (IHLEN + OHLEN) but keep things simple for now, 512B is plenty.
522 	 */
523 	if ((l2len | l3len | skb->csum_offset) % 2 ||
524 	    !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
525 	    !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
526 	    !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
527 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
528 
529 	if (likely(!skb->encapsulation) || !skb_is_gso(skb))
530 		return features;
531 
532 	return fbnic_features_check_encap_gso(skb, dev, features, l3len);
533 }
534 
535 static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
536 			     struct fbnic_ring *ring, bool discard,
537 			     unsigned int hw_head)
538 {
539 	u64 total_bytes = 0, total_packets = 0, ts_lost = 0;
540 	unsigned int head = ring->head;
541 	struct netdev_queue *txq;
542 	unsigned int clean_desc;
543 
544 	clean_desc = (hw_head - head) & ring->size_mask;
545 
546 	while (clean_desc) {
547 		struct sk_buff *skb = ring->tx_buf[head];
548 		unsigned int desc_cnt;
549 
550 		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
551 		if (desc_cnt > clean_desc)
552 			break;
553 
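		/* A packet still waiting on its Tx timestamp cannot be freed
		 * yet: record where the hardware head was and stop cleaning,
		 * unless the ring is being discarded, in which case count
		 * the timestamp as lost and keep going.
		 */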
554 		if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) {
555 			FBNIC_XMIT_CB(skb)->hw_head = hw_head;
556 			if (likely(!discard))
557 				break;
558 			ts_lost++;
559 		}
560 
561 		ring->tx_buf[head] = NULL;
562 
563 		clean_desc -= desc_cnt;
564 
565 		while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) {
566 			head++;
567 			head &= ring->size_mask;
568 			desc_cnt--;
569 		}
570 
571 		fbnic_unmap_single_twd(nv->dev, &ring->desc[head]);
572 		head++;
573 		head &= ring->size_mask;
574 		desc_cnt--;
575 
576 		while (desc_cnt--) {
577 			fbnic_unmap_page_twd(nv->dev, &ring->desc[head]);
578 			head++;
579 			head &= ring->size_mask;
580 		}
581 
582 		total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
583 		total_packets += FBNIC_XMIT_CB(skb)->gso_segs;
584 
585 		napi_consume_skb(skb, napi_budget);
586 	}
587 
588 	if (!total_bytes)
589 		return;
590 
591 	ring->head = head;
592 
593 	txq = txring_txq(nv->napi.dev, ring);
594 
595 	if (unlikely(discard)) {
596 		u64_stats_update_begin(&ring->stats.syncp);
597 		ring->stats.dropped += total_packets;
598 		ring->stats.twq.ts_lost += ts_lost;
599 		u64_stats_update_end(&ring->stats.syncp);
600 
601 		netdev_tx_completed_queue(txq, total_packets, total_bytes);
602 		return;
603 	}
604 
605 	u64_stats_update_begin(&ring->stats.syncp);
606 	ring->stats.bytes += total_bytes;
607 	ring->stats.packets += total_packets;
608 	u64_stats_update_end(&ring->stats.syncp);
609 
610 	if (!netif_txq_completed_wake(txq, total_packets, total_bytes,
611 				      fbnic_desc_unused(ring),
612 				      FBNIC_TX_DESC_WAKEUP)) {
613 		u64_stats_update_begin(&ring->stats.syncp);
614 		ring->stats.twq.wake++;
615 		u64_stats_update_end(&ring->stats.syncp);
616 	}
617 }
618 
619 static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct,
620 			     struct fbnic_ring *ring, bool discard,
621 			     unsigned int hw_head)
622 {
623 	u64 total_bytes = 0, total_packets = 0;
624 	unsigned int head = ring->head;
625 
626 	while (hw_head != head) {
627 		struct page *page;
628 		u64 twd;
629 
630 		if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL))))
631 			goto next_desc;
632 
633 		twd = le64_to_cpu(ring->desc[head]);
634 		page = ring->tx_buf[head];
635 
636 		/* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives
637 		 * us one increment per packet, with no branches.
638 		 */
639 		total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) -
640 				 FBNIC_TWD_TYPE_AL;
641 		total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd);
642 
643 		page_pool_put_page(page->pp, page, -1, pp_allow_direct);
644 next_desc:
645 		head++;
646 		head &= ring->size_mask;
647 	}
648 
649 	if (!total_bytes)
650 		return;
651 
652 	ring->head = head;
653 
654 	if (discard) {
655 		u64_stats_update_begin(&ring->stats.syncp);
656 		ring->stats.dropped += total_packets;
657 		u64_stats_update_end(&ring->stats.syncp);
658 		return;
659 	}
660 
661 	u64_stats_update_begin(&ring->stats.syncp);
662 	ring->stats.bytes += total_bytes;
663 	ring->stats.packets += total_packets;
664 	u64_stats_update_end(&ring->stats.syncp);
665 }
666 
667 static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
668 			    struct fbnic_ring *ring,
669 			    u64 tcd, int *ts_head, int *head0)
670 {
671 	struct skb_shared_hwtstamps hwtstamp;
672 	struct fbnic_net *fbn;
673 	struct sk_buff *skb;
674 	int head;
675 	u64 ns;
676 
677 	head = (*ts_head < 0) ? ring->head : *ts_head;
678 
679 	do {
680 		unsigned int desc_cnt;
681 
682 		if (head == ring->tail) {
683 			if (unlikely(net_ratelimit()))
684 				netdev_err(nv->napi.dev,
685 					   "Tx timestamp without matching packet\n");
686 			return;
687 		}
688 
689 		skb = ring->tx_buf[head];
690 		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
691 
692 		head += desc_cnt;
693 		head &= ring->size_mask;
694 	} while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS));
695 
696 	fbn = netdev_priv(nv->napi.dev);
697 	ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd));
698 
699 	memset(&hwtstamp, 0, sizeof(hwtstamp));
700 	hwtstamp.hwtstamp = ns_to_ktime(ns);
701 
702 	*ts_head = head;
703 
704 	FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS;
705 	if (*head0 < 0) {
706 		head = FBNIC_XMIT_CB(skb)->hw_head;
707 		if (head >= 0)
708 			*head0 = head;
709 	}
710 
711 	skb_tstamp_tx(skb, &hwtstamp);
712 	u64_stats_update_begin(&ring->stats.syncp);
713 	ring->stats.twq.ts_packets++;
714 	u64_stats_update_end(&ring->stats.syncp);
715 }
716 
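/* Buffer page reference management: each BDQ page is handed out with
 * FBNIC_PAGECNT_BIAS_MAX page_pool fragment references taken up front.
 * Every completion that exposes part of the page to the stack decrements
 * pagecnt_bias, and fbnic_page_pool_drain() releases whatever bias remains
 * once the page comes back off the BDQ.
 */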
717 static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
718 				 netmem_ref netmem)
719 {
720 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
721 
722 	page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX);
723 	rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX;
724 	rx_buf->netmem = netmem;
725 }
726 
727 static struct page *
728 fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx)
729 {
730 	struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx];
731 
732 	rx_buf->pagecnt_bias--;
733 
734 	/* sub0 is always fed system pages, from the NAPI-level page_pool */
735 	return netmem_to_page(rx_buf->netmem);
736 }
737 
738 static netmem_ref
739 fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx)
740 {
741 	struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx];
742 
743 	rx_buf->pagecnt_bias--;
744 
745 	return rx_buf->netmem;
746 }
747 
748 static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
749 				  int budget)
750 {
751 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
752 	netmem_ref netmem = rx_buf->netmem;
753 
754 	if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias))
755 		page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1,
756 					     !!budget);
757 
758 	rx_buf->netmem = 0;
759 }
760 
761 static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
762 			    struct fbnic_q_triad *qt, s32 ts_head, s32 head0,
763 			    s32 head1)
764 {
765 	if (head0 >= 0)
766 		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
767 	else if (ts_head >= 0)
768 		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
769 
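	/* XDP Tx (TWQ1) completions return pages to the page pool. With a
	 * zero budget we are not in a normal NAPI context, so stash head1 in
	 * deferred_head and finish that cleanup on the next budgeted poll.
	 */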
770 	if (head1 >= 0) {
771 		qt->cmpl.deferred_head = -1;
772 		if (napi_budget)
773 			fbnic_clean_twq1(nv, true, &qt->sub1, false, head1);
774 		else
775 			qt->cmpl.deferred_head = head1;
776 	}
777 }
778 
779 static void
780 fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
781 		int napi_budget)
782 {
783 	struct fbnic_ring *cmpl = &qt->cmpl;
784 	s32 head1 = cmpl->deferred_head;
785 	s32 head0 = -1, ts_head = -1;
786 	__le64 *raw_tcd, done;
787 	u32 head = cmpl->head;
788 
789 	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
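	/* The DONE bit alternates meaning on every pass over the ring: bit
	 * (size_mask + 1) of head tracks which pass we are on, and "done"
	 * holds the DONE value that marks a descriptor the hardware has
	 * written for the current pass. It is toggled below whenever head
	 * wraps.
	 */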
790 	raw_tcd = &cmpl->desc[head & cmpl->size_mask];
791 
792 	/* Walk the completion queue collecting the heads reported by NIC */
793 	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
794 		u64 tcd;
795 
796 		dma_rmb();
797 
798 		tcd = le64_to_cpu(*raw_tcd);
799 
800 		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
801 		case FBNIC_TCD_TYPE_0:
802 			if (tcd & FBNIC_TCD_TWQ1)
803 				head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK,
804 						  tcd);
805 			else
806 				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
807 						  tcd);
808 			/* Currently all err status bits are related to
809 			 * timestamps and as those have yet to be added
810 			 * they are skipped for now.
811 			 */
812 			break;
813 		case FBNIC_TCD_TYPE_1:
814 			if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
815 				break;
816 
817 			fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
818 			break;
819 		default:
820 			break;
821 		}
822 
823 		raw_tcd++;
824 		head++;
825 		if (!(head & cmpl->size_mask)) {
826 			done ^= cpu_to_le64(FBNIC_TCD_DONE);
827 			raw_tcd = &cmpl->desc[0];
828 		}
829 	}
830 
831 	/* Record the current head/tail of the queue */
832 	if (cmpl->head != head) {
833 		cmpl->head = head;
834 		writel(head & cmpl->size_mask, cmpl->doorbell);
835 	}
836 
837 	/* Unmap and free processed buffers */
838 	fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1);
839 }
840 
841 static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head,
842 			    int napi_budget)
843 {
844 	unsigned int head = ring->head;
845 
846 	if (head == hw_head)
847 		return;
848 
849 	do {
850 		fbnic_page_pool_drain(ring, head, napi_budget);
851 
852 		head++;
853 		head &= ring->size_mask;
854 	} while (head != hw_head);
855 
856 	ring->head = head;
857 }
858 
859 static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem)
860 {
861 	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
862 	dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
863 	u64 bd, i = FBNIC_BD_FRAG_COUNT;
864 
865 	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
866 	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);
867 
868 	/* In the case that the page size is larger than 4K we map a
869 	 * single page to multiple fragments. There will be
870 	 * FBNIC_BD_FRAG_COUNT fragments per page, and the lower n bits are
871 	 * used to indicate the individual fragment IDs.
872 	 */
873 	do {
874 		*bdq_desc = cpu_to_le64(bd);
875 		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
876 		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
877 	} while (--i);
878 }
879 
880 static void fbnic_fill_bdq(struct fbnic_ring *bdq)
881 {
882 	unsigned int count = fbnic_desc_unused(bdq);
883 	unsigned int i = bdq->tail;
884 
885 	if (!count)
886 		return;
887 
888 	do {
889 		netmem_ref netmem;
890 
891 		netmem = page_pool_dev_alloc_netmems(bdq->page_pool);
892 		if (!netmem) {
893 			u64_stats_update_begin(&bdq->stats.syncp);
894 			bdq->stats.rx.alloc_failed++;
895 			u64_stats_update_end(&bdq->stats.syncp);
896 
897 			break;
898 		}
899 
900 		fbnic_page_pool_init(bdq, i, netmem);
901 		fbnic_bd_prep(bdq, i, netmem);
902 
903 		i++;
904 		i &= bdq->size_mask;
905 
906 		count--;
907 	} while (count);
908 
909 	if (bdq->tail != i) {
910 		bdq->tail = i;
911 
912 		/* Force DMA writes to flush before writing to tail */
913 		dma_wmb();
914 
915 		writel(i, bdq->doorbell);
916 	}
917 }
918 
919 static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
920 {
921 	/* The headroom of the first header may be larger than FBNIC_RX_HROOM
922 	 * due to alignment. So account for that by just making the page
923 	 * offset 0 if we are starting at the first header.
924 	 */
925 	if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
926 	    pg_off == ALIGN(FBNIC_RX_HROOM, 128))
927 		return 0;
928 
929 	return pg_off - FBNIC_RX_HROOM;
930 }
931 
932 static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
933 {
934 	/* Determine the end of the buffer by finding the start of the next
935 	 * and then subtracting the headroom from that frame.
936 	 */
937 	pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;
938 
939 	return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
940 }
941 
942 static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
943 			      struct fbnic_pkt_buff *pkt,
944 			      struct fbnic_q_triad *qt)
945 {
946 	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
947 	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
948 	struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx);
949 	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
950 	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
951 	unsigned char *hdr_start;
952 
953 	/* data_hard_start should always be NULL when this is called */
954 	WARN_ON_ONCE(pkt->buff.data_hard_start);
955 
956 	/* Short-cut the end calculation if we know page is fully consumed */
957 	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
958 		     FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len);
959 	hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off);
960 
961 	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
962 	frame_sz = hdr_pg_end - hdr_pg_start;
963 	xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
964 	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
965 			FBNIC_BD_FRAG_SIZE;
966 
967 	/* Sync DMA buffer */
968 	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
969 				      hdr_pg_start, frame_sz,
970 				      DMA_BIDIRECTIONAL);
971 
972 	/* Build frame around buffer */
973 	hdr_start = page_address(page) + hdr_pg_start;
974 	net_prefetch(pkt->buff.data);
975 	xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
976 			 len - FBNIC_RX_PAD, true);
977 
978 	pkt->hwtstamp = 0;
979 	pkt->add_frag_failed = false;
980 }
981 
982 static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
983 			      struct fbnic_pkt_buff *pkt,
984 			      struct fbnic_q_triad *qt)
985 {
986 	unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
987 	unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
988 	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
989 	netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx);
990 	unsigned int truesize;
991 	bool added;
992 
993 	truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
994 		   FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);
995 
996 	pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
997 		  FBNIC_BD_FRAG_SIZE;
998 
999 	/* Sync DMA buffer */
1000 	page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem,
1001 					  pg_off, truesize);
1002 
1003 	added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize);
1004 	if (unlikely(!added)) {
1005 		pkt->add_frag_failed = true;
1006 		netdev_err_once(nv->napi.dev,
1007 				"Failed to add fragment to xdp_buff\n");
1008 	}
1009 }
1010 
1011 static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt,
1012 			       struct fbnic_pkt_buff *pkt, int budget)
1013 {
1014 	struct page *page;
1015 
1016 	if (!pkt->buff.data_hard_start)
1017 		return;
1018 
1019 	if (xdp_buff_has_frags(&pkt->buff)) {
1020 		struct skb_shared_info *shinfo;
1021 		netmem_ref netmem;
1022 		int nr_frags;
1023 
1024 		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
1025 		nr_frags = shinfo->nr_frags;
1026 
1027 		while (nr_frags--) {
1028 			netmem = skb_frag_netmem(&shinfo->frags[nr_frags]);
1029 			page_pool_put_full_netmem(qt->sub1.page_pool, netmem,
1030 						  !!budget);
1031 		}
1032 	}
1033 
1034 	page = virt_to_page(pkt->buff.data_hard_start);
1035 	page_pool_put_full_page(qt->sub0.page_pool, page, !!budget);
1036 }
1037 
1038 static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
1039 				       struct fbnic_pkt_buff *pkt)
1040 {
1041 	struct sk_buff *skb;
1042 
1043 	skb = xdp_build_skb_from_buff(&pkt->buff);
1044 	if (!skb)
1045 		return NULL;
1046 
1047 	/* Add timestamp if present */
1048 	if (pkt->hwtstamp)
1049 		skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;
1050 
1051 	return skb;
1052 }
1053 
1054 static long fbnic_pkt_tx(struct fbnic_napi_vector *nv,
1055 			 struct fbnic_pkt_buff *pkt)
1056 {
1057 	struct fbnic_ring *ring = &nv->qt[0].sub1;
1058 	int size, offset, nsegs = 1, data_len = 0;
1059 	unsigned int tail = ring->tail;
1060 	struct skb_shared_info *shinfo;
1061 	skb_frag_t *frag = NULL;
1062 	struct page *page;
1063 	dma_addr_t dma;
1064 	__le64 *twd;
1065 
1066 	if (unlikely(xdp_buff_has_frags(&pkt->buff))) {
1067 		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
1068 		nsegs += shinfo->nr_frags;
1069 		data_len = shinfo->xdp_frags_size;
1070 		frag = &shinfo->frags[0];
1071 	}
1072 
1073 	if (fbnic_desc_unused(ring) < nsegs) {
1074 		u64_stats_update_begin(&ring->stats.syncp);
1075 		ring->stats.dropped++;
1076 		u64_stats_update_end(&ring->stats.syncp);
1077 		return -FBNIC_XDP_CONSUME;
1078 	}
1079 
1080 	page = virt_to_page(pkt->buff.data_hard_start);
1081 	offset = offset_in_page(pkt->buff.data);
1082 	dma = page_pool_get_dma_addr(page);
1083 
1084 	size = pkt->buff.data_end - pkt->buff.data;
1085 
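	/* The buffer and any frags come from the Rx page pools and are
	 * already DMA mapped; sync each used region for device access and
	 * point the XDP Tx descriptors at the same pages.
	 */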
1086 	while (nsegs--) {
1087 		dma_sync_single_range_for_device(nv->dev, dma, offset, size,
1088 						 DMA_BIDIRECTIONAL);
1089 		dma += offset;
1090 
1091 		ring->tx_buf[tail] = page;
1092 
1093 		twd = &ring->desc[tail];
1094 		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
1095 				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
1096 				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
1097 					      FBNIC_TWD_TYPE_AL));
1098 
1099 		tail++;
1100 		tail &= ring->size_mask;
1101 
1102 		if (!data_len)
1103 			break;
1104 
1105 		offset = skb_frag_off(frag);
1106 		page = skb_frag_page(frag);
1107 		dma = page_pool_get_dma_addr(page);
1108 
1109 		size = skb_frag_size(frag);
1110 		data_len -= size;
1111 		frag++;
1112 	}
1113 
1114 	*twd |= FBNIC_TWD_TYPE(LAST_AL);
1115 
1116 	ring->tail = tail;
1117 
1118 	return -FBNIC_XDP_TX;
1119 }
1120 
1121 static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv,
1122 				  unsigned int pkt_tail)
1123 {
1124 	struct fbnic_ring *ring = &nv->qt[0].sub1;
1125 
1126 	/* Force DMA writes to flush before writing to tail */
1127 	dma_wmb();
1128 
1129 	writel(pkt_tail, ring->doorbell);
1130 }
1131 
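/* Returns the skb to hand up the stack, NULL if skb construction failed, or
 * an ERR_PTR() carrying the negated FBNIC_XDP_* verdict (TX, CONSUME,
 * LEN_ERR) which fbnic_clean_rcq() decodes to update counters and buffers.
 */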
1132 static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
1133 				     struct fbnic_pkt_buff *pkt)
1134 {
1135 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
1136 	struct bpf_prog *xdp_prog;
1137 	int act;
1138 
1139 	xdp_prog = READ_ONCE(fbn->xdp_prog);
1140 	if (!xdp_prog)
1141 		goto xdp_pass;
1142 
1143 	/* Should never happen, config paths enforce HDS threshold > MTU */
1144 	if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
1145 		return ERR_PTR(-FBNIC_XDP_LEN_ERR);
1146 
1147 	act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
1148 	switch (act) {
1149 	case XDP_PASS:
1150 xdp_pass:
1151 		return fbnic_build_skb(nv, pkt);
1152 	case XDP_TX:
1153 		return ERR_PTR(fbnic_pkt_tx(nv, pkt));
1154 	default:
1155 		bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
1156 		fallthrough;
1157 	case XDP_ABORTED:
1158 		trace_xdp_exception(nv->napi.dev, xdp_prog, act);
1159 		fallthrough;
1160 	case XDP_DROP:
1161 		break;
1162 	}
1163 
1164 	return ERR_PTR(-FBNIC_XDP_CONSUME);
1165 }
1166 
1167 static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
1168 {
1169 	return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
1170 	       (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L3 :
1171 						     PKT_HASH_TYPE_L2;
1172 }
1173 
1174 static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
1175 			    struct fbnic_pkt_buff *pkt)
1176 {
1177 	struct fbnic_net *fbn;
1178 	u64 ns, ts;
1179 
1180 	if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd))
1181 		return;
1182 
1183 	fbn = netdev_priv(nv->napi.dev);
1184 	ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd);
1185 	ns = fbnic_ts40_to_ns(fbn, ts);
1186 
1187 	/* Add timestamp to shared info */
1188 	pkt->hwtstamp = ns_to_ktime(ns);
1189 }
1190 
1191 static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
1192 				      u64 rcd, struct sk_buff *skb,
1193 				      struct fbnic_q_triad *qt,
1194 				      u64 *csum_cmpl, u64 *csum_none)
1195 {
1196 	struct net_device *netdev = nv->napi.dev;
1197 	struct fbnic_ring *rcq = &qt->cmpl;
1198 
1199 	fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none);
1200 
1201 	if (netdev->features & NETIF_F_RXHASH)
1202 		skb_set_hash(skb,
1203 			     FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd),
1204 			     fbnic_skb_hash_type(rcd));
1205 
1206 	skb_record_rx_queue(skb, rcq->q_idx);
1207 }
1208 
1209 static bool fbnic_rcd_metadata_err(u64 rcd)
1210 {
1211 	return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd);
1212 }
1213 
1214 static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
1215 			   struct fbnic_q_triad *qt, int budget)
1216 {
1217 	unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
1218 	u64 csum_complete = 0, csum_none = 0, length_errors = 0;
1219 	s32 head0 = -1, head1 = -1, pkt_tail = -1;
1220 	struct fbnic_ring *rcq = &qt->cmpl;
1221 	struct fbnic_pkt_buff *pkt;
1222 	__le64 *raw_rcd, done;
1223 	u32 head = rcq->head;
1224 
1225 	done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0;
1226 	raw_rcd = &rcq->desc[head & rcq->size_mask];
1227 	pkt = rcq->pkt;
1228 
1229 	/* Walk the completion queue collecting the heads reported by NIC */
1230 	while (likely(packets < budget)) {
1231 		struct sk_buff *skb = ERR_PTR(-EINVAL);
1232 		u64 rcd;
1233 
1234 		if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done)
1235 			break;
1236 
1237 		dma_rmb();
1238 
1239 		rcd = le64_to_cpu(*raw_rcd);
1240 
1241 		switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) {
1242 		case FBNIC_RCD_TYPE_HDR_AL:
1243 			head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
1244 			fbnic_pkt_prepare(nv, rcd, pkt, qt);
1245 
1246 			break;
1247 		case FBNIC_RCD_TYPE_PAY_AL:
1248 			head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
1249 			fbnic_add_rx_frag(nv, rcd, pkt, qt);
1250 
1251 			break;
1252 		case FBNIC_RCD_TYPE_OPT_META:
1253 			/* Only type 0 is currently supported */
1254 			if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
1255 				break;
1256 
1257 			fbnic_rx_tstamp(nv, rcd, pkt);
1258 
1259 			/* We currently ignore the action table index */
1260 			break;
1261 		case FBNIC_RCD_TYPE_META:
1262 			if (unlikely(pkt->add_frag_failed))
1263 				skb = NULL;
1264 			else if (likely(!fbnic_rcd_metadata_err(rcd)))
1265 				skb = fbnic_run_xdp(nv, pkt);
1266 
1267 			/* Populate skb and invalidate XDP */
1268 			if (!IS_ERR_OR_NULL(skb)) {
1269 				fbnic_populate_skb_fields(nv, rcd, skb, qt,
1270 							  &csum_complete,
1271 							  &csum_none);
1272 
1273 				packets++;
1274 				bytes += skb->len;
1275 
1276 				napi_gro_receive(&nv->napi, skb);
1277 			} else if (skb == ERR_PTR(-FBNIC_XDP_TX)) {
1278 				pkt_tail = nv->qt[0].sub1.tail;
1279 				bytes += xdp_get_buff_len(&pkt->buff);
1280 			} else {
1281 				if (!skb) {
1282 					alloc_failed++;
1283 					dropped++;
1284 				} else if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) {
1285 					length_errors++;
1286 				} else {
1287 					dropped++;
1288 				}
1289 
1290 				fbnic_put_pkt_buff(qt, pkt, 1);
1291 			}
1292 
1293 			pkt->buff.data_hard_start = NULL;
1294 
1295 			break;
1296 		}
1297 
1298 		raw_rcd++;
1299 		head++;
1300 		if (!(head & rcq->size_mask)) {
1301 			done ^= cpu_to_le64(FBNIC_RCD_DONE);
1302 			raw_rcd = &rcq->desc[0];
1303 		}
1304 	}
1305 
1306 	u64_stats_update_begin(&rcq->stats.syncp);
1307 	rcq->stats.packets += packets;
1308 	rcq->stats.bytes += bytes;
1309 	/* Re-add ethernet header length (removed in fbnic_build_skb) */
1310 	rcq->stats.bytes += ETH_HLEN * packets;
1311 	rcq->stats.dropped += dropped;
1312 	rcq->stats.rx.alloc_failed += alloc_failed;
1313 	rcq->stats.rx.csum_complete += csum_complete;
1314 	rcq->stats.rx.csum_none += csum_none;
1315 	rcq->stats.rx.length_errors += length_errors;
1316 	u64_stats_update_end(&rcq->stats.syncp);
1317 
1318 	if (pkt_tail >= 0)
1319 		fbnic_pkt_commit_tail(nv, pkt_tail);
1320 
1321 	/* Unmap and free processed buffers */
1322 	if (head0 >= 0)
1323 		fbnic_clean_bdq(&qt->sub0, head0, budget);
1324 	fbnic_fill_bdq(&qt->sub0);
1325 
1326 	if (head1 >= 0)
1327 		fbnic_clean_bdq(&qt->sub1, head1, budget);
1328 	fbnic_fill_bdq(&qt->sub1);
1329 
1330 	/* Record the current head/tail of the queue */
1331 	if (rcq->head != head) {
1332 		rcq->head = head;
1333 		writel(head & rcq->size_mask, rcq->doorbell);
1334 	}
1335 
1336 	return packets;
1337 }
1338 
1339 static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
1340 {
1341 	struct fbnic_dev *fbd = nv->fbd;
1342 	u32 v_idx = nv->v_idx;
1343 
1344 	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
1345 }
1346 
1347 static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
1348 {
1349 	struct fbnic_dev *fbd = nv->fbd;
1350 	u32 v_idx = nv->v_idx;
1351 
1352 	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
1353 		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
1354 }
1355 
1356 static int fbnic_poll(struct napi_struct *napi, int budget)
1357 {
1358 	struct fbnic_napi_vector *nv = container_of(napi,
1359 						    struct fbnic_napi_vector,
1360 						    napi);
1361 	int i, j, work_done = 0;
1362 
1363 	for (i = 0; i < nv->txt_count; i++)
1364 		fbnic_clean_tcq(nv, &nv->qt[i], budget);
1365 
1366 	for (j = 0; j < nv->rxt_count; j++, i++)
1367 		work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);
1368 
1369 	if (work_done >= budget)
1370 		return budget;
1371 
1372 	if (likely(napi_complete_done(napi, work_done)))
1373 		fbnic_nv_irq_rearm(nv);
1374 
1375 	return work_done;
1376 }
1377 
1378 irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
1379 {
1380 	struct fbnic_napi_vector *nv = *(void **)data;
1381 
1382 	napi_schedule_irqoff(&nv->napi);
1383 
1384 	return IRQ_HANDLED;
1385 }
1386 
1387 void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
1388 				      struct fbnic_ring *rxr)
1389 {
1390 	struct fbnic_queue_stats *stats = &rxr->stats;
1391 
1392 	/* Capture stats from queues before disassociating them */
1393 	fbn->rx_stats.bytes += stats->bytes;
1394 	fbn->rx_stats.packets += stats->packets;
1395 	fbn->rx_stats.dropped += stats->dropped;
1396 	fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
1397 	fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
1398 	fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
1399 	fbn->rx_stats.rx.length_errors += stats->rx.length_errors;
1400 	/* Remember to add new stats here */
1401 	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4);
1402 }
1403 
1404 void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
1405 				      struct fbnic_ring *txr)
1406 {
1407 	struct fbnic_queue_stats *stats = &txr->stats;
1408 
1409 	/* Capture stats from queues before disassociating them */
1410 	fbn->tx_stats.bytes += stats->bytes;
1411 	fbn->tx_stats.packets += stats->packets;
1412 	fbn->tx_stats.dropped += stats->dropped;
1413 	fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
1414 	fbn->tx_stats.twq.lso += stats->twq.lso;
1415 	fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
1416 	fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
1417 	fbn->tx_stats.twq.stop += stats->twq.stop;
1418 	fbn->tx_stats.twq.wake += stats->twq.wake;
1419 	/* Remember to add new stats here */
1420 	BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
1421 }
1422 
1423 static void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn,
1424 					      struct fbnic_ring *xdpr)
1425 {
1426 	struct fbnic_queue_stats *stats = &xdpr->stats;
1427 
1428 	if (!(xdpr->flags & FBNIC_RING_F_STATS))
1429 		return;
1430 
1431 	/* Capture stats from queues before disassociating them */
1432 	fbn->rx_stats.bytes += stats->bytes;
1433 	fbn->rx_stats.packets += stats->packets;
1434 	fbn->rx_stats.dropped += stats->dropped;
1435 	fbn->tx_stats.bytes += stats->bytes;
1436 	fbn->tx_stats.packets += stats->packets;
1437 }
1438 
1439 static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
1440 				 struct fbnic_ring *txr)
1441 {
1442 	if (!(txr->flags & FBNIC_RING_F_STATS))
1443 		return;
1444 
1445 	fbnic_aggregate_ring_tx_counters(fbn, txr);
1446 
1447 	/* Remove pointer to the Tx ring */
1448 	WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
1449 	fbn->tx[txr->q_idx] = NULL;
1450 }
1451 
1452 static void fbnic_remove_xdp_ring(struct fbnic_net *fbn,
1453 				  struct fbnic_ring *xdpr)
1454 {
1455 	if (!(xdpr->flags & FBNIC_RING_F_STATS))
1456 		return;
1457 
1458 	fbnic_aggregate_ring_xdp_counters(fbn, xdpr);
1459 
1460 	/* Remove pointer to the Tx ring */
1461 	WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr);
1462 	fbn->tx[xdpr->q_idx] = NULL;
1463 }
1464 
1465 static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
1466 				 struct fbnic_ring *rxr)
1467 {
1468 	if (!(rxr->flags & FBNIC_RING_F_STATS))
1469 		return;
1470 
1471 	fbnic_aggregate_ring_rx_counters(fbn, rxr);
1472 
1473 	/* Remove pointer to the Rx ring */
1474 	WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
1475 	fbn->rx[rxr->q_idx] = NULL;
1476 }
1477 
1478 static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt)
1479 {
1480 	page_pool_destroy(qt->sub0.page_pool);
1481 	page_pool_destroy(qt->sub1.page_pool);
1482 }
1483 
1484 static void fbnic_free_napi_vector(struct fbnic_net *fbn,
1485 				   struct fbnic_napi_vector *nv)
1486 {
1487 	struct fbnic_dev *fbd = nv->fbd;
1488 	int i, j;
1489 
1490 	for (i = 0; i < nv->txt_count; i++) {
1491 		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
1492 		fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1);
1493 		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
1494 	}
1495 
1496 	for (j = 0; j < nv->rxt_count; j++, i++) {
1497 		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
1498 		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
1499 		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
1500 	}
1501 
1502 	fbnic_napi_free_irq(fbd, nv);
1503 	netif_napi_del_locked(&nv->napi);
1504 	fbn->napi[fbnic_napi_idx(nv)] = NULL;
1505 	kfree(nv);
1506 }
1507 
1508 void fbnic_free_napi_vectors(struct fbnic_net *fbn)
1509 {
1510 	int i;
1511 
1512 	for (i = 0; i < fbn->num_napi; i++)
1513 		if (fbn->napi[i])
1514 			fbnic_free_napi_vector(fbn, fbn->napi[i]);
1515 }
1516 
1517 static int
1518 fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt,
1519 			  unsigned int rxq_idx)
1520 {
1521 	struct page_pool_params pp_params = {
1522 		.order = 0,
1523 		.flags = PP_FLAG_DMA_MAP |
1524 			 PP_FLAG_DMA_SYNC_DEV,
1525 		.pool_size = fbn->hpq_size + fbn->ppq_size,
1526 		.nid = NUMA_NO_NODE,
1527 		.dev = fbn->netdev->dev.parent,
1528 		.dma_dir = DMA_BIDIRECTIONAL,
1529 		.offset = 0,
1530 		.max_len = PAGE_SIZE,
1531 		.netdev	= fbn->netdev,
1532 		.queue_idx = rxq_idx,
1533 	};
1534 	struct page_pool *pp;
1535 
1536 	/* Page pool cannot exceed a size of 32768. This doesn't limit the
1537 	 * pages on the ring but the number we can have cached waiting on
1538 	 * the next use.
1539 	 *
1540 	 * TBD: Can this be reduced further? Would a multiple of
1541 	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
1542 	 * many pages we need to hold in reserve to get the best return
1543 	 * without hogging too much system memory.
1544 	 */
1545 	if (pp_params.pool_size > 32768)
1546 		pp_params.pool_size = 32768;
1547 
1548 	pp = page_pool_create(&pp_params);
1549 	if (IS_ERR(pp))
1550 		return PTR_ERR(pp);
1551 
1552 	qt->sub0.page_pool = pp;
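	/* The header (sub0) and payload (sub1) BDQs share a single pool
	 * unless the queue is bound to an unreadable memory provider, in
	 * which case sub1 gets its own pool flagged to allow unreadable
	 * netmem and mapped DMA_FROM_DEVICE only.
	 */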
1553 	if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) {
1554 		pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM;
1555 		pp_params.dma_dir = DMA_FROM_DEVICE;
1556 
1557 		pp = page_pool_create(&pp_params);
1558 		if (IS_ERR(pp))
1559 			goto err_destroy_sub0;
1560 	} else {
1561 		page_pool_get(pp);
1562 	}
1563 	qt->sub1.page_pool = pp;
1564 
1565 	return 0;
1566 
1567 err_destroy_sub0:
1568 	page_pool_destroy(pp);
1569 	return PTR_ERR(pp);
1570 }
1571 
1572 static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
1573 			    int q_idx, u8 flags)
1574 {
1575 	u64_stats_init(&ring->stats.syncp);
1576 	ring->doorbell = doorbell;
1577 	ring->q_idx = q_idx;
1578 	ring->flags = flags;
1579 	ring->deferred_head = -1;
1580 }
1581 
1582 static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
1583 				   unsigned int v_count, unsigned int v_idx,
1584 				   unsigned int txq_count, unsigned int txq_idx,
1585 				   unsigned int rxq_count, unsigned int rxq_idx)
1586 {
1587 	int txt_count = txq_count, rxt_count = rxq_count;
1588 	u32 __iomem *uc_addr = fbd->uc_addr0;
1589 	int xdp_count = 0, qt_count, err;
1590 	struct fbnic_napi_vector *nv;
1591 	struct fbnic_q_triad *qt;
1592 	u32 __iomem *db;
1593 
1594 	/* We need to reserve at least one Tx Queue Triad for an XDP ring */
1595 	if (rxq_count) {
1596 		xdp_count = 1;
1597 		if (!txt_count)
1598 			txt_count = 1;
1599 	}
1600 
1601 	qt_count = txt_count + rxq_count;
1602 	if (!qt_count)
1603 		return -EINVAL;
1604 
1605 	/* If MMIO has already failed there are no rings to initialize */
1606 	if (!uc_addr)
1607 		return -EIO;
1608 
1609 	/* Allocate NAPI vector and queue triads */
1610 	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
1611 	if (!nv)
1612 		return -ENOMEM;
1613 
1614 	/* Record queue triad counts */
1615 	nv->txt_count = txt_count;
1616 	nv->rxt_count = rxt_count;
1617 
1618 	/* Provide pointer back to fbnic and MSI-X vectors */
1619 	nv->fbd = fbd;
1620 	nv->v_idx = v_idx;
1621 
1622 	/* Tie napi to netdev */
1623 	fbn->napi[fbnic_napi_idx(nv)] = nv;
1624 	netif_napi_add_config_locked(fbn->netdev, &nv->napi, fbnic_poll,
1625 				     fbnic_napi_idx(nv));
1626 
1627 	/* Record IRQ to NAPI struct */
1628 	netif_napi_set_irq_locked(&nv->napi,
1629 				  pci_irq_vector(to_pci_dev(fbd->dev),
1630 						 nv->v_idx));
1631 
1632 	/* Tie nv back to PCIe dev */
1633 	nv->dev = fbd->dev;
1634 
1635 	/* Request the IRQ for napi vector */
1636 	err = fbnic_napi_request_irq(fbd, nv);
1637 	if (err)
1638 		goto napi_del;
1639 
1640 	/* Initialize queue triads */
1641 	qt = nv->qt;
1642 
1643 	while (txt_count) {
1644 		u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;
1645 
1646 		/* Configure Tx queue */
1647 		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];
1648 
1649 		/* Assign Tx queue to netdev if applicable */
1650 		if (txq_count > 0) {
1652 			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
1653 			fbn->tx[txq_idx] = &qt->sub0;
1654 			txq_count--;
1655 		} else {
1656 			fbnic_ring_init(&qt->sub0, db, 0,
1657 					FBNIC_RING_F_DISABLED);
1658 		}
1659 
1660 		/* Configure XDP queue */
1661 		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL];
1662 
1663 		/* Assign XDP queue to netdev if applicable
1664 		 *
1665 		 * The setup for this is in itself a bit different.
1666 		 * 1. We only need one XDP Tx queue per NAPI vector.
1667 		 * 2. We associate it to the first Rx queue index.
1668 		 * 3. The hardware side is associated based on the Tx Queue.
1669 		 * 4. The netdev queue is offset by FBNIC_MAX_TXQs.
1670 		 */
1671 		if (xdp_count > 0) {
1672 			unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx;
1673 
1674 			fbnic_ring_init(&qt->sub1, db, xdp_idx, flags);
1675 			fbn->tx[xdp_idx] = &qt->sub1;
1676 			xdp_count--;
1677 		} else {
1678 			fbnic_ring_init(&qt->sub1, db, 0,
1679 					FBNIC_RING_F_DISABLED);
1680 		}
1681 
1682 		/* Configure Tx completion queue */
1683 		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
1684 		fbnic_ring_init(&qt->cmpl, db, 0, 0);
1685 
1686 		/* Update Tx queue index */
1687 		txt_count--;
1688 		txq_idx += v_count;
1689 
1690 		/* Move to next queue triad */
1691 		qt++;
1692 	}
1693 
1694 	while (rxt_count) {
1695 		/* Configure header queue */
1696 		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
1697 		fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX);
1698 
1699 		/* Configure payload queue */
1700 		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
1701 		fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX);
1702 
1703 		/* Configure Rx completion queue */
1704 		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
1705 		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
1706 		fbn->rx[rxq_idx] = &qt->cmpl;
1707 
1708 		/* Update Rx queue index */
1709 		rxt_count--;
1710 		rxq_idx += v_count;
1711 
1712 		/* Move to next queue triad */
1713 		qt++;
1714 	}
1715 
1716 	return 0;
1717 
1718 napi_del:
1719 	netif_napi_del_locked(&nv->napi);
1720 	fbn->napi[fbnic_napi_idx(nv)] = NULL;
1721 	kfree(nv);
1722 	return err;
1723 }
1724 
1725 int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
1726 {
1727 	unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
1728 	unsigned int num_tx = fbn->num_tx_queues;
1729 	unsigned int num_rx = fbn->num_rx_queues;
1730 	unsigned int num_napi = fbn->num_napi;
1731 	struct fbnic_dev *fbd = fbn->fbd;
1732 	int err;
1733 
1734 	/* Allocate 1 Tx queue per napi vector */
1735 	if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
1736 		while (num_tx) {
1737 			err = fbnic_alloc_napi_vector(fbd, fbn,
1738 						      num_napi, v_idx,
1739 						      1, txq_idx, 0, 0);
1740 			if (err)
1741 				goto free_vectors;
1742 
1743 			/* Update counts and index */
1744 			num_tx--;
1745 			txq_idx++;
1746 
1747 			v_idx++;
1748 		}
1749 	}
1750 
1751 	/* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
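	/* For example (illustrative numbers): with 4 NAPI vectors, 4 Tx and
	 * 4 Rx queues, each pass below computes tqpv = rqpv = 1, giving
	 * every vector one Tx triad and one Rx triad.
	 */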
1752 	while (num_rx | num_tx) {
1753 		int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
1754 		int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx);
1755 
1756 		err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx,
1757 					      tqpv, txq_idx, rqpv, rxq_idx);
1758 		if (err)
1759 			goto free_vectors;
1760 
1761 		/* Update counts and index */
1762 		num_tx -= tqpv;
1763 		txq_idx++;
1764 
1765 		num_rx -= rqpv;
1766 		rxq_idx++;
1767 
1768 		v_idx++;
1769 	}
1770 
1771 	return 0;
1772 
1773 free_vectors:
1774 	fbnic_free_napi_vectors(fbn);
1775 
1776 	return -ENOMEM;
1777 }
1778 
1779 static void fbnic_free_ring_resources(struct device *dev,
1780 				      struct fbnic_ring *ring)
1781 {
1782 	kvfree(ring->buffer);
1783 	ring->buffer = NULL;
1784 
1785 	/* If size is not set there are no descriptors present */
1786 	if (!ring->size)
1787 		return;
1788 
1789 	dma_free_coherent(dev, ring->size, ring->desc, ring->dma);
1790 	ring->size_mask = 0;
1791 	ring->size = 0;
1792 }
1793 
1794 static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn,
1795 				    struct fbnic_ring *txr)
1796 {
1797 	struct device *dev = fbn->netdev->dev.parent;
1798 	size_t size;
1799 
1800 	/* Round size up to nearest 4K */
1801 	size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096);
1802 
1803 	txr->desc = dma_alloc_coherent(dev, size, &txr->dma,
1804 				       GFP_KERNEL | __GFP_NOWARN);
1805 	if (!txr->desc)
1806 		return -ENOMEM;
1807 
1808 	/* txq_size should be a power of 2, so mask is just that -1 */
1809 	txr->size_mask = fbn->txq_size - 1;
1810 	txr->size = size;
1811 
1812 	return 0;
1813 }
1814 
1815 static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr)
1816 {
1817 	size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1);
1818 
1819 	txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1820 
1821 	return txr->tx_buf ? 0 : -ENOMEM;
1822 }
1823 
1824 static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn,
1825 					 struct fbnic_ring *txr)
1826 {
1827 	struct device *dev = fbn->netdev->dev.parent;
1828 	int err;
1829 
1830 	if (txr->flags & FBNIC_RING_F_DISABLED)
1831 		return 0;
1832 
1833 	err = fbnic_alloc_tx_ring_desc(fbn, txr);
1834 	if (err)
1835 		return err;
1836 
1837 	if (!(txr->flags & FBNIC_RING_F_CTX))
1838 		return 0;
1839 
1840 	err = fbnic_alloc_tx_ring_buffer(txr);
1841 	if (err)
1842 		goto free_desc;
1843 
1844 	return 0;
1845 
1846 free_desc:
1847 	fbnic_free_ring_resources(dev, txr);
1848 	return err;
1849 }
1850 
1851 static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn,
1852 				    struct fbnic_ring *rxr)
1853 {
1854 	struct device *dev = fbn->netdev->dev.parent;
1855 	size_t desc_size = sizeof(*rxr->desc);
1856 	u32 rxq_size;
1857 	size_t size;
1858 
1859 	switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) {
1860 	case FBNIC_QUEUE_BDQ_HPQ_TAIL:
1861 		rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT;
1862 		desc_size *= FBNIC_BD_FRAG_COUNT;
1863 		break;
1864 	case FBNIC_QUEUE_BDQ_PPQ_TAIL:
1865 		rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT;
1866 		desc_size *= FBNIC_BD_FRAG_COUNT;
1867 		break;
1868 	case FBNIC_QUEUE_RCQ_HEAD:
1869 		rxq_size = fbn->rcq_size;
1870 		break;
1871 	default:
1872 		return -EINVAL;
1873 	}
1874 
1875 	/* Round size up to nearest 4K */
1876 	size = ALIGN(array_size(desc_size, rxq_size), 4096);
1877 
1878 	rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma,
1879 				       GFP_KERNEL | __GFP_NOWARN);
1880 	if (!rxr->desc)
1881 		return -ENOMEM;
1882 
1883 	/* rxq_size should be a power of 2, so mask is just that -1 */
1884 	rxr->size_mask = rxq_size - 1;
1885 	rxr->size = size;
1886 
1887 	return 0;
1888 }
1889 
1890 static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr)
1891 {
1892 	size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1);
1893 
1894 	/* Non-context rings only need room for a single packet buffer */
1895 	if (!(rxr->flags & FBNIC_RING_F_CTX))
1896 		size = sizeof(*rxr->pkt);
1898 
1899 	rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1900 
1901 	return rxr->rx_buf ? 0 : -ENOMEM;
1902 }
1903 
1904 static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn,
1905 					 struct fbnic_ring *rxr)
1906 {
1907 	struct device *dev = fbn->netdev->dev.parent;
1908 	int err;
1909 
1910 	err = fbnic_alloc_rx_ring_desc(fbn, rxr);
1911 	if (err)
1912 		return err;
1913 
1914 	err = fbnic_alloc_rx_ring_buffer(rxr);
1915 	if (err)
1916 		goto free_desc;
1917 
1918 	return 0;
1919 
1920 free_desc:
1921 	fbnic_free_ring_resources(dev, rxr);
1922 	return err;
1923 }
1924 
1925 static void fbnic_free_qt_resources(struct fbnic_net *fbn,
1926 				    struct fbnic_q_triad *qt)
1927 {
1928 	struct device *dev = fbn->netdev->dev.parent;
1929 
1930 	fbnic_free_ring_resources(dev, &qt->cmpl);
1931 	fbnic_free_ring_resources(dev, &qt->sub1);
1932 	fbnic_free_ring_resources(dev, &qt->sub0);
1933 
1934 	if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) {
1935 		xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
1936 		xdp_rxq_info_unreg(&qt->xdp_rxq);
1937 		fbnic_free_qt_page_pools(qt);
1938 	}
1939 }
1940 
1941 static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
1942 				       struct fbnic_q_triad *qt)
1943 {
1944 	struct device *dev = fbn->netdev->dev.parent;
1945 	int err;
1946 
1947 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0);
1948 	if (err)
1949 		return err;
1950 
1951 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1);
1952 	if (err)
1953 		goto free_sub0;
1954 
1955 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
1956 	if (err)
1957 		goto free_sub1;
1958 
1959 	return 0;
1960 
1961 free_sub1:
1962 	fbnic_free_ring_resources(dev, &qt->sub1);
1963 free_sub0:
1964 	fbnic_free_ring_resources(dev, &qt->sub0);
1965 	return err;
1966 }
1967 
1968 static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
1969 				       struct fbnic_napi_vector *nv,
1970 				       struct fbnic_q_triad *qt)
1971 {
1972 	struct device *dev = fbn->netdev->dev.parent;
1973 	int err;
1974 
1975 	err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx);
1976 	if (err)
1977 		return err;
1978 
1979 	err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx,
1980 			       nv->napi.napi_id);
1981 	if (err)
1982 		goto free_page_pools;
1983 
1984 	err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL,
1985 					 qt->sub0.page_pool);
1986 	if (err)
1987 		goto unreg_rxq;
1988 
1989 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
1990 	if (err)
1991 		goto unreg_mm;
1992 
1993 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
1994 	if (err)
1995 		goto free_sub0;
1996 
1997 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl);
1998 	if (err)
1999 		goto free_sub1;
2000 
2001 	return 0;
2002 
2003 free_sub1:
2004 	fbnic_free_ring_resources(dev, &qt->sub1);
2005 free_sub0:
2006 	fbnic_free_ring_resources(dev, &qt->sub0);
2007 unreg_mm:
2008 	xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
2009 unreg_rxq:
2010 	xdp_rxq_info_unreg(&qt->xdp_rxq);
2011 free_page_pools:
2012 	fbnic_free_qt_page_pools(qt);
2013 	return err;
2014 }
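
/* Minimal sketch (not part of the driver) of the XDP registration pattern
 * used above: an xdp_rxq_info is registered for a netdev queue and NAPI
 * context, then bound to a page_pool memory model so released XDP frames
 * return to the pool, with teardown in the reverse order. All parameters
 * are assumed to be supplied by the caller.
 */
static __maybe_unused int
fbnic_example_xdp_rxq_setup(struct net_device *netdev, struct page_pool *pool,
			    struct xdp_rxq_info *xdp_rxq, u32 qid, u32 napi_id)
{
	int err;

	err = xdp_rxq_info_reg(xdp_rxq, netdev, qid, napi_id);
	if (err)
		return err;

	err = xdp_rxq_info_reg_mem_model(xdp_rxq, MEM_TYPE_PAGE_POOL, pool);
	if (err)
		xdp_rxq_info_unreg(xdp_rxq);	/* unwind the registration */

	return err;
}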
2015 
2016 static void fbnic_free_nv_resources(struct fbnic_net *fbn,
2017 				    struct fbnic_napi_vector *nv)
2018 {
2019 	int i;
2020 
2021 	for (i = 0; i < nv->txt_count + nv->rxt_count; i++)
2022 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
2023 }
2024 
2025 static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
2026 				    struct fbnic_napi_vector *nv)
2027 {
2028 	int i, j, err;
2029 
2030 	/* Allocate Tx Resources */
2031 	for (i = 0; i < nv->txt_count; i++) {
2032 		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
2033 		if (err)
2034 			goto free_qt_resources;
2035 	}
2036 
2037 	/* Allocate Rx Resources */
2038 	for (j = 0; j < nv->rxt_count; j++, i++) {
2039 		err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]);
2040 		if (err)
2041 			goto free_qt_resources;
2042 	}
2043 
2044 	return 0;
2045 
2046 free_qt_resources:
2047 	while (i--)
2048 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
2049 	return err;
2050 }
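
/* Illustrative sketch (not part of the driver) of the "while (i--)" unwind
 * used above: on failure only the entries that were successfully allocated
 * are released, walking back from the index that failed. The helper and its
 * resource type are hypothetical.
 */
static __maybe_unused int fbnic_example_alloc_array(void **slots, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		slots[i] = kzalloc(64, GFP_KERNEL);
		if (!slots[i])
			goto unwind;
	}

	return 0;

unwind:
	while (i--)
		kfree(slots[i]);
	return -ENOMEM;
}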
2051 
2052 void fbnic_free_resources(struct fbnic_net *fbn)
2053 {
2054 	int i;
2055 
2056 	for (i = 0; i < fbn->num_napi; i++)
2057 		fbnic_free_nv_resources(fbn, fbn->napi[i]);
2058 }
2059 
2060 int fbnic_alloc_resources(struct fbnic_net *fbn)
2061 {
2062 	int i, err = -ENODEV;
2063 
2064 	for (i = 0; i < fbn->num_napi; i++) {
2065 		err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]);
2066 		if (err)
2067 			goto free_resources;
2068 	}
2069 
2070 	return 0;
2071 
2072 free_resources:
2073 	while (i--)
2074 		fbnic_free_nv_resources(fbn, fbn->napi[i]);
2075 
2076 	return err;
2077 }
2078 
2079 static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv)
2080 {
2081 	int i, j;
2082 
2083 	/* Associate Tx queue with NAPI */
2084 	for (i = 0; i < nv->txt_count; i++) {
2085 		struct fbnic_q_triad *qt = &nv->qt[i];
2086 
2087 		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
2088 				     NETDEV_QUEUE_TYPE_TX, &nv->napi);
2089 	}
2090 
2091 	/* Associate Rx queue with NAPI */
2092 	for (j = 0; j < nv->rxt_count; j++, i++) {
2093 		struct fbnic_q_triad *qt = &nv->qt[i];
2094 
2095 		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
2096 				     NETDEV_QUEUE_TYPE_RX, &nv->napi);
2097 	}
2098 }
2099 
2100 static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv)
2101 {
2102 	int i, j;
2103 
2104 	/* Disassociate Tx queue from NAPI */
2105 	for (i = 0; i < nv->txt_count; i++) {
2106 		struct fbnic_q_triad *qt = &nv->qt[i];
2107 
2108 		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
2109 				     NETDEV_QUEUE_TYPE_TX, NULL);
2110 	}
2111 
2112 	/* Disassociate Rx queue from NAPI */
2113 	for (j = 0; j < nv->rxt_count; j++, i++) {
2114 		struct fbnic_q_triad *qt = &nv->qt[i];
2115 
2116 		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
2117 				     NETDEV_QUEUE_TYPE_RX, NULL);
2118 	}
2119 }
2120 
2121 int fbnic_set_netif_queues(struct fbnic_net *fbn)
2122 {
2123 	int i, err;
2124 
2125 	err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues,
2126 					fbn->num_rx_queues);
2127 	if (err)
2128 		return err;
2129 
2130 	for (i = 0; i < fbn->num_napi; i++)
2131 		fbnic_set_netif_napi(fbn->napi[i]);
2132 
2133 	return 0;
2134 }
2135 
2136 void fbnic_reset_netif_queues(struct fbnic_net *fbn)
2137 {
2138 	int i;
2139 
2140 	for (i = 0; i < fbn->num_napi; i++)
2141 		fbnic_reset_netif_napi(fbn->napi[i]);
2142 }
2143 
2144 static void fbnic_disable_twq0(struct fbnic_ring *txr)
2145 {
2146 	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL);
2147 
2148 	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;
2149 
2150 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
2151 }
2152 
2153 static void fbnic_disable_twq1(struct fbnic_ring *txr)
2154 {
2155 	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL);
2156 
2157 	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;
2158 
2159 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl);
2160 }
2161 
2162 static void fbnic_disable_tcq(struct fbnic_ring *txr)
2163 {
2164 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
2165 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK);
2166 }
2167 
2168 static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
2169 {
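	/* The header and payload BDQs share a single BDQ control register in
	 * the queue's CSR block, so clearing the enable bit via the HPQ side
	 * quiesces both; @ppq is not referenced here.
	 */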
2170 	u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL);
2171 
2172 	bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE;
2173 
2174 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
2175 }
2176 
2177 static void fbnic_disable_rcq(struct fbnic_ring *rxr)
2178 {
2179 	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0);
2180 	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK);
2181 }
2182 
2183 void fbnic_napi_disable(struct fbnic_net *fbn)
2184 {
2185 	int i;
2186 
2187 	for (i = 0; i < fbn->num_napi; i++) {
2188 		napi_disable_locked(&fbn->napi[i]->napi);
2189 
2190 		fbnic_nv_irq_disable(fbn->napi[i]);
2191 	}
2192 }
2193 
2194 static void __fbnic_nv_disable(struct fbnic_napi_vector *nv)
2195 {
2196 	int i, t;
2197 
2198 	/* Disable Tx queue triads */
2199 	for (t = 0; t < nv->txt_count; t++) {
2200 		struct fbnic_q_triad *qt = &nv->qt[t];
2201 
2202 		fbnic_disable_twq0(&qt->sub0);
2203 		fbnic_disable_twq1(&qt->sub1);
2204 		fbnic_disable_tcq(&qt->cmpl);
2205 	}
2206 
2207 	/* Disable Rx queue triads */
2208 	for (i = 0; i < nv->rxt_count; i++, t++) {
2209 		struct fbnic_q_triad *qt = &nv->qt[t];
2210 
2211 		fbnic_disable_bdq(&qt->sub0, &qt->sub1);
2212 		fbnic_disable_rcq(&qt->cmpl);
2213 	}
2214 }
2215 
2216 static void
2217 fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
2218 {
2219 	__fbnic_nv_disable(nv);
2220 	fbnic_wrfl(fbn->fbd);
2221 }
2222 
2223 void fbnic_disable(struct fbnic_net *fbn)
2224 {
2225 	struct fbnic_dev *fbd = fbn->fbd;
2226 	int i;
2227 
2228 	for (i = 0; i < fbn->num_napi; i++)
2229 		__fbnic_nv_disable(fbn->napi[i]);
2230 
2231 	fbnic_wrfl(fbd);
2232 }
2233 
2234 static void fbnic_tx_flush(struct fbnic_dev *fbd)
2235 {
2236 	netdev_warn(fbd->netdev, "triggering Tx flush\n");
2237 
2238 	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN,
2239 		    FBNIC_TMI_DROP_CTRL_EN);
2240 }
2241 
2242 static void fbnic_tx_flush_off(struct fbnic_dev *fbd)
2243 {
2244 	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0);
2245 }
2246 
2247 struct fbnic_idle_regs {
2248 	u32 reg_base;
2249 	u8 reg_cnt;
2250 };
2251 
2252 static bool fbnic_all_idle(struct fbnic_dev *fbd,
2253 			   const struct fbnic_idle_regs *regs,
2254 			   unsigned int nregs)
2255 {
2256 	unsigned int i, j;
2257 
2258 	for (i = 0; i < nregs; i++) {
2259 		for (j = 0; j < regs[i].reg_cnt; j++) {
2260 			if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U)
2261 				return false;
2262 		}
2263 	}
2264 	return true;
2265 }
2266 
2267 static void fbnic_idle_dump(struct fbnic_dev *fbd,
2268 			    const struct fbnic_idle_regs *regs,
2269 			    unsigned int nregs, const char *dir, int err)
2270 {
2271 	unsigned int i, j;
2272 
2273 	netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err);
2274 	for (i = 0; i < nregs; i++)
2275 		for (j = 0; j < regs[i].reg_cnt; j++)
2276 			netdev_err(fbd->netdev, "0x%04x: %08x\n",
2277 				   regs[i].reg_base + j,
2278 				   fbnic_rd32(fbd, regs[i].reg_base + j));
2279 }
2280 
2281 int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
2282 {
2283 	static const struct fbnic_idle_regs tx[] = {
2284 		{ FBNIC_QM_TWQ_IDLE(0),	FBNIC_QM_TWQ_IDLE_CNT, },
2285 		{ FBNIC_QM_TQS_IDLE(0),	FBNIC_QM_TQS_IDLE_CNT, },
2286 		{ FBNIC_QM_TDE_IDLE(0),	FBNIC_QM_TDE_IDLE_CNT, },
2287 		{ FBNIC_QM_TCQ_IDLE(0),	FBNIC_QM_TCQ_IDLE_CNT, },
2288 	}, rx[] = {
2289 		{ FBNIC_QM_HPQ_IDLE(0),	FBNIC_QM_HPQ_IDLE_CNT, },
2290 		{ FBNIC_QM_PPQ_IDLE(0),	FBNIC_QM_PPQ_IDLE_CNT, },
2291 		{ FBNIC_QM_RCQ_IDLE(0),	FBNIC_QM_RCQ_IDLE_CNT, },
2292 	};
2293 	bool idle;
2294 	int err;
2295 
2296 	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
2297 				       false, fbd, tx, ARRAY_SIZE(tx));
2298 	if (err == -ETIMEDOUT) {
2299 		fbnic_tx_flush(fbd);
2300 		err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle,
2301 					       2, 500000, false,
2302 					       fbd, tx, ARRAY_SIZE(tx));
2303 		fbnic_tx_flush_off(fbd);
2304 	}
2305 	if (err) {
2306 		fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err);
2307 		if (may_fail)
2308 			return err;
2309 	}
2310 
2311 	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
2312 				       false, fbd, rx, ARRAY_SIZE(rx));
2313 	if (err)
2314 		fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err);
2315 	return err;
2316 }
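
/* Minimal sketch (not part of the driver) of the read_poll_timeout_atomic()
 * pattern used above: the helper keeps invoking the read op with the
 * trailing arguments, stores each result in "val", and returns 0 once the
 * condition holds or -ETIMEDOUT after the timeout (in usecs) expires. The
 * status helper and its argument are hypothetical.
 */
static __maybe_unused u32 fbnic_example_read_status(void *priv)
{
	return BIT(0);			/* stand-in for a CSR read */
}

static __maybe_unused int fbnic_example_poll_ready(void *priv)
{
	u32 val;

	/* Poll every 2 usecs, give up after 500 ms, no sleep before reads */
	return read_poll_timeout_atomic(fbnic_example_read_status, val,
					val & BIT(0), 2, 500000, false, priv);
}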
2317 
2318 static int
2319 fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx)
2320 {
2321 	static const unsigned int tx_regs[] = {
2322 		FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0),
2323 		FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0),
2324 	}, rx_regs[] = {
2325 		FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0),
2326 		FBNIC_QM_RCQ_IDLE(0),
2327 	};
2328 	struct fbnic_dev *fbd = fbn->fbd;
2329 	unsigned int val, mask, off;
2330 	const unsigned int *regs;
2331 	unsigned int reg_cnt;
2332 	int i, err;
2333 
2334 	regs = rx ? rx_regs : tx_regs;
2335 	reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs);
2336 
2337 	off = idx / 32;
2338 	mask = BIT(idx % 32);
2339 
2340 	for (i = 0; i < reg_cnt; i++) {
2341 		err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask,
2342 					       2, 500000, false,
2343 					       fbd, regs[i] + off);
2344 		if (err) {
2345 			netdev_err(fbd->netdev,
2346 				   "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n",
2347 				   rx ? "Rx" : "Tx", idx, regs[i] + off, i,
2348 				   val, mask);
2349 			return err;
2350 		}
2351 	}
2352 
2353 	return 0;
2354 }
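
/* Illustrative sketch (not part of the driver): each idle bank packs one
 * bit per queue into consecutive 32-bit registers, so queue "idx" lives in
 * register word idx / 32 at bit idx % 32. For a hypothetical queue 45 that
 * is bit 13 of the second word.
 */
static __maybe_unused void fbnic_example_idle_bit(void)
{
	unsigned int idx = 45;
	unsigned int off = idx / 32;	/* register word offset: 1 */
	u32 mask = BIT(idx % 32);	/* bit within the word: BIT(13) */

	WARN_ON(off != 1 || mask != BIT(13));
}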
2355 
2356 static void fbnic_nv_flush(struct fbnic_napi_vector *nv)
2357 {
2358 	int j, t;
2359 
2360 	/* Flush any processed Tx Queue Triads and drop the rest */
2361 	for (t = 0; t < nv->txt_count; t++) {
2362 		struct fbnic_q_triad *qt = &nv->qt[t];
2363 		struct netdev_queue *tx_queue;
2364 
2365 		/* Clean the work queues of unprocessed work */
2366 		fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);
2367 		fbnic_clean_twq1(nv, false, &qt->sub1, true,
2368 				 qt->sub1.tail);
2369 
2370 		/* Reset completion queue descriptor ring */
2371 		memset(qt->cmpl.desc, 0, qt->cmpl.size);
2372 
2373 		/* Nothing else to do if Tx queue is disabled */
2374 		if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
2375 			continue;
2376 
2377 		/* Reset BQL associated with Tx queue */
2378 		tx_queue = netdev_get_tx_queue(nv->napi.dev,
2379 					       qt->sub0.q_idx);
2380 		netdev_tx_reset_queue(tx_queue);
2381 	}
2382 
2383 	/* Flush any processed Rx Queue Triads and drop the rest */
2384 	for (j = 0; j < nv->rxt_count; j++, t++) {
2385 		struct fbnic_q_triad *qt = &nv->qt[t];
2386 
2387 		/* Clean the work queues of unprocessed work */
2388 		fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0);
2389 		fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0);
2390 
2391 		/* Reset completion queue descriptor ring */
2392 		memset(qt->cmpl.desc, 0, qt->cmpl.size);
2393 
2394 		fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0);
2395 		memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
2396 	}
2397 }
2398 
2399 void fbnic_flush(struct fbnic_net *fbn)
2400 {
2401 	int i;
2402 
2403 	for (i = 0; i < fbn->num_napi; i++)
2404 		fbnic_nv_flush(fbn->napi[i]);
2405 }
2406 
2407 static void fbnic_nv_fill(struct fbnic_napi_vector *nv)
2408 {
2409 	int j, t;
2410 
2411 	/* Populate pages in the BDQ rings to use for Rx */
2414 	for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
2415 		struct fbnic_q_triad *qt = &nv->qt[t];
2416 
2417 		/* Populate the header and payload BDQs */
2418 		fbnic_fill_bdq(&qt->sub0);
2419 		fbnic_fill_bdq(&qt->sub1);
2420 	}
2421 }
2422 
2423 void fbnic_fill(struct fbnic_net *fbn)
2424 {
2425 	int i;
2426 
2427 	for (i = 0; i < fbn->num_napi; i++)
2428 		fbnic_nv_fill(fbn->napi[i]);
2429 }
2430 
2431 static void fbnic_enable_twq0(struct fbnic_ring *twq)
2432 {
2433 	u32 log_size = fls(twq->size_mask);
2434 
2435 	if (!twq->size_mask)
2436 		return;
2437 
2438 	/* Reset head/tail */
2439 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
2440 	twq->tail = 0;
2441 	twq->head = 0;
2442 
2443 	/* Store descriptor ring address and size */
2444 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
2445 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));
2446 
2447 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2448 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);
2449 
2450 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
2451 }
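
/* Illustrative sketch (not part of the driver): the SIZE registers written
 * above take log2 of the ring size in only 4 bits. For a power-of-two ring
 * fls(size_mask) yields that log2, and the maximum 64K ring (log2 == 16)
 * wraps to 0 once masked to 4 bits, which is what "64K ring size is 0"
 * refers to in the comments above.
 */
static __maybe_unused void fbnic_example_log_size(void)
{
	WARN_ON((fls(1024 - 1) & 0xf) != 10);	/* 1K entries encode as 10 */
	WARN_ON((fls(65536 - 1) & 0xf) != 0);	/* 64K entries encode as 0 */
}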
2452 
2453 static void fbnic_enable_twq1(struct fbnic_ring *twq)
2454 {
2455 	u32 log_size = fls(twq->size_mask);
2456 
2457 	if (!twq->size_mask)
2458 		return;
2459 
2460 	/* Reset head/tail */
2461 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
2462 	twq->tail = 0;
2463 	twq->head = 0;
2464 
2465 	/* Store descriptor ring address and size */
2466 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma));
2467 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma));
2468 
2469 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2470 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf);
2471 
2472 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
2473 }
2474 
2475 static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
2476 			     struct fbnic_ring *tcq)
2477 {
2478 	u32 log_size = fls(tcq->size_mask);
2479 
2480 	if (!tcq->size_mask)
2481 		return;
2482 
2483 	/* Reset head/tail */
2484 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
2485 	tcq->tail = 0;
2486 	tcq->head = 0;
2487 
2488 	/* Store descriptor ring address and size */
2489 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
2490 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));
2491 
2492 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2493 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);
2494 
2495 	/* Store interrupt information for the completion queue */
2496 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
2497 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
2498 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);
2499 
2500 	/* Enable queue */
2501 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
2502 }
2503 
2504 static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
2505 {
2506 	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
2507 	u32 log_size;
2508 
2509 	/* Reset head/tail */
2510 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
2511 	ppq->tail = 0;
2512 	ppq->head = 0;
2513 	hpq->tail = 0;
2514 	hpq->head = 0;
2515 
2516 	log_size = fls(hpq->size_mask);
2517 
2518 	/* Store descriptor ring address and size */
2519 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
2520 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));
2521 
2522 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2523 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);
2524 
2525 	if (!ppq->size_mask)
2526 		goto write_ctl;
2527 
2528 	log_size = fls(ppq->size_mask);
2529 
2530 	/* Add enabling of PPQ to BDQ control */
2531 	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;
2532 
2533 	/* Store descriptor ring address and size */
2534 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
2535 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));
2536 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);
2537 
2538 write_ctl:
2539 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
2540 }
2541 
2542 static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
2543 				       struct fbnic_ring *rcq)
2544 {
2545 	u32 drop_mode, rcq_ctl;
2546 
2547 	drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;
2548 
2549 	/* Specify packet layout */
2550 	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
2551 	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
2552 	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);
2553 
2554 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
2555 }
2556 
2557 static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc)
2558 {
2559 	u32 threshold;
2560 
2561 	/* Set the threshold to half the ring size if rx_frames
2562 	 * is not configured
2563 	 */
2564 	threshold = rx_desc ? : rcq->size_mask / 2;
2565 
2566 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv_idx);
2567 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, threshold);
2568 }
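
/* Illustrative sketch (not part of the driver): "x ? : y" above is the
 * GCC/Clang conditional with the middle operand omitted, common in kernel
 * code. It evaluates to x when x is non-zero and to y otherwise, without
 * evaluating x twice, which is how a zero rx_desc falls back to half the
 * ring size.
 */
static __maybe_unused u32 fbnic_example_default_val(u32 requested, u32 fallback)
{
	return requested ?: fallback;
}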
2569 
2570 void fbnic_config_txrx_usecs(struct fbnic_napi_vector *nv, u32 arm)
2571 {
2572 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
2573 	struct fbnic_dev *fbd = nv->fbd;
2574 	u32 val = arm;
2575 
2576 	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT, fbn->rx_usecs) |
2577 	       FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT_UPD_EN;
2578 	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT, fbn->tx_usecs) |
2579 	       FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT_UPD_EN;
2580 
2581 	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
2582 }
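
/* Minimal sketch (not part of the driver) of the FIELD_PREP() packing used
 * above: each value is shifted into its own bit-field of a single register
 * word so both coalescing timeouts can be updated with one CSR write. The
 * field masks below are hypothetical, not the real CQ_REARM layout.
 */
static __maybe_unused u32 fbnic_example_pack_usecs(u32 rx_usecs, u32 tx_usecs)
{
	return FIELD_PREP(GENMASK(13, 0), rx_usecs) |
	       FIELD_PREP(GENMASK(27, 14), tx_usecs);
}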
2583 
2584 void fbnic_config_rx_frames(struct fbnic_napi_vector *nv)
2585 {
2586 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
2587 	int i;
2588 
2589 	for (i = nv->txt_count; i < nv->rxt_count + nv->txt_count; i++) {
2590 		struct fbnic_q_triad *qt = &nv->qt[i];
2591 
2592 		fbnic_config_rim_threshold(&qt->cmpl, nv->v_idx,
2593 					   fbn->rx_max_frames *
2594 					   FBNIC_MIN_RXD_PER_FRAME);
2595 	}
2596 }
2597 
2598 static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
2599 			     struct fbnic_ring *rcq)
2600 {
2601 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
2602 	u32 log_size = fls(rcq->size_mask);
2603 	u32 hds_thresh = fbn->hds_thresh;
2604 	u32 rcq_ctl = 0;
2605 
2606 	fbnic_config_drop_mode_rcq(nv, rcq);
2607 
2608 	/* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should
2609 	 * be split at L4. It would also result in the frames being split at
2610 	 * L2/L3 depending on the frame size.
2611 	 */
2612 	if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) {
2613 		rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT;
2614 		hds_thresh = FBNIC_HDR_BYTES_MIN;
2615 	}
2616 
2617 	rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
2618 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) |
2619 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
2620 			      FBNIC_RX_PAYLD_OFFSET) |
2621 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
2622 			      FBNIC_RX_PAYLD_PG_CL);
2623 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);
2624 
2625 	/* Reset head/tail */
2626 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
2627 	rcq->head = 0;
2628 	rcq->tail = 0;
2629 
2630 	/* Store descriptor ring address and size */
2631 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
2632 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));
2633 
2634 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2635 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);
2636 
2637 	/* Store interrupt information for the completion queue */
2638 	fbnic_config_rim_threshold(rcq, nv->v_idx, fbn->rx_max_frames *
2639 						   FBNIC_MIN_RXD_PER_FRAME);
2640 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);
2641 
2642 	/* Enable queue */
2643 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
2644 }
2645 
2646 static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
2647 {
2648 	int j, t;
2649 
2650 	/* Setup Tx Queue Triads */
2651 	for (t = 0; t < nv->txt_count; t++) {
2652 		struct fbnic_q_triad *qt = &nv->qt[t];
2653 
2654 		fbnic_enable_twq0(&qt->sub0);
2655 		fbnic_enable_twq1(&qt->sub1);
2656 		fbnic_enable_tcq(nv, &qt->cmpl);
2657 	}
2658 
2659 	/* Setup Rx Queue Triads */
2660 	for (j = 0; j < nv->rxt_count; j++, t++) {
2661 		struct fbnic_q_triad *qt = &nv->qt[t];
2662 
2663 		page_pool_enable_direct_recycling(qt->sub0.page_pool,
2664 						  &nv->napi);
2665 		page_pool_enable_direct_recycling(qt->sub1.page_pool,
2666 						  &nv->napi);
2667 
2668 		fbnic_enable_bdq(&qt->sub0, &qt->sub1);
2669 		fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
2670 		fbnic_enable_rcq(nv, &qt->cmpl);
2671 	}
2672 }
2673 
2674 static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
2675 {
2676 	__fbnic_nv_enable(nv);
2677 	fbnic_wrfl(fbn->fbd);
2678 }
2679 
2680 void fbnic_enable(struct fbnic_net *fbn)
2681 {
2682 	struct fbnic_dev *fbd = fbn->fbd;
2683 	int i;
2684 
2685 	for (i = 0; i < fbn->num_napi; i++)
2686 		__fbnic_nv_enable(fbn->napi[i]);
2687 
2688 	fbnic_wrfl(fbd);
2689 }
2690 
2691 static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
2692 {
2693 	fbnic_config_txrx_usecs(nv, FBNIC_INTR_CQ_REARM_INTR_UNMASK);
2694 }
2695 
2696 void fbnic_napi_enable(struct fbnic_net *fbn)
2697 {
2698 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
2699 	struct fbnic_dev *fbd = fbn->fbd;
2700 	int i;
2701 
2702 	for (i = 0; i < fbn->num_napi; i++) {
2703 		struct fbnic_napi_vector *nv = fbn->napi[i];
2704 
2705 		napi_enable_locked(&nv->napi);
2706 
2707 		fbnic_nv_irq_enable(nv);
2708 
2709 		/* Record bit used for NAPI IRQs so we can
2710 		 * set the mask appropriately
2711 		 */
2712 		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
2713 	}
2714 
2715 	/* Force the first interrupt on the device to guarantee
2716 	 * that any packets that may have been enqueued during the
2717 	 * bringup are processed.
2718 	 */
2719 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
2720 		if (!irqs[i])
2721 			continue;
2722 		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
2723 	}
2724 
2725 	fbnic_wrfl(fbd);
2726 }
2727 
2728 void fbnic_napi_depletion_check(struct net_device *netdev)
2729 {
2730 	struct fbnic_net *fbn = netdev_priv(netdev);
2731 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
2732 	struct fbnic_dev *fbd = fbn->fbd;
2733 	int i, j, t;
2734 
2735 	for (i = 0; i < fbn->num_napi; i++) {
2736 		struct fbnic_napi_vector *nv = fbn->napi[i];
2737 
2738 		/* Find RQs which are completely out of pages */
2739 		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
2740 			/* Assume 4 pages is always enough to fit a packet
2741 			 * and therefore generate a completion and an IRQ.
2742 			 */
2743 			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
2744 			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
2745 				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
2746 		}
2747 	}
2748 
2749 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
2750 		if (!irqs[i])
2751 			continue;
2752 		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
2753 		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
2754 	}
2755 
2756 	fbnic_wrfl(fbd);
2757 }
2758 
2759 static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
2760 {
2761 	struct fbnic_net *fbn = netdev_priv(dev);
2762 	const struct fbnic_q_triad *real;
2763 	struct fbnic_q_triad *qt = qmem;
2764 	struct fbnic_napi_vector *nv;
2765 
2766 	if (!netif_running(dev))
2767 		return fbnic_alloc_qt_page_pools(fbn, qt, idx);
2768 
2769 	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
2770 	nv = fbn->napi[idx % fbn->num_napi];
2771 
2772 	fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx,
2773 			real->sub0.flags);
2774 	fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx,
2775 			real->sub1.flags);
2776 	fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx,
2777 			real->cmpl.flags);
2778 
2779 	return fbnic_alloc_rx_qt_resources(fbn, nv, qt);
2780 }
2781 
2782 static void fbnic_queue_mem_free(struct net_device *dev, void *qmem)
2783 {
2784 	struct fbnic_net *fbn = netdev_priv(dev);
2785 	struct fbnic_q_triad *qt = qmem;
2786 
2787 	if (!netif_running(dev))
2788 		fbnic_free_qt_page_pools(qt);
2789 	else
2790 		fbnic_free_qt_resources(fbn, qt);
2791 }
2792 
2793 static void __fbnic_nv_restart(struct fbnic_net *fbn,
2794 			       struct fbnic_napi_vector *nv)
2795 {
2796 	struct fbnic_dev *fbd = fbn->fbd;
2797 	int i;
2798 
2799 	fbnic_nv_enable(fbn, nv);
2800 	fbnic_nv_fill(nv);
2801 
2802 	napi_enable_locked(&nv->napi);
2803 	fbnic_nv_irq_enable(nv);
2804 	fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32));
2805 	fbnic_wrfl(fbd);
2806 
2807 	for (i = 0; i < nv->txt_count; i++)
2808 		netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx);
2809 }
2810 
2811 static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx)
2812 {
2813 	struct fbnic_net *fbn = netdev_priv(dev);
2814 	struct fbnic_napi_vector *nv;
2815 	struct fbnic_q_triad *real;
2816 
2817 	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
2818 	nv = fbn->napi[idx % fbn->num_napi];
2819 
2820 	fbnic_aggregate_ring_rx_counters(fbn, &real->sub0);
2821 	fbnic_aggregate_ring_rx_counters(fbn, &real->sub1);
2822 	fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl);
2823 
2824 	memcpy(real, qmem, sizeof(*real));
2825 
2826 	__fbnic_nv_restart(fbn, nv);
2827 
2828 	return 0;
2829 }
2830 
2831 static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx)
2832 {
2833 	struct fbnic_net *fbn = netdev_priv(dev);
2834 	const struct fbnic_q_triad *real;
2835 	struct fbnic_napi_vector *nv;
2836 	int i, t;
2837 	int err;
2838 
2839 	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
2840 	nv = fbn->napi[idx % fbn->num_napi];
2841 
2842 	napi_disable_locked(&nv->napi);
2843 	fbnic_nv_irq_disable(nv);
2844 
2845 	for (i = 0; i < nv->txt_count; i++)
2846 		netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx);
2847 	fbnic_nv_disable(fbn, nv);
2848 
2849 	for (t = 0; t < nv->txt_count + nv->rxt_count; t++) {
2850 		err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count,
2851 					    nv->qt[t].sub0.q_idx);
2852 		if (err)
2853 			goto err_restart;
2854 	}
2855 
2856 	fbnic_synchronize_irq(fbn->fbd, nv->v_idx);
2857 	fbnic_nv_flush(nv);
2858 
2859 	page_pool_disable_direct_recycling(real->sub0.page_pool);
2860 	page_pool_disable_direct_recycling(real->sub1.page_pool);
2861 
2862 	memcpy(qmem, real, sizeof(*real));
2863 
2864 	return 0;
2865 
2866 err_restart:
2867 	__fbnic_nv_restart(fbn, nv);
2868 	return err;
2869 }
2870 
2871 const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = {
2872 	.ndo_queue_mem_size	= sizeof(struct fbnic_q_triad),
2873 	.ndo_queue_mem_alloc	= fbnic_queue_mem_alloc,
2874 	.ndo_queue_mem_free	= fbnic_queue_mem_free,
2875 	.ndo_queue_start	= fbnic_queue_start,
2876 	.ndo_queue_stop		= fbnic_queue_stop,
2877 };
2878
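
/* Minimal sketch (not part of this file): the ops table above is what the
 * core queue management API drives to stop, reallocate and restart a single
 * Rx queue. It only takes effect once assigned to the netdev, which the
 * driver does as part of its netdev setup; the helper below is purely
 * illustrative.
 */
static __maybe_unused void
fbnic_example_register_queue_ops(struct net_device *netdev)
{
	netdev->queue_mgmt_ops = &fbnic_queue_mgmt_ops;
}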