xref: /linux/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c (revision 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3 
4 #include <linux/bitfield.h>
5 #include <linux/iopoll.h>
6 #include <linux/pci.h>
7 #include <net/netdev_queues.h>
8 #include <net/page_pool/helpers.h>
9 #include <net/tcp.h>
10 
11 #include "fbnic.h"
12 #include "fbnic_csr.h"
13 #include "fbnic_netdev.h"
14 #include "fbnic_txrx.h"
15 
16 enum {
17 	FBNIC_XMIT_CB_TS	= 0x01,
18 };
19 
20 struct fbnic_xmit_cb {
21 	u32 bytecount;
22 	u16 gso_segs;
23 	u8 desc_count;
24 	u8 flags;
25 	int hw_head;
26 };
27 
28 #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb))
29 
30 static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring)
31 {
32 	unsigned long csr_base = (unsigned long)ring->doorbell;
33 
34 	csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1);
35 
36 	return (u32 __iomem *)csr_base;
37 }
38 
39 static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr)
40 {
41 	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
42 
43 	return readl(csr_base + csr);
44 }
45 
46 static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
47 {
48 	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
49 
50 	writel(val, csr_base + csr);
51 }
52 
53 /**
54  * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
55  * @fbn: netdev priv of the FB NIC
56  * @ts40: timestamp read from a descriptor
57  *
58  * Return: u64 value of PHC time in nanoseconds
59  *
60  * Convert a truncated 40-bit device timestamp, as read from a descriptor,
61  * to the full PHC time in nanoseconds.
62  */
63 static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
64 {
65 	unsigned int s;
66 	u64 time_ns;
67 	s64 offset;
68 	u8 ts_top;
69 	u32 high;
70 
71 	do {
72 		s = u64_stats_fetch_begin(&fbn->time_seq);
73 		offset = READ_ONCE(fbn->time_offset);
74 	} while (u64_stats_fetch_retry(&fbn->time_seq, s));
75 
76 	high = READ_ONCE(fbn->time_high);
77 
78 	/* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
79 	time_ns = (u64)(high >> 8) << 40 | ts40;
80 
81 	/* Compare bits 32-39 between periodic reads and ts40,
82 	 * see if HW clock may have wrapped since last read. We are sure
83 	 * that periodic reads are always at least ~1 minute behind, so
84 	 * this logic works perfectly fine.
85 	 */
86 	ts_top = ts40 >> 32;
87 	if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
88 		time_ns += 1ULL << 40;
89 
90 	return time_ns + offset;
91 }
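
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * The hypothetical helper below walks fbnic_ts40_to_ns() with sample
 * values to show how the truncated 40-bit timestamp is widened and how
 * the wrap check decides whether to add 2^40 ns.
 */
static __maybe_unused u64 fbnic_ts40_to_ns_example(void)
{
	u32 high = 0x12345687;			/* periodic read of bits 63..32 */
	u64 ts40 = 0x02ULL << 32 | 0xabcdef00;	/* descriptor timestamp, bits 39..0 */
	u8 ts_top = ts40 >> 32;			/* bits 39..32 of ts40 = 0x02 */
	u64 time_ns = (u64)(high >> 8) << 40 | ts40;

	/* ts_top (0x02) is more than U8_MAX / 2 below bits 39..32 of the
	 * periodic read (0x87), so the device clock wrapped bit 40 after
	 * that read and the result is bumped by 2^40 ns.
	 */
	if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
		time_ns += 1ULL << 40;

	return time_ns;
}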
92 
93 static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
94 {
95 	return (ring->head - ring->tail - 1) & ring->size_mask;
96 }
97 
98 static unsigned int fbnic_desc_used(struct fbnic_ring *ring)
99 {
100 	return (ring->tail - ring->head) & ring->size_mask;
101 }
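
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * Descriptor counts are powers of two, so the used/unused math above works
 * purely with masking even after head and tail wrap. A worked example for
 * a 256-entry ring follows; the helper name is hypothetical.
 */
static __maybe_unused void fbnic_ring_math_example(void)
{
	struct fbnic_ring ring = { .size_mask = 255, .head = 250, .tail = 10 };

	/* used = (10 - 250) & 255 = 16 descriptors currently in flight */
	WARN_ON(fbnic_desc_used(&ring) != 16);

	/* unused = (250 - 10 - 1) & 255 = 239; one slot stays reserved so
	 * head == tail unambiguously means the ring is empty.
	 */
	WARN_ON(fbnic_desc_unused(&ring) != 239);
}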
102 
103 static struct netdev_queue *txring_txq(const struct net_device *dev,
104 				       const struct fbnic_ring *ring)
105 {
106 	return netdev_get_tx_queue(dev, ring->q_idx);
107 }
108 
109 static int fbnic_maybe_stop_tx(const struct net_device *dev,
110 			       struct fbnic_ring *ring,
111 			       const unsigned int size)
112 {
113 	struct netdev_queue *txq = txring_txq(dev, ring);
114 	int res;
115 
116 	res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
117 				   FBNIC_TX_DESC_WAKEUP);
118 	if (!res) {
119 		u64_stats_update_begin(&ring->stats.syncp);
120 		ring->stats.twq.stop++;
121 		u64_stats_update_end(&ring->stats.syncp);
122 	}
123 
124 	return !res;
125 }
126 
127 static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring)
128 {
129 	struct netdev_queue *dev_queue = txring_txq(skb->dev, ring);
130 	unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount;
131 	bool xmit_more = netdev_xmit_more();
132 
133 	/* TBD: Request completion more often if xmit_more becomes large */
134 
135 	return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more);
136 }
137 
138 static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd)
139 {
140 	u64 raw_twd = le64_to_cpu(*twd);
141 	unsigned int len;
142 	dma_addr_t dma;
143 
144 	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
145 	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);
146 
147 	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
148 }
149 
150 static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
151 {
152 	u64 raw_twd = le64_to_cpu(*twd);
153 	unsigned int len;
154 	dma_addr_t dma;
155 
156 	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
157 	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);
158 
159 	dma_unmap_page(dev, dma, len, DMA_TO_DEVICE);
160 }
161 
162 #define FBNIC_TWD_TYPE(_type) \
163 	cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))
164 
165 static bool fbnic_tx_tstamp(struct sk_buff *skb)
166 {
167 	struct fbnic_net *fbn;
168 
169 	if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
170 		return false;
171 
172 	fbn = netdev_priv(skb->dev);
173 	if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF)
174 		return false;
175 
176 	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
177 	FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS;
178 	FBNIC_XMIT_CB(skb)->hw_head = -1;
179 
180 	return true;
181 }
182 
183 static bool
184 fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb,
185 	     struct skb_shared_info *shinfo, __le64 *meta,
186 	     unsigned int *l2len, unsigned int *i3len)
187 {
188 	unsigned int l3_type, l4_type, l4len, hdrlen;
189 	unsigned char *l4hdr;
190 	__be16 payload_len;
191 
192 	if (unlikely(skb_cow_head(skb, 0)))
193 		return true;
194 
195 	if (shinfo->gso_type & SKB_GSO_PARTIAL) {
196 		l3_type = FBNIC_TWD_L3_TYPE_OTHER;
197 	} else if (!skb->encapsulation) {
198 		if (ip_hdr(skb)->version == 4)
199 			l3_type = FBNIC_TWD_L3_TYPE_IPV4;
200 		else
201 			l3_type = FBNIC_TWD_L3_TYPE_IPV6;
202 	} else {
203 		unsigned int o3len;
204 
205 		o3len = skb_inner_network_header(skb) - skb_network_header(skb);
206 		*i3len -= o3len;
207 		*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK,
208 						o3len / 2));
209 		l3_type = FBNIC_TWD_L3_TYPE_V6V6;
210 	}
211 
212 	l4hdr = skb_checksum_start(skb);
213 	payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data));
214 
215 	if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
216 		struct tcphdr *tcph = (struct tcphdr *)l4hdr;
217 
218 		l4_type = FBNIC_TWD_L4_TYPE_TCP;
219 		l4len = __tcp_hdrlen((struct tcphdr *)l4hdr);
220 		csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len);
221 	} else {
222 		struct udphdr *udph = (struct udphdr *)l4hdr;
223 
224 		l4_type = FBNIC_TWD_L4_TYPE_UDP;
225 		l4len = sizeof(struct udphdr);
226 		csum_replace_by_diff(&udph->check, (__force __wsum)payload_len);
227 	}
228 
229 	hdrlen = (l4hdr - skb->data) + l4len;
230 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) |
231 			     FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) |
232 			     FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) |
233 			     FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) |
234 			     FBNIC_TWD_FLAG_REQ_LSO);
235 
236 	FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen;
237 	FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs;
238 
239 	u64_stats_update_begin(&ring->stats.syncp);
240 	ring->stats.twq.lso += shinfo->gso_segs;
241 	u64_stats_update_end(&ring->stats.syncp);
242 
243 	return false;
244 }
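
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * It restates the byte accounting done by fbnic_tx_lso(): skb->len counts
 * the headers only once, but the device replays them for every segment, so
 * the BQL byte count grows by hdrlen per additional segment. The helper
 * below is hypothetical.
 */
static __maybe_unused unsigned int fbnic_lso_wire_bytes(unsigned int skb_len,
							unsigned int hdrlen,
							unsigned int gso_segs)
{
	/* e.g. 54 bytes of headers + 2920 bytes of payload (two 1460-byte
	 * segments): skb_len = 2974, wire bytes = 2974 + 1 * 54 = 3028.
	 */
	return skb_len + (gso_segs - 1) * hdrlen;
}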
245 
246 static bool
247 fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
248 {
249 	struct skb_shared_info *shinfo = skb_shinfo(skb);
250 	unsigned int l2len, i3len;
251 
252 	if (fbnic_tx_tstamp(skb))
253 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS);
254 
255 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
256 		return false;
257 
258 	l2len = skb_mac_header_len(skb);
259 	i3len = skb_checksum_start(skb) - skb_network_header(skb);
260 
261 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
262 					skb->csum_offset / 2));
263 
264 	if (shinfo->gso_size) {
265 		if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len))
266 			return true;
267 	} else {
268 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
269 		u64_stats_update_begin(&ring->stats.syncp);
270 		ring->stats.twq.csum_partial++;
271 		u64_stats_update_end(&ring->stats.syncp);
272 	}
273 
274 	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
275 			     FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
276 	return false;
277 }
278 
279 static void
280 fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq,
281 	      u64 *csum_cmpl, u64 *csum_none)
282 {
283 	skb_checksum_none_assert(skb);
284 
285 	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
286 		(*csum_none)++;
287 		return;
288 	}
289 
290 	if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
291 		skb->ip_summed = CHECKSUM_UNNECESSARY;
292 	} else {
293 		u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd);
294 
295 		skb->ip_summed = CHECKSUM_COMPLETE;
296 		skb->csum = (__force __wsum)csum;
297 		(*csum_cmpl)++;
298 	}
299 }
300 
301 static bool
302 fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
303 {
304 	struct device *dev = skb->dev->dev.parent;
305 	unsigned int tail = ring->tail, first;
306 	unsigned int size, data_len;
307 	skb_frag_t *frag;
308 	dma_addr_t dma;
309 	__le64 *twd;
310 
311 	ring->tx_buf[tail] = skb;
312 
313 	tail++;
314 	tail &= ring->size_mask;
315 	first = tail;
316 
317 	size = skb_headlen(skb);
318 	data_len = skb->data_len;
319 
320 	if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
321 		goto dma_error;
322 
323 	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
324 
325 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
326 		twd = &ring->desc[tail];
327 
328 		if (dma_mapping_error(dev, dma))
329 			goto dma_error;
330 
331 		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
332 				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
333 				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
334 					      FBNIC_TWD_TYPE_AL));
335 
336 		tail++;
337 		tail &= ring->size_mask;
338 
339 		if (!data_len)
340 			break;
341 
342 		size = skb_frag_size(frag);
343 		data_len -= size;
344 
345 		if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
346 			goto dma_error;
347 
348 		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
349 	}
350 
351 	*twd |= FBNIC_TWD_TYPE(LAST_AL);
352 
353 	FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask;
354 
355 	ring->tail = tail;
356 
357 	/* Record SW timestamp */
358 	skb_tx_timestamp(skb);
359 
360 	/* Verify there is room for another packet */
361 	fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);
362 
363 	if (fbnic_tx_sent_queue(skb, ring)) {
364 		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION);
365 
366 		/* Force DMA writes to flush before writing to tail */
367 		dma_wmb();
368 
369 		writel(tail, ring->doorbell);
370 	}
371 
372 	return false;
373 dma_error:
374 	if (net_ratelimit())
375 		netdev_err(skb->dev, "TX DMA map failed\n");
376 
377 	while (tail != first) {
378 		tail--;
379 		tail &= ring->size_mask;
380 		twd = &ring->desc[tail];
381 		if (tail == first)
382 			fbnic_unmap_single_twd(dev, twd);
383 		else
384 			fbnic_unmap_page_twd(dev, twd);
385 	}
386 
387 	return true;
388 }
389 
390 #define FBNIC_MIN_FRAME_LEN	60
391 
392 static netdev_tx_t
393 fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
394 {
395 	__le64 *meta = &ring->desc[ring->tail];
396 	u16 desc_needed;
397 
398 	if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN))
399 		goto err_count;
400 
401 	/* Need: 1 descriptor per page,
402 	 *       + 1 desc for skb_head,
403 	 *       + 2 desc for metadata and timestamp metadata
404 	 *       + 7 desc gap to keep tail from touching head
405 	 * otherwise try next time
406 	 */
407 	desc_needed = skb_shinfo(skb)->nr_frags + 10;
408 	if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed))
409 		return NETDEV_TX_BUSY;
410 
411 	*meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC);
412 
413 	/* Write all members within DWORD to condense this into 2 4B writes */
414 	FBNIC_XMIT_CB(skb)->bytecount = skb->len;
415 	FBNIC_XMIT_CB(skb)->gso_segs = 1;
416 	FBNIC_XMIT_CB(skb)->desc_count = 0;
417 	FBNIC_XMIT_CB(skb)->flags = 0;
418 
419 	if (fbnic_tx_offloads(ring, skb, meta))
420 		goto err_free;
421 
422 	if (fbnic_tx_map(ring, skb, meta))
423 		goto err_free;
424 
425 	return NETDEV_TX_OK;
426 
427 err_free:
428 	dev_kfree_skb_any(skb);
429 err_count:
430 	u64_stats_update_begin(&ring->stats.syncp);
431 	ring->stats.dropped++;
432 	u64_stats_update_end(&ring->stats.syncp);
433 	return NETDEV_TX_OK;
434 }
435 
436 netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
437 {
438 	struct fbnic_net *fbn = netdev_priv(dev);
439 	unsigned int q_map = skb->queue_mapping;
440 
441 	return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
442 }
443 
444 static netdev_features_t
445 fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev,
446 			       netdev_features_t features, unsigned int l3len)
447 {
448 	netdev_features_t skb_gso_features;
449 	struct ipv6hdr *ip6_hdr;
450 	unsigned char l4_hdr;
451 	unsigned int start;
452 	__be16 frag_off;
453 
454 	/* Require MANGLEID for GSO_PARTIAL of IPv4.
455 	 * In theory we could support TSO with single, innermost v4 header
456 	 * by pretending everything before it is L2, but that needs to be
457 	 * parsed case by case, so leaving it for when the need arises.
458 	 */
459 	if (!(features & NETIF_F_TSO_MANGLEID))
460 		features &= ~NETIF_F_TSO;
461 
462 	skb_gso_features = skb_shinfo(skb)->gso_type;
463 	skb_gso_features <<= NETIF_F_GSO_SHIFT;
464 
465 	/* We'd only clear the native GSO features, so don't bother validating
466 	 * if the match can only be on those supported thru GSO_PARTIAL.
467 	 */
468 	if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES))
469 		return features;
470 
471 	/* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice
472 	 * to fall back to partial for this, or any failure below.
473 	 * This is just an optimization, UDPv4 will be caught later on.
474 	 */
475 	if (skb_gso_features & NETIF_F_TSO)
476 		return features & ~FBNIC_TUN_GSO_FEATURES;
477 
478 	/* Inner headers multiple of 2 */
479 	if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2)
480 		return features & ~FBNIC_TUN_GSO_FEATURES;
481 
482 	/* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */
483 	ip6_hdr = ipv6_hdr(skb);
484 	if (ip6_hdr->version != 6)
485 		return features & ~FBNIC_TUN_GSO_FEATURES;
486 
487 	l4_hdr = ip6_hdr->nexthdr;
488 	start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr);
489 	start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off);
490 	if (frag_off || l4_hdr != IPPROTO_IPV6 ||
491 	    skb->data + start != skb_inner_network_header(skb))
492 		return features & ~FBNIC_TUN_GSO_FEATURES;
493 
494 	return features;
495 }
496 
497 netdev_features_t
498 fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
499 		     netdev_features_t features)
500 {
501 	unsigned int l2len, l3len;
502 
503 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
504 		return features;
505 
506 	l2len = skb_mac_header_len(skb);
507 	l3len = skb_checksum_start(skb) - skb_network_header(skb);
508 
509 	/* Check header lengths are multiple of 2.
510 	 * In case of 6in6 we support longer headers (IHLEN + OHLEN)
511 	 * but keep things simple for now, 512B is plenty.
512 	 */
513 	if ((l2len | l3len | skb->csum_offset) % 2 ||
514 	    !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
515 	    !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
516 	    !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
517 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
518 
519 	if (likely(!skb->encapsulation) || !skb_is_gso(skb))
520 		return features;
521 
522 	return fbnic_features_check_encap_gso(skb, dev, features, l3len);
523 }
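
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * The TWD metadata stores header lengths and the checksum offset in 2-byte
 * units, which is why fbnic_features_check() rejects odd values above. For
 * a plain TCP/IPv4 frame (14-byte Ethernet header, 20-byte IPv4 header,
 * checksum at offset 16 of the TCP header) the checksum-offload metadata
 * written by fbnic_tx_offloads() packs down as shown; the helper name is
 * hypothetical.
 */
static __maybe_unused __le64 fbnic_csum_meta_example(void)
{
	return cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, 14 / 2) |
			   FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, 20 / 2) |
			   FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK, 16 / 2) |
			   FBNIC_TWD_FLAG_REQ_CSO);
}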
524 
525 static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
526 			     struct fbnic_ring *ring, bool discard,
527 			     unsigned int hw_head)
528 {
529 	u64 total_bytes = 0, total_packets = 0, ts_lost = 0;
530 	unsigned int head = ring->head;
531 	struct netdev_queue *txq;
532 	unsigned int clean_desc;
533 
534 	clean_desc = (hw_head - head) & ring->size_mask;
535 
536 	while (clean_desc) {
537 		struct sk_buff *skb = ring->tx_buf[head];
538 		unsigned int desc_cnt;
539 
540 		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
541 		if (desc_cnt > clean_desc)
542 			break;
543 
544 		if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) {
545 			FBNIC_XMIT_CB(skb)->hw_head = hw_head;
546 			if (likely(!discard))
547 				break;
548 			ts_lost++;
549 		}
550 
551 		ring->tx_buf[head] = NULL;
552 
553 		clean_desc -= desc_cnt;
554 
555 		while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) {
556 			head++;
557 			head &= ring->size_mask;
558 			desc_cnt--;
559 		}
560 
561 		fbnic_unmap_single_twd(nv->dev, &ring->desc[head]);
562 		head++;
563 		head &= ring->size_mask;
564 		desc_cnt--;
565 
566 		while (desc_cnt--) {
567 			fbnic_unmap_page_twd(nv->dev, &ring->desc[head]);
568 			head++;
569 			head &= ring->size_mask;
570 		}
571 
572 		total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
573 		total_packets += FBNIC_XMIT_CB(skb)->gso_segs;
574 
575 		napi_consume_skb(skb, napi_budget);
576 	}
577 
578 	if (!total_bytes)
579 		return;
580 
581 	ring->head = head;
582 
583 	txq = txring_txq(nv->napi.dev, ring);
584 
585 	if (unlikely(discard)) {
586 		u64_stats_update_begin(&ring->stats.syncp);
587 		ring->stats.dropped += total_packets;
588 		ring->stats.twq.ts_lost += ts_lost;
589 		u64_stats_update_end(&ring->stats.syncp);
590 
591 		netdev_tx_completed_queue(txq, total_packets, total_bytes);
592 		return;
593 	}
594 
595 	u64_stats_update_begin(&ring->stats.syncp);
596 	ring->stats.bytes += total_bytes;
597 	ring->stats.packets += total_packets;
598 	u64_stats_update_end(&ring->stats.syncp);
599 
600 	if (!netif_txq_completed_wake(txq, total_packets, total_bytes,
601 				      fbnic_desc_unused(ring),
602 				      FBNIC_TX_DESC_WAKEUP)) {
603 		u64_stats_update_begin(&ring->stats.syncp);
604 		ring->stats.twq.wake++;
605 		u64_stats_update_end(&ring->stats.syncp);
606 	}
607 }
608 
609 static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
610 			    struct fbnic_ring *ring,
611 			    u64 tcd, int *ts_head, int *head0)
612 {
613 	struct skb_shared_hwtstamps hwtstamp;
614 	struct fbnic_net *fbn;
615 	struct sk_buff *skb;
616 	int head;
617 	u64 ns;
618 
619 	head = (*ts_head < 0) ? ring->head : *ts_head;
620 
621 	do {
622 		unsigned int desc_cnt;
623 
624 		if (head == ring->tail) {
625 			if (unlikely(net_ratelimit()))
626 				netdev_err(nv->napi.dev,
627 					   "Tx timestamp without matching packet\n");
628 			return;
629 		}
630 
631 		skb = ring->tx_buf[head];
632 		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
633 
634 		head += desc_cnt;
635 		head &= ring->size_mask;
636 	} while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS));
637 
638 	fbn = netdev_priv(nv->napi.dev);
639 	ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd));
640 
641 	memset(&hwtstamp, 0, sizeof(hwtstamp));
642 	hwtstamp.hwtstamp = ns_to_ktime(ns);
643 
644 	*ts_head = head;
645 
646 	FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS;
647 	if (*head0 < 0) {
648 		head = FBNIC_XMIT_CB(skb)->hw_head;
649 		if (head >= 0)
650 			*head0 = head;
651 	}
652 
653 	skb_tstamp_tx(skb, &hwtstamp);
654 	u64_stats_update_begin(&ring->stats.syncp);
655 	ring->stats.twq.ts_packets++;
656 	u64_stats_update_end(&ring->stats.syncp);
657 }
658 
659 static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
660 				 struct page *page)
661 {
662 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
663 
664 	page_pool_fragment_page(page, PAGECNT_BIAS_MAX);
665 	rx_buf->pagecnt_bias = PAGECNT_BIAS_MAX;
666 	rx_buf->page = page;
667 }
668 
669 static struct page *fbnic_page_pool_get(struct fbnic_ring *ring,
670 					unsigned int idx)
671 {
672 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
673 
674 	rx_buf->pagecnt_bias--;
675 
676 	return rx_buf->page;
677 }
678 
679 static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
680 				  struct fbnic_napi_vector *nv, int budget)
681 {
682 	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
683 	struct page *page = rx_buf->page;
684 
685 	if (!page_pool_unref_page(page, rx_buf->pagecnt_bias))
686 		page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget);
687 
688 	rx_buf->page = NULL;
689 }
690 
691 static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
692 			    struct fbnic_q_triad *qt, s32 ts_head, s32 head0)
693 {
694 	if (head0 >= 0)
695 		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
696 	else if (ts_head >= 0)
697 		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
698 }
699 
700 static void
701 fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
702 		int napi_budget)
703 {
704 	struct fbnic_ring *cmpl = &qt->cmpl;
705 	s32 head0 = -1, ts_head = -1;
706 	__le64 *raw_tcd, done;
707 	u32 head = cmpl->head;
708 
709 	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
710 	raw_tcd = &cmpl->desc[head & cmpl->size_mask];
711 
712 	/* Walk the completion queue collecting the heads reported by NIC */
713 	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
714 		u64 tcd;
715 
716 		dma_rmb();
717 
718 		tcd = le64_to_cpu(*raw_tcd);
719 
720 		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
721 		case FBNIC_TCD_TYPE_0:
722 			if (!(tcd & FBNIC_TCD_TWQ1))
723 				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
724 						  tcd);
725 			/* Currently all err status bits are related to
726 			 * timestamps and as those have yet to be added
727 			 * they are skipped for now.
728 			 */
729 			break;
730 		case FBNIC_TCD_TYPE_1:
731 			if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
732 				break;
733 
734 			fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
735 			break;
736 		default:
737 			break;
738 		}
739 
740 		raw_tcd++;
741 		head++;
742 		if (!(head & cmpl->size_mask)) {
743 			done ^= cpu_to_le64(FBNIC_TCD_DONE);
744 			raw_tcd = &cmpl->desc[0];
745 		}
746 	}
747 
748 	/* Record the current head/tail of the queue */
749 	if (cmpl->head != head) {
750 		cmpl->head = head;
751 		writel(head & cmpl->size_mask, cmpl->doorbell);
752 	}
753 
754 	/* Unmap and free processed buffers */
755 	fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0);
756 }
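
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * The completion queues treat the DONE bit as a generation marker: the
 * software head keeps one extra bit above size_mask, and that wrap bit
 * selects which DONE polarity means "new descriptor" on the current pass,
 * so stale entries never have to be cleared. The helper is hypothetical.
 */
static __maybe_unused bool fbnic_tcd_is_new(__le64 tcd, u32 head, u32 size_mask)
{
	/* Expected DONE value flips each time head crosses the ring end */
	__le64 done = (head & (size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);

	return (tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done;
}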
757 
758 static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
759 			    struct fbnic_ring *ring, unsigned int hw_head)
760 {
761 	unsigned int head = ring->head;
762 
763 	if (head == hw_head)
764 		return;
765 
766 	do {
767 		fbnic_page_pool_drain(ring, head, nv, napi_budget);
768 
769 		head++;
770 		head &= ring->size_mask;
771 	} while (head != hw_head);
772 
773 	ring->head = head;
774 }
775 
776 static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page)
777 {
778 	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
779 	dma_addr_t dma = page_pool_get_dma_addr(page);
780 	u64 bd, i = FBNIC_BD_FRAG_COUNT;
781 
782 	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
783 	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);
784 
785 	/* In the case that the page size is larger than 4K we will map a
786 	 * single page to multiple fragments. Each page provides
787 	 * FBNIC_BD_FRAG_COUNT fragments, and the lower n bits of the ID
788 	 * are used to indicate the individual fragment IDs.
789 	 */
790 	do {
791 		*bdq_desc = cpu_to_le64(bd);
792 		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
793 		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
794 	} while (--i);
795 }
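
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * Assuming 4K hardware buffers, a 16K page would give FBNIC_BD_FRAG_COUNT
 * of 4: fbnic_bd_prep() then emits four consecutive descriptors from one
 * page, stepping the DMA address by one fragment and the low bits of the
 * buffer ID by one each time. The helper below is hypothetical.
 */
static __maybe_unused void fbnic_bd_frag_example(dma_addr_t page_dma)
{
	unsigned int frag;

	for (frag = 0; frag < FBNIC_BD_FRAG_COUNT; frag++) {
		/* Offset of fragment "frag" within the page; with 4K pages
		 * FBNIC_BD_FRAG_COUNT is 1 and the loop collapses to a
		 * single descriptor.
		 */
		dma_addr_t frag_dma = page_dma + frag * FBNIC_BD_FRAG_SIZE;

		(void)frag_dma;
	}
}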
796 
797 static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
798 {
799 	unsigned int count = fbnic_desc_unused(bdq);
800 	unsigned int i = bdq->tail;
801 
802 	if (!count)
803 		return;
804 
805 	do {
806 		struct page *page;
807 
808 		page = page_pool_dev_alloc_pages(nv->page_pool);
809 		if (!page) {
810 			u64_stats_update_begin(&bdq->stats.syncp);
811 			bdq->stats.rx.alloc_failed++;
812 			u64_stats_update_end(&bdq->stats.syncp);
813 
814 			break;
815 		}
816 
817 		fbnic_page_pool_init(bdq, i, page);
818 		fbnic_bd_prep(bdq, i, page);
819 
820 		i++;
821 		i &= bdq->size_mask;
822 
823 		count--;
824 	} while (count);
825 
826 	if (bdq->tail != i) {
827 		bdq->tail = i;
828 
829 		/* Force DMA writes to flush before writing to tail */
830 		dma_wmb();
831 
832 		writel(i, bdq->doorbell);
833 	}
834 }
835 
836 static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
837 {
838 	/* The headroom of the first header may be larger than FBNIC_RX_HROOM
839 	 * due to alignment. So account for that by just making the page
840 	 * offset 0 if we are starting at the first header.
841 	 */
842 	if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
843 	    pg_off == ALIGN(FBNIC_RX_HROOM, 128))
844 		return 0;
845 
846 	return pg_off - FBNIC_RX_HROOM;
847 }
848 
849 static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
850 {
851 	/* Determine the end of the buffer by finding the start of the next
852 	 * and then subtracting the headroom from that frame.
853 	 */
854 	pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;
855 
856 	return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
857 }
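
/* Editor's note: an illustrative sketch, not part of the upstream driver.
 * Header buffers are laid out on a 128-byte grid within the page, with
 * FBNIC_RX_HROOM in front of and FBNIC_RX_TROOM behind every frame. The
 * hypothetical helper below mirrors how fbnic_pkt_prepare() derives the
 * truesize (frame_sz) of one header buffer from that layout.
 */
static __maybe_unused unsigned int fbnic_hdr_frame_sz(unsigned int pg_off,
						      unsigned int len)
{
	/* frame_sz = end of this buffer - start of this buffer, i.e.
	 * headroom + received headers + tailroom, rounded out to the next
	 * 128-byte aligned headroom boundary.
	 */
	return fbnic_hdr_pg_end(pg_off, len) - fbnic_hdr_pg_start(pg_off);
}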
858 
859 static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
860 			      struct fbnic_pkt_buff *pkt,
861 			      struct fbnic_q_triad *qt)
862 {
863 	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
864 	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
865 	struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx);
866 	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
867 	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
868 	unsigned char *hdr_start;
869 
870 	/* data_hard_start should always be NULL when this is called */
871 	WARN_ON_ONCE(pkt->buff.data_hard_start);
872 
873 	/* Short-cut the end calculation if we know the page is fully consumed */
874 	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
875 		     FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len);
876 	hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off);
877 
878 	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
879 	frame_sz = hdr_pg_end - hdr_pg_start;
880 	xdp_init_buff(&pkt->buff, frame_sz, NULL);
881 	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
882 			FBNIC_BD_FRAG_SIZE;
883 
884 	/* Sync DMA buffer */
885 	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
886 				      hdr_pg_start, frame_sz,
887 				      DMA_BIDIRECTIONAL);
888 
889 	/* Build frame around buffer */
890 	hdr_start = page_address(page) + hdr_pg_start;
891 
892 	xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
893 			 len - FBNIC_RX_PAD, true);
894 
895 	pkt->data_truesize = 0;
896 	pkt->data_len = 0;
897 	pkt->nr_frags = 0;
898 }
899 
900 static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
901 			      struct fbnic_pkt_buff *pkt,
902 			      struct fbnic_q_triad *qt)
903 {
904 	unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
905 	unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
906 	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
907 	struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx);
908 	struct skb_shared_info *shinfo;
909 	unsigned int truesize;
910 
911 	truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
912 		   FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);
913 
914 	pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
915 		  FBNIC_BD_FRAG_SIZE;
916 
917 	/* Sync DMA buffer */
918 	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
919 				      pg_off, truesize, DMA_BIDIRECTIONAL);
920 
921 	/* Add page to xdp shared info */
922 	shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
923 
924 	/* Accumulate truesize of the payload frags; added to the skb later */
925 	pkt->data_truesize += truesize;
926 
927 	__skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len);
928 
929 	/* Accumulate the payload length; added to the skb later */
930 	pkt->data_len += len;
931 }
932 
933 static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv,
934 			       struct fbnic_pkt_buff *pkt, int budget)
935 {
936 	struct skb_shared_info *shinfo;
937 	struct page *page;
938 	int nr_frags;
939 
940 	if (!pkt->buff.data_hard_start)
941 		return;
942 
943 	shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
944 	nr_frags = pkt->nr_frags;
945 
946 	while (nr_frags--) {
947 		page = skb_frag_page(&shinfo->frags[nr_frags]);
948 		page_pool_put_full_page(nv->page_pool, page, !!budget);
949 	}
950 
951 	page = virt_to_page(pkt->buff.data_hard_start);
952 	page_pool_put_full_page(nv->page_pool, page, !!budget);
953 }
954 
955 static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
956 				       struct fbnic_pkt_buff *pkt)
957 {
958 	unsigned int nr_frags = pkt->nr_frags;
959 	struct skb_shared_info *shinfo;
960 	unsigned int truesize;
961 	struct sk_buff *skb;
962 
963 	truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM -
964 		   pkt->buff.data_hard_start;
965 
966 	/* Build frame around buffer */
967 	skb = napi_build_skb(pkt->buff.data_hard_start, truesize);
968 	if (unlikely(!skb))
969 		return NULL;
970 
971 	/* Push data pointer to start of data, put tail to end of data */
972 	skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start);
973 	__skb_put(skb, pkt->buff.data_end - pkt->buff.data);
974 
975 	/* Add tracking for metadata at the start of the frame */
976 	skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta);
977 
978 	/* Add Rx frags */
979 	if (nr_frags) {
980 		/* Verify that shared info didn't move */
981 		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
982 		WARN_ON(skb_shinfo(skb) != shinfo);
983 
984 		skb->truesize += pkt->data_truesize;
985 		skb->data_len += pkt->data_len;
986 		shinfo->nr_frags = nr_frags;
987 		skb->len += pkt->data_len;
988 	}
989 
990 	skb_mark_for_recycle(skb);
991 
992 	/* Set MAC header specific fields */
993 	skb->protocol = eth_type_trans(skb, nv->napi.dev);
994 
995 	/* Add timestamp if present */
996 	if (pkt->hwtstamp)
997 		skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;
998 
999 	return skb;
1000 }
1001 
1002 static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
1003 {
1004 	return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
1005 	       (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L3 :
1006 						     PKT_HASH_TYPE_L2;
1007 }
1008 
1009 static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
1010 			    struct fbnic_pkt_buff *pkt)
1011 {
1012 	struct fbnic_net *fbn;
1013 	u64 ns, ts;
1014 
1015 	if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd))
1016 		return;
1017 
1018 	fbn = netdev_priv(nv->napi.dev);
1019 	ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd);
1020 	ns = fbnic_ts40_to_ns(fbn, ts);
1021 
1022 	/* Add timestamp to shared info */
1023 	pkt->hwtstamp = ns_to_ktime(ns);
1024 }
1025 
1026 static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
1027 				      u64 rcd, struct sk_buff *skb,
1028 				      struct fbnic_q_triad *qt,
1029 				      u64 *csum_cmpl, u64 *csum_none)
1030 {
1031 	struct net_device *netdev = nv->napi.dev;
1032 	struct fbnic_ring *rcq = &qt->cmpl;
1033 
1034 	fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none);
1035 
1036 	if (netdev->features & NETIF_F_RXHASH)
1037 		skb_set_hash(skb,
1038 			     FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd),
1039 			     fbnic_skb_hash_type(rcd));
1040 
1041 	skb_record_rx_queue(skb, rcq->q_idx);
1042 }
1043 
1044 static bool fbnic_rcd_metadata_err(u64 rcd)
1045 {
1046 	return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd);
1047 }
1048 
1049 static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
1050 			   struct fbnic_q_triad *qt, int budget)
1051 {
1052 	unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
1053 	u64 csum_complete = 0, csum_none = 0;
1054 	struct fbnic_ring *rcq = &qt->cmpl;
1055 	struct fbnic_pkt_buff *pkt;
1056 	s32 head0 = -1, head1 = -1;
1057 	__le64 *raw_rcd, done;
1058 	u32 head = rcq->head;
1059 
1060 	done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0;
1061 	raw_rcd = &rcq->desc[head & rcq->size_mask];
1062 	pkt = rcq->pkt;
1063 
1064 	/* Walk the completion queue collecting the heads reported by NIC */
1065 	while (likely(packets < budget)) {
1066 		struct sk_buff *skb = ERR_PTR(-EINVAL);
1067 		u64 rcd;
1068 
1069 		if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done)
1070 			break;
1071 
1072 		dma_rmb();
1073 
1074 		rcd = le64_to_cpu(*raw_rcd);
1075 
1076 		switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) {
1077 		case FBNIC_RCD_TYPE_HDR_AL:
1078 			head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
1079 			fbnic_pkt_prepare(nv, rcd, pkt, qt);
1080 
1081 			break;
1082 		case FBNIC_RCD_TYPE_PAY_AL:
1083 			head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
1084 			fbnic_add_rx_frag(nv, rcd, pkt, qt);
1085 
1086 			break;
1087 		case FBNIC_RCD_TYPE_OPT_META:
1088 			/* Only type 0 is currently supported */
1089 			if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
1090 				break;
1091 
1092 			fbnic_rx_tstamp(nv, rcd, pkt);
1093 
1094 			/* We currently ignore the action table index */
1095 			break;
1096 		case FBNIC_RCD_TYPE_META:
1097 			if (likely(!fbnic_rcd_metadata_err(rcd)))
1098 				skb = fbnic_build_skb(nv, pkt);
1099 
1100 			/* Populate skb and invalidate XDP */
1101 			if (!IS_ERR_OR_NULL(skb)) {
1102 				fbnic_populate_skb_fields(nv, rcd, skb, qt,
1103 							  &csum_complete,
1104 							  &csum_none);
1105 
1106 				packets++;
1107 				bytes += skb->len;
1108 
1109 				napi_gro_receive(&nv->napi, skb);
1110 			} else {
1111 				if (!skb) {
1112 					alloc_failed++;
1113 					dropped++;
1114 				} else {
1115 					dropped++;
1116 				}
1117 
1118 				fbnic_put_pkt_buff(nv, pkt, 1);
1119 			}
1120 
1121 			pkt->buff.data_hard_start = NULL;
1122 
1123 			break;
1124 		}
1125 
1126 		raw_rcd++;
1127 		head++;
1128 		if (!(head & rcq->size_mask)) {
1129 			done ^= cpu_to_le64(FBNIC_RCD_DONE);
1130 			raw_rcd = &rcq->desc[0];
1131 		}
1132 	}
1133 
1134 	u64_stats_update_begin(&rcq->stats.syncp);
1135 	rcq->stats.packets += packets;
1136 	rcq->stats.bytes += bytes;
1137 	/* Re-add ethernet header length (removed in fbnic_build_skb) */
1138 	rcq->stats.bytes += ETH_HLEN * packets;
1139 	rcq->stats.dropped += dropped;
1140 	rcq->stats.rx.alloc_failed += alloc_failed;
1141 	rcq->stats.rx.csum_complete += csum_complete;
1142 	rcq->stats.rx.csum_none += csum_none;
1143 	u64_stats_update_end(&rcq->stats.syncp);
1144 
1145 	/* Unmap and free processed buffers */
1146 	if (head0 >= 0)
1147 		fbnic_clean_bdq(nv, budget, &qt->sub0, head0);
1148 	fbnic_fill_bdq(nv, &qt->sub0);
1149 
1150 	if (head1 >= 0)
1151 		fbnic_clean_bdq(nv, budget, &qt->sub1, head1);
1152 	fbnic_fill_bdq(nv, &qt->sub1);
1153 
1154 	/* Record the current head/tail of the queue */
1155 	if (rcq->head != head) {
1156 		rcq->head = head;
1157 		writel(head & rcq->size_mask, rcq->doorbell);
1158 	}
1159 
1160 	return packets;
1161 }
1162 
1163 static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
1164 {
1165 	struct fbnic_dev *fbd = nv->fbd;
1166 	u32 v_idx = nv->v_idx;
1167 
1168 	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
1169 }
1170 
1171 static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
1172 {
1173 	struct fbnic_dev *fbd = nv->fbd;
1174 	u32 v_idx = nv->v_idx;
1175 
1176 	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
1177 		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
1178 }
1179 
1180 static int fbnic_poll(struct napi_struct *napi, int budget)
1181 {
1182 	struct fbnic_napi_vector *nv = container_of(napi,
1183 						    struct fbnic_napi_vector,
1184 						    napi);
1185 	int i, j, work_done = 0;
1186 
1187 	for (i = 0; i < nv->txt_count; i++)
1188 		fbnic_clean_tcq(nv, &nv->qt[i], budget);
1189 
1190 	for (j = 0; j < nv->rxt_count; j++, i++)
1191 		work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);
1192 
1193 	if (work_done >= budget)
1194 		return budget;
1195 
1196 	if (likely(napi_complete_done(napi, work_done)))
1197 		fbnic_nv_irq_rearm(nv);
1198 
1199 	return work_done;
1200 }
1201 
1202 irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
1203 {
1204 	struct fbnic_napi_vector *nv = *(void **)data;
1205 
1206 	napi_schedule_irqoff(&nv->napi);
1207 
1208 	return IRQ_HANDLED;
1209 }
1210 
1211 void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
1212 				      struct fbnic_ring *rxr)
1213 {
1214 	struct fbnic_queue_stats *stats = &rxr->stats;
1215 
1216 	/* Capture stats from queues before disassociating them */
1217 	fbn->rx_stats.bytes += stats->bytes;
1218 	fbn->rx_stats.packets += stats->packets;
1219 	fbn->rx_stats.dropped += stats->dropped;
1220 	fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
1221 	fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
1222 	fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
1223 	/* Remember to add new stats here */
1224 	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 3);
1225 }
1226 
1227 void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
1228 				      struct fbnic_ring *txr)
1229 {
1230 	struct fbnic_queue_stats *stats = &txr->stats;
1231 
1232 	/* Capture stats from queues before disassociating them */
1233 	fbn->tx_stats.bytes += stats->bytes;
1234 	fbn->tx_stats.packets += stats->packets;
1235 	fbn->tx_stats.dropped += stats->dropped;
1236 	fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
1237 	fbn->tx_stats.twq.lso += stats->twq.lso;
1238 	fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
1239 	fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
1240 	fbn->tx_stats.twq.stop += stats->twq.stop;
1241 	fbn->tx_stats.twq.wake += stats->twq.wake;
1242 	/* Remember to add new stats here */
1243 	BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
1244 }
1245 
1246 static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
1247 				 struct fbnic_ring *txr)
1248 {
1249 	if (!(txr->flags & FBNIC_RING_F_STATS))
1250 		return;
1251 
1252 	fbnic_aggregate_ring_tx_counters(fbn, txr);
1253 
1254 	/* Remove pointer to the Tx ring */
1255 	WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
1256 	fbn->tx[txr->q_idx] = NULL;
1257 }
1258 
1259 static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
1260 				 struct fbnic_ring *rxr)
1261 {
1262 	if (!(rxr->flags & FBNIC_RING_F_STATS))
1263 		return;
1264 
1265 	fbnic_aggregate_ring_rx_counters(fbn, rxr);
1266 
1267 	/* Remove pointer to the Rx ring */
1268 	WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
1269 	fbn->rx[rxr->q_idx] = NULL;
1270 }
1271 
1272 static void fbnic_free_napi_vector(struct fbnic_net *fbn,
1273 				   struct fbnic_napi_vector *nv)
1274 {
1275 	struct fbnic_dev *fbd = nv->fbd;
1276 	int i, j;
1277 
1278 	for (i = 0; i < nv->txt_count; i++) {
1279 		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
1280 		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
1281 	}
1282 
1283 	for (j = 0; j < nv->rxt_count; j++, i++) {
1284 		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
1285 		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
1286 		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
1287 	}
1288 
1289 	fbnic_napi_free_irq(fbd, nv);
1290 	page_pool_destroy(nv->page_pool);
1291 	netif_napi_del(&nv->napi);
1292 	fbn->napi[fbnic_napi_idx(nv)] = NULL;
1293 	kfree(nv);
1294 }
1295 
1296 void fbnic_free_napi_vectors(struct fbnic_net *fbn)
1297 {
1298 	int i;
1299 
1300 	for (i = 0; i < fbn->num_napi; i++)
1301 		if (fbn->napi[i])
1302 			fbnic_free_napi_vector(fbn, fbn->napi[i]);
1303 }
1304 
1305 #define FBNIC_PAGE_POOL_FLAGS \
1306 	(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
1307 
1308 static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn,
1309 				    struct fbnic_napi_vector *nv)
1310 {
1311 	struct page_pool_params pp_params = {
1312 		.order = 0,
1313 		.flags = FBNIC_PAGE_POOL_FLAGS,
1314 		.pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count,
1315 		.nid = NUMA_NO_NODE,
1316 		.dev = nv->dev,
1317 		.dma_dir = DMA_BIDIRECTIONAL,
1318 		.offset = 0,
1319 		.max_len = PAGE_SIZE,
1320 		.napi	= &nv->napi,
1321 		.netdev	= fbn->netdev,
1322 	};
1323 	struct page_pool *pp;
1324 
1325 	/* Page pool cannot exceed a size of 32768. This doesn't limit the
1326 	 * pages on the ring but the number we can have cached waiting on
1327 	 * the next use.
1328 	 *
1329 	 * TBD: Can this be reduced further? Would a multiple of
1330 	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
1331 	 * many pages we need to hold in reserve to get the best return
1332 	 * without hogging too much system memory.
1333 	 */
1334 	if (pp_params.pool_size > 32768)
1335 		pp_params.pool_size = 32768;
1336 
1337 	pp = page_pool_create(&pp_params);
1338 	if (IS_ERR(pp))
1339 		return PTR_ERR(pp);
1340 
1341 	nv->page_pool = pp;
1342 
1343 	return 0;
1344 }
1345 
1346 static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
1347 			    int q_idx, u8 flags)
1348 {
1349 	u64_stats_init(&ring->stats.syncp);
1350 	ring->doorbell = doorbell;
1351 	ring->q_idx = q_idx;
1352 	ring->flags = flags;
1353 }
1354 
1355 static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
1356 				   unsigned int v_count, unsigned int v_idx,
1357 				   unsigned int txq_count, unsigned int txq_idx,
1358 				   unsigned int rxq_count, unsigned int rxq_idx)
1359 {
1360 	int txt_count = txq_count, rxt_count = rxq_count;
1361 	u32 __iomem *uc_addr = fbd->uc_addr0;
1362 	struct fbnic_napi_vector *nv;
1363 	struct fbnic_q_triad *qt;
1364 	int qt_count, err;
1365 	u32 __iomem *db;
1366 
1367 	qt_count = txt_count + rxq_count;
1368 	if (!qt_count)
1369 		return -EINVAL;
1370 
1371 	/* If MMIO has already failed there are no rings to initialize */
1372 	if (!uc_addr)
1373 		return -EIO;
1374 
1375 	/* Allocate NAPI vector and queue triads */
1376 	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
1377 	if (!nv)
1378 		return -ENOMEM;
1379 
1380 	/* Record queue triad counts */
1381 	nv->txt_count = txt_count;
1382 	nv->rxt_count = rxt_count;
1383 
1384 	/* Provide pointer back to fbnic and MSI-X vectors */
1385 	nv->fbd = fbd;
1386 	nv->v_idx = v_idx;
1387 
1388 	/* Tie napi to netdev */
1389 	fbn->napi[fbnic_napi_idx(nv)] = nv;
1390 	netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll);
1391 
1392 	/* Record IRQ to NAPI struct */
1393 	netif_napi_set_irq(&nv->napi,
1394 			   pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));
1395 
1396 	/* Tie nv back to PCIe dev */
1397 	nv->dev = fbd->dev;
1398 
1399 	/* Allocate page pool */
1400 	if (rxq_count) {
1401 		err = fbnic_alloc_nv_page_pool(fbn, nv);
1402 		if (err)
1403 			goto napi_del;
1404 	}
1405 
1406 	/* Request the IRQ for napi vector */
1407 	err = fbnic_napi_request_irq(fbd, nv);
1408 	if (err)
1409 		goto pp_destroy;
1410 
1411 	/* Initialize queue triads */
1412 	qt = nv->qt;
1413 
1414 	while (txt_count) {
1415 		/* Configure Tx queue */
1416 		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];
1417 
1418 		/* Assign Tx queue to netdev if applicable */
1419 		if (txq_count > 0) {
1420 			u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;
1421 
1422 			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
1423 			fbn->tx[txq_idx] = &qt->sub0;
1424 			txq_count--;
1425 		} else {
1426 			fbnic_ring_init(&qt->sub0, db, 0,
1427 					FBNIC_RING_F_DISABLED);
1428 		}
1429 
1430 		/* Configure Tx completion queue */
1431 		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
1432 		fbnic_ring_init(&qt->cmpl, db, 0, 0);
1433 
1434 		/* Update Tx queue index */
1435 		txt_count--;
1436 		txq_idx += v_count;
1437 
1438 		/* Move to next queue triad */
1439 		qt++;
1440 	}
1441 
1442 	while (rxt_count) {
1443 		/* Configure header queue */
1444 		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
1445 		fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX);
1446 
1447 		/* Configure payload queue */
1448 		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
1449 		fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX);
1450 
1451 		/* Configure Rx completion queue */
1452 		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
1453 		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
1454 		fbn->rx[rxq_idx] = &qt->cmpl;
1455 
1456 		/* Update Rx queue index */
1457 		rxt_count--;
1458 		rxq_idx += v_count;
1459 
1460 		/* Move to next queue triad */
1461 		qt++;
1462 	}
1463 
1464 	return 0;
1465 
1466 pp_destroy:
1467 	page_pool_destroy(nv->page_pool);
1468 napi_del:
1469 	netif_napi_del(&nv->napi);
1470 	fbn->napi[fbnic_napi_idx(nv)] = NULL;
1471 	kfree(nv);
1472 	return err;
1473 }
1474 
1475 int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
1476 {
1477 	unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
1478 	unsigned int num_tx = fbn->num_tx_queues;
1479 	unsigned int num_rx = fbn->num_rx_queues;
1480 	unsigned int num_napi = fbn->num_napi;
1481 	struct fbnic_dev *fbd = fbn->fbd;
1482 	int err;
1483 
1484 	/* Allocate 1 Tx queue per napi vector */
1485 	if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
1486 		while (num_tx) {
1487 			err = fbnic_alloc_napi_vector(fbd, fbn,
1488 						      num_napi, v_idx,
1489 						      1, txq_idx, 0, 0);
1490 			if (err)
1491 				goto free_vectors;
1492 
1493 			/* Update counts and index */
1494 			num_tx--;
1495 			txq_idx++;
1496 
1497 			v_idx++;
1498 		}
1499 	}
1500 
1501 	/* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
1502 	while (num_rx | num_tx) {
1503 		int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
1504 		int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx);
1505 
1506 		err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx,
1507 					      tqpv, txq_idx, rqpv, rxq_idx);
1508 		if (err)
1509 			goto free_vectors;
1510 
1511 		/* Update counts and index */
1512 		num_tx -= tqpv;
1513 		txq_idx++;
1514 
1515 		num_rx -= rqpv;
1516 		rxq_idx++;
1517 
1518 		v_idx++;
1519 	}
1520 
1521 	return 0;
1522 
1523 free_vectors:
1524 	fbnic_free_napi_vectors(fbn);
1525 
1526 	return -ENOMEM;
1527 }
1528 
1529 static void fbnic_free_ring_resources(struct device *dev,
1530 				      struct fbnic_ring *ring)
1531 {
1532 	kvfree(ring->buffer);
1533 	ring->buffer = NULL;
1534 
1535 	/* If size is not set there are no descriptors present */
1536 	if (!ring->size)
1537 		return;
1538 
1539 	dma_free_coherent(dev, ring->size, ring->desc, ring->dma);
1540 	ring->size_mask = 0;
1541 	ring->size = 0;
1542 }
1543 
1544 static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn,
1545 				    struct fbnic_ring *txr)
1546 {
1547 	struct device *dev = fbn->netdev->dev.parent;
1548 	size_t size;
1549 
1550 	/* Round size up to nearest 4K */
1551 	size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096);
1552 
1553 	txr->desc = dma_alloc_coherent(dev, size, &txr->dma,
1554 				       GFP_KERNEL | __GFP_NOWARN);
1555 	if (!txr->desc)
1556 		return -ENOMEM;
1557 
1558 	/* txq_size should be a power of 2, so mask is just that -1 */
1559 	txr->size_mask = fbn->txq_size - 1;
1560 	txr->size = size;
1561 
1562 	return 0;
1563 }
1564 
1565 static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr)
1566 {
1567 	size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1);
1568 
1569 	txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1570 
1571 	return txr->tx_buf ? 0 : -ENOMEM;
1572 }
1573 
1574 static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn,
1575 					 struct fbnic_ring *txr)
1576 {
1577 	struct device *dev = fbn->netdev->dev.parent;
1578 	int err;
1579 
1580 	if (txr->flags & FBNIC_RING_F_DISABLED)
1581 		return 0;
1582 
1583 	err = fbnic_alloc_tx_ring_desc(fbn, txr);
1584 	if (err)
1585 		return err;
1586 
1587 	if (!(txr->flags & FBNIC_RING_F_CTX))
1588 		return 0;
1589 
1590 	err = fbnic_alloc_tx_ring_buffer(txr);
1591 	if (err)
1592 		goto free_desc;
1593 
1594 	return 0;
1595 
1596 free_desc:
1597 	fbnic_free_ring_resources(dev, txr);
1598 	return err;
1599 }
1600 
1601 static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn,
1602 				    struct fbnic_ring *rxr)
1603 {
1604 	struct device *dev = fbn->netdev->dev.parent;
1605 	size_t desc_size = sizeof(*rxr->desc);
1606 	u32 rxq_size;
1607 	size_t size;
1608 
1609 	switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) {
1610 	case FBNIC_QUEUE_BDQ_HPQ_TAIL:
1611 		rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT;
1612 		desc_size *= FBNIC_BD_FRAG_COUNT;
1613 		break;
1614 	case FBNIC_QUEUE_BDQ_PPQ_TAIL:
1615 		rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT;
1616 		desc_size *= FBNIC_BD_FRAG_COUNT;
1617 		break;
1618 	case FBNIC_QUEUE_RCQ_HEAD:
1619 		rxq_size = fbn->rcq_size;
1620 		break;
1621 	default:
1622 		return -EINVAL;
1623 	}
1624 
1625 	/* Round size up to nearest 4K */
1626 	size = ALIGN(array_size(desc_size, rxq_size), 4096);
1627 
1628 	rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma,
1629 				       GFP_KERNEL | __GFP_NOWARN);
1630 	if (!rxr->desc)
1631 		return -ENOMEM;
1632 
1633 	/* rxq_size should be a power of 2, so mask is just that -1 */
1634 	rxr->size_mask = rxq_size - 1;
1635 	rxr->size = size;
1636 
1637 	return 0;
1638 }
1639 
1640 static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr)
1641 {
1642 	size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1);
1643 
1644 	if (rxr->flags & FBNIC_RING_F_CTX)
1645 		size = sizeof(*rxr->rx_buf) * (rxr->size_mask + 1);
1646 	else
1647 		size = sizeof(*rxr->pkt);
1648 
1649 	rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1650 
1651 	return rxr->rx_buf ? 0 : -ENOMEM;
1652 }
1653 
1654 static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn,
1655 					 struct fbnic_ring *rxr)
1656 {
1657 	struct device *dev = fbn->netdev->dev.parent;
1658 	int err;
1659 
1660 	err = fbnic_alloc_rx_ring_desc(fbn, rxr);
1661 	if (err)
1662 		return err;
1663 
1664 	err = fbnic_alloc_rx_ring_buffer(rxr);
1665 	if (err)
1666 		goto free_desc;
1667 
1668 	return 0;
1669 
1670 free_desc:
1671 	fbnic_free_ring_resources(dev, rxr);
1672 	return err;
1673 }
1674 
1675 static void fbnic_free_qt_resources(struct fbnic_net *fbn,
1676 				    struct fbnic_q_triad *qt)
1677 {
1678 	struct device *dev = fbn->netdev->dev.parent;
1679 
1680 	fbnic_free_ring_resources(dev, &qt->cmpl);
1681 	fbnic_free_ring_resources(dev, &qt->sub1);
1682 	fbnic_free_ring_resources(dev, &qt->sub0);
1683 }
1684 
1685 static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
1686 				       struct fbnic_q_triad *qt)
1687 {
1688 	struct device *dev = fbn->netdev->dev.parent;
1689 	int err;
1690 
1691 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0);
1692 	if (err)
1693 		return err;
1694 
1695 	err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
1696 	if (err)
1697 		goto free_sub1;
1698 
1699 	return 0;
1700 
1701 free_sub1:
1702 	fbnic_free_ring_resources(dev, &qt->sub0);
1703 	return err;
1704 }
1705 
1706 static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
1707 				       struct fbnic_q_triad *qt)
1708 {
1709 	struct device *dev = fbn->netdev->dev.parent;
1710 	int err;
1711 
1712 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
1713 	if (err)
1714 		return err;
1715 
1716 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
1717 	if (err)
1718 		goto free_sub0;
1719 
1720 	err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl);
1721 	if (err)
1722 		goto free_sub1;
1723 
1724 	return 0;
1725 
1726 free_sub1:
1727 	fbnic_free_ring_resources(dev, &qt->sub1);
1728 free_sub0:
1729 	fbnic_free_ring_resources(dev, &qt->sub0);
1730 	return err;
1731 }
1732 
1733 static void fbnic_free_nv_resources(struct fbnic_net *fbn,
1734 				    struct fbnic_napi_vector *nv)
1735 {
1736 	int i, j;
1737 
1738 	/* Free Tx Resources */
1739 	for (i = 0; i < nv->txt_count; i++)
1740 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
1741 
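	/* Free Rx Resources */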
1742 	for (j = 0; j < nv->rxt_count; j++, i++)
1743 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
1744 }
1745 
1746 static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
1747 				    struct fbnic_napi_vector *nv)
1748 {
1749 	int i, j, err;
1750 
1751 	/* Allocate Tx Resources */
1752 	for (i = 0; i < nv->txt_count; i++) {
1753 		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
1754 		if (err)
1755 			goto free_resources;
1756 	}
1757 
1758 	/* Allocate Rx Resources */
1759 	for (j = 0; j < nv->rxt_count; j++, i++) {
1760 		err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
1761 		if (err)
1762 			goto free_resources;
1763 	}
1764 
1765 	return 0;
1766 
1767 free_resources:
1768 	while (i--)
1769 		fbnic_free_qt_resources(fbn, &nv->qt[i]);
1770 	return err;
1771 }
1772 
1773 void fbnic_free_resources(struct fbnic_net *fbn)
1774 {
1775 	int i;
1776 
1777 	for (i = 0; i < fbn->num_napi; i++)
1778 		fbnic_free_nv_resources(fbn, fbn->napi[i]);
1779 }
1780 
1781 int fbnic_alloc_resources(struct fbnic_net *fbn)
1782 {
1783 	int i, err = -ENODEV;
1784 
1785 	for (i = 0; i < fbn->num_napi; i++) {
1786 		err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]);
1787 		if (err)
1788 			goto free_resources;
1789 	}
1790 
1791 	return 0;
1792 
1793 free_resources:
1794 	while (i--)
1795 		fbnic_free_nv_resources(fbn, fbn->napi[i]);
1796 
1797 	return err;
1798 }
1799 
1800 static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv)
1801 {
1802 	int i, j;
1803 
1804 	/* Associate Tx queue with NAPI */
1805 	for (i = 0; i < nv->txt_count; i++) {
1806 		struct fbnic_q_triad *qt = &nv->qt[i];
1807 
1808 		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
1809 				     NETDEV_QUEUE_TYPE_TX, &nv->napi);
1810 	}
1811 
1812 	/* Associate Rx queue with NAPI */
1813 	for (j = 0; j < nv->rxt_count; j++, i++) {
1814 		struct fbnic_q_triad *qt = &nv->qt[i];
1815 
1816 		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
1817 				     NETDEV_QUEUE_TYPE_RX, &nv->napi);
1818 	}
1819 }
1820 
1821 static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv)
1822 {
1823 	int i, j;
1824 
1825 	/* Disassociate Tx queue from NAPI */
1826 	for (i = 0; i < nv->txt_count; i++) {
1827 		struct fbnic_q_triad *qt = &nv->qt[i];
1828 
1829 		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
1830 				     NETDEV_QUEUE_TYPE_TX, NULL);
1831 	}
1832 
1833 	/* Disassociate Rx queue from NAPI */
1834 	for (j = 0; j < nv->rxt_count; j++, i++) {
1835 		struct fbnic_q_triad *qt = &nv->qt[i];
1836 
1837 		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
1838 				     NETDEV_QUEUE_TYPE_RX, NULL);
1839 	}
1840 }
1841 
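/* Publish the real Tx/Rx queue counts to the stack and associate each queue
 * with the NAPI instance that services it.
 */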
1842 int fbnic_set_netif_queues(struct fbnic_net *fbn)
1843 {
1844 	int i, err;
1845 
1846 	err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues,
1847 					fbn->num_rx_queues);
1848 	if (err)
1849 		return err;
1850 
1851 	for (i = 0; i < fbn->num_napi; i++)
1852 		fbnic_set_netif_napi(fbn->napi[i]);
1853 
1854 	return 0;
1855 }
1856 
1857 void fbnic_reset_netif_queues(struct fbnic_net *fbn)
1858 {
1859 	int i;
1860 
1861 	for (i = 0; i < fbn->num_napi; i++)
1862 		fbnic_reset_netif_napi(fbn->napi[i]);
1863 }
1864 
1865 static void fbnic_disable_twq0(struct fbnic_ring *txr)
1866 {
1867 	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL);
1868 
1869 	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;
1870 
1871 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
1872 }
1873 
1874 static void fbnic_disable_tcq(struct fbnic_ring *txr)
1875 {
1876 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
1877 	fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK);
1878 }
1879 
1880 static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
1881 {
1882 	u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL);
1883 
1884 	bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE;
1885 
1886 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
1887 }
1888 
1889 static void fbnic_disable_rcq(struct fbnic_ring *rxr)
1890 {
1891 	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0);
1892 	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK);
1893 }
1894 
1895 void fbnic_napi_disable(struct fbnic_net *fbn)
1896 {
1897 	int i;
1898 
1899 	for (i = 0; i < fbn->num_napi; i++) {
1900 		napi_disable(&fbn->napi[i]->napi);
1901 
1902 		fbnic_nv_irq_disable(fbn->napi[i]);
1903 	}
1904 }
1905 
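/* Clear the enable bit on every Tx work/completion queue and every Rx
 * buffer-descriptor/completion queue owned by this netdev, then flush the
 * posted register writes. Ring contents are reclaimed separately (see
 * fbnic_flush()).
 */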
1906 void fbnic_disable(struct fbnic_net *fbn)
1907 {
1908 	struct fbnic_dev *fbd = fbn->fbd;
1909 	int i, j, t;
1910 
1911 	for (i = 0; i < fbn->num_napi; i++) {
1912 		struct fbnic_napi_vector *nv = fbn->napi[i];
1913 
1914 		/* Disable Tx queue triads */
1915 		for (t = 0; t < nv->txt_count; t++) {
1916 			struct fbnic_q_triad *qt = &nv->qt[t];
1917 
1918 			fbnic_disable_twq0(&qt->sub0);
1919 			fbnic_disable_tcq(&qt->cmpl);
1920 		}
1921 
1922 		/* Disable Rx queue triads */
1923 		for (j = 0; j < nv->rxt_count; j++, t++) {
1924 			struct fbnic_q_triad *qt = &nv->qt[t];
1925 
1926 			fbnic_disable_bdq(&qt->sub0, &qt->sub1);
1927 			fbnic_disable_rcq(&qt->cmpl);
1928 		}
1929 	}
1930 
1931 	fbnic_wrfl(fbd);
1932 }
1933 
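/* Last-resort path used by fbnic_wait_all_queues_idle(): enabling TMI drop
 * mode lets the hardware discard pending Tx traffic so the Tx queues can
 * drain to idle; fbnic_tx_flush_off() clears the drop mode again afterwards.
 */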
1934 static void fbnic_tx_flush(struct fbnic_dev *fbd)
1935 {
1936 	netdev_warn(fbd->netdev, "triggering Tx flush\n");
1937 
1938 	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN,
1939 		    FBNIC_TMI_DROP_CTRL_EN);
1940 }
1941 
1942 static void fbnic_tx_flush_off(struct fbnic_dev *fbd)
1943 {
1944 	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0);
1945 }
1946 
1947 struct fbnic_idle_regs {
1948 	u32 reg_base;
1949 	u8 reg_cnt;
1950 };
1951 
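/* Each idle register reads as all-ones once the queues it covers have
 * drained, so a block is considered idle only when every register in every
 * listed range compares equal to ~0U.
 */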
1952 static bool fbnic_all_idle(struct fbnic_dev *fbd,
1953 			   const struct fbnic_idle_regs *regs,
1954 			   unsigned int nregs)
1955 {
1956 	unsigned int i, j;
1957 
1958 	for (i = 0; i < nregs; i++) {
1959 		for (j = 0; j < regs[i].reg_cnt; j++) {
1960 			if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U)
1961 				return false;
1962 		}
1963 	}
1964 	return true;
1965 }
1966 
1967 static void fbnic_idle_dump(struct fbnic_dev *fbd,
1968 			    const struct fbnic_idle_regs *regs,
1969 			    unsigned int nregs, const char *dir, int err)
1970 {
1971 	unsigned int i, j;
1972 
1973 	netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err);
1974 	for (i = 0; i < nregs; i++)
1975 		for (j = 0; j < regs[i].reg_cnt; j++)
1976 			netdev_err(fbd->netdev, "0x%04x: %08x\n",
1977 				   regs[i].reg_base + j,
1978 				   fbnic_rd32(fbd, regs[i].reg_base + j));
1979 }
1980 
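/* Poll the Tx idle registers every 2 us for up to 500 ms. If Tx fails to
 * drain, temporarily enable TMI drop mode (fbnic_tx_flush()) and retry
 * before giving up, then repeat the poll for the Rx idle registers.
 */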
1981 int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
1982 {
1983 	static const struct fbnic_idle_regs tx[] = {
1984 		{ FBNIC_QM_TWQ_IDLE(0),	FBNIC_QM_TWQ_IDLE_CNT, },
1985 		{ FBNIC_QM_TQS_IDLE(0),	FBNIC_QM_TQS_IDLE_CNT, },
1986 		{ FBNIC_QM_TDE_IDLE(0),	FBNIC_QM_TDE_IDLE_CNT, },
1987 		{ FBNIC_QM_TCQ_IDLE(0),	FBNIC_QM_TCQ_IDLE_CNT, },
1988 	}, rx[] = {
1989 		{ FBNIC_QM_HPQ_IDLE(0),	FBNIC_QM_HPQ_IDLE_CNT, },
1990 		{ FBNIC_QM_PPQ_IDLE(0),	FBNIC_QM_PPQ_IDLE_CNT, },
1991 		{ FBNIC_QM_RCQ_IDLE(0),	FBNIC_QM_RCQ_IDLE_CNT, },
1992 	};
1993 	bool idle;
1994 	int err;
1995 
1996 	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
1997 				       false, fbd, tx, ARRAY_SIZE(tx));
1998 	if (err == -ETIMEDOUT) {
1999 		fbnic_tx_flush(fbd);
2000 		err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle,
2001 					       2, 500000, false,
2002 					       fbd, tx, ARRAY_SIZE(tx));
2003 		fbnic_tx_flush_off(fbd);
2004 	}
2005 	if (err) {
2006 		fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err);
2007 		if (may_fail)
2008 			return err;
2009 	}
2010 
2011 	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
2012 				       false, fbd, rx, ARRAY_SIZE(rx));
2013 	if (err)
2014 		fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err);
2015 	return err;
2016 }
2017 
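/* Reclaim whatever is still sitting in the rings after the queues have been
 * disabled: discard unprocessed Tx work, zero the completion descriptor
 * rings, reset BQL for each active Tx queue and release the Rx buffer pages
 * along with any partially assembled packet.
 */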
2018 void fbnic_flush(struct fbnic_net *fbn)
2019 {
2020 	int i;
2021 
2022 	for (i = 0; i < fbn->num_napi; i++) {
2023 		struct fbnic_napi_vector *nv = fbn->napi[i];
2024 		int j, t;
2025 
2026 		/* Flush any processed Tx Queue Triads and drop the rest */
2027 		for (t = 0; t < nv->txt_count; t++) {
2028 			struct fbnic_q_triad *qt = &nv->qt[t];
2029 			struct netdev_queue *tx_queue;
2030 
2031 			/* Clean the work queues of unprocessed work */
2032 			fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);
2033 
2034 			/* Reset completion queue descriptor ring */
2035 			memset(qt->cmpl.desc, 0, qt->cmpl.size);
2036 
2037 			/* Nothing else to do if Tx queue is disabled */
2038 			if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
2039 				continue;
2040 
2041 			/* Reset BQL associated with Tx queue */
2042 			tx_queue = netdev_get_tx_queue(nv->napi.dev,
2043 						       qt->sub0.q_idx);
2044 			netdev_tx_reset_queue(tx_queue);
2045 		}
2046 
2047 		/* Flush any processed Rx Queue Triads and drop the rest */
2048 		for (j = 0; j < nv->rxt_count; j++, t++) {
2049 			struct fbnic_q_triad *qt = &nv->qt[t];
2050 
2051 			/* Clean the work queues of unprocessed work */
2052 			fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail);
2053 			fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail);
2054 
2055 			/* Reset completion queue descriptor ring */
2056 			memset(qt->cmpl.desc, 0, qt->cmpl.size);
2057 
2058 			fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
2059 			qt->cmpl.pkt->buff.data_hard_start = NULL;
2060 		}
2061 	}
2062 }
2063 
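/* Prime the header and payload BDQs of every Rx triad with fresh pages so
 * the hardware has buffers to place packets in once the queues are enabled.
 */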
2064 void fbnic_fill(struct fbnic_net *fbn)
2065 {
2066 	int i;
2067 
2068 	for (i = 0; i < fbn->num_napi; i++) {
2069 		struct fbnic_napi_vector *nv = fbn->napi[i];
2070 		int j, t;
2071 
2072 		/* Configure NAPI mapping and populate pages
2073 		 * in the BDQ rings to use for Rx
2074 		 */
2075 		for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
2076 			struct fbnic_q_triad *qt = &nv->qt[t];
2077 
2078 			/* Populate the header and payload BDQs */
2079 			fbnic_fill_bdq(nv, &qt->sub0);
2080 			fbnic_fill_bdq(nv, &qt->sub1);
2081 		}
2082 	}
2083 }
2084 
2085 static void fbnic_enable_twq0(struct fbnic_ring *twq)
2086 {
2087 	u32 log_size = fls(twq->size_mask);
2088 
2089 	if (!twq->size_mask)
2090 		return;
2091 
2092 	/* Reset head/tail */
2093 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
2094 	twq->tail = 0;
2095 	twq->head = 0;
2096 
2097 	/* Store descriptor ring address and size */
2098 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
2099 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));
2100 
2101 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2102 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);
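	/* e.g. a 1024-entry ring has size_mask 1023, fls() == 10, so 0xa is
	 * written; a 65536-entry ring gives fls() == 16, whose low four bits
	 * are 0, which is how "64K ring size is 0" above is encoded.
	 */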
2103 
2104 	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
2105 }
2106 
2107 static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
2108 			     struct fbnic_ring *tcq)
2109 {
2110 	u32 log_size = fls(tcq->size_mask);
2111 
2112 	if (!tcq->size_mask)
2113 		return;
2114 
2115 	/* Reset head/tail */
2116 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
2117 	tcq->tail = 0;
2118 	tcq->head = 0;
2119 
2120 	/* Store descriptor ring address and size */
2121 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
2122 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));
2123 
2124 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2125 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);
2126 
2127 	/* Store interrupt information for the completion queue */
2128 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
2129 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
2130 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);
2131 
2132 	/* Enable queue */
2133 	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
2134 }
2135 
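/* The header (HPQ) and payload (PPQ) buffer-descriptor queues share a single
 * BDQ control register, addressed through the HPQ. The PPQ is only programmed
 * and its enable bit only set when the ring has a non-zero size.
 */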
2136 static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
2137 {
2138 	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
2139 	u32 log_size;
2140 
2141 	/* Reset head/tail */
2142 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
2143 	ppq->tail = 0;
2144 	ppq->head = 0;
2145 	hpq->tail = 0;
2146 	hpq->head = 0;
2147 
2148 	log_size = fls(hpq->size_mask);
2149 
2150 	/* Store descriptor ring address and size */
2151 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
2152 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));
2153 
2154 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2155 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);
2156 
2157 	if (!ppq->size_mask)
2158 		goto write_ctl;
2159 
2160 	log_size = fls(ppq->size_mask);
2161 
2162 	/* Add enabling of PPQ to BDQ control */
2163 	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;
2164 
2165 	/* Store descriptor ring address and size */
2166 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
2167 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));
2168 	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);
2169 
2170 write_ctl:
2171 	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
2172 }
2173 
2174 static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
2175 				       struct fbnic_ring *rcq)
2176 {
2177 	u32 drop_mode, rcq_ctl;
2178 
2179 	drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;
2180 
2181 	/* Specify packet layout */
2182 	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
2183 	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
2184 	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);
2185 
2186 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
2187 }
2188 
2189 static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc)
2190 {
2191 	u32 threshold;
2192 
2193 	/* Set the threshold to half the ring size if rx_frames
2194 	 * is not configured
2195 	 */
2196 	threshold = rx_desc ? : rcq->size_mask / 2;
2197 
2198 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv_idx);
2199 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, threshold);
2200 }
2201 
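/* Program the Rx and Tx interrupt coalescing timeouts for this vector in a
 * single CQ rearm write. "arm" may additionally carry
 * FBNIC_INTR_CQ_REARM_INTR_UNMASK so the interrupt is re-enabled in the same
 * register write (see fbnic_nv_irq_enable()).
 */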
2202 void fbnic_config_txrx_usecs(struct fbnic_napi_vector *nv, u32 arm)
2203 {
2204 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
2205 	struct fbnic_dev *fbd = nv->fbd;
2206 	u32 val = arm;
2207 
2208 	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT, fbn->rx_usecs) |
2209 	       FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT_UPD_EN;
2210 	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT, fbn->tx_usecs) |
2211 	       FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT_UPD_EN;
2212 
2213 	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
2214 }
2215 
2216 void fbnic_config_rx_frames(struct fbnic_napi_vector *nv)
2217 {
2218 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
2219 	int i;
2220 
2221 	for (i = nv->txt_count; i < nv->rxt_count + nv->txt_count; i++) {
2222 		struct fbnic_q_triad *qt = &nv->qt[i];
2223 
2224 		fbnic_config_rim_threshold(&qt->cmpl, nv->v_idx,
2225 					   fbn->rx_max_frames *
2226 					   FBNIC_MIN_RXD_PER_FRAME);
2227 	}
2228 }
2229 
2230 static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
2231 			     struct fbnic_ring *rcq)
2232 {
2233 	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
2234 	u32 log_size = fls(rcq->size_mask);
2235 	u32 rcq_ctl;
2236 
2237 	fbnic_config_drop_mode_rcq(nv, rcq);
2238 
2239 	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
2240 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK,
2241 			      FBNIC_RX_MAX_HDR) |
2242 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
2243 			      FBNIC_RX_PAYLD_OFFSET) |
2244 		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
2245 			      FBNIC_RX_PAYLD_PG_CL);
2246 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);
2247 
2248 	/* Reset head/tail */
2249 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
2250 	rcq->head = 0;
2251 	rcq->tail = 0;
2252 
2253 	/* Store descriptor ring address and size */
2254 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
2255 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));
2256 
2257 	/* Write lower 4 bits of log size as 64K ring size is 0 */
2258 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);
2259 
2260 	/* Store interrupt information for the completion queue */
2261 	fbnic_config_rim_threshold(rcq, nv->v_idx, fbn->rx_max_frames *
2262 						   FBNIC_MIN_RXD_PER_FRAME);
2263 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);
2264 
2265 	/* Enable queue */
2266 	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
2267 }
2268 
2269 void fbnic_enable(struct fbnic_net *fbn)
2270 {
2271 	struct fbnic_dev *fbd = fbn->fbd;
2272 	int i;
2273 
2274 	for (i = 0; i < fbn->num_napi; i++) {
2275 		struct fbnic_napi_vector *nv = fbn->napi[i];
2276 		int j, t;
2277 
2278 		/* Setup Tx Queue Triads */
2279 		for (t = 0; t < nv->txt_count; t++) {
2280 			struct fbnic_q_triad *qt = &nv->qt[t];
2281 
2282 			fbnic_enable_twq0(&qt->sub0);
2283 			fbnic_enable_tcq(nv, &qt->cmpl);
2284 		}
2285 
2286 		/* Setup Rx Queue Triads */
2287 		for (j = 0; j < nv->rxt_count; j++, t++) {
2288 			struct fbnic_q_triad *qt = &nv->qt[t];
2289 
2290 			fbnic_enable_bdq(&qt->sub0, &qt->sub1);
2291 			fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
2292 			fbnic_enable_rcq(nv, &qt->cmpl);
2293 		}
2294 	}
2295 
2296 	fbnic_wrfl(fbd);
2297 }
2298 
2299 static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
2300 {
2301 	fbnic_config_txrx_usecs(nv, FBNIC_INTR_CQ_REARM_INTR_UNMASK);
2302 }
2303 
2304 void fbnic_napi_enable(struct fbnic_net *fbn)
2305 {
2306 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
2307 	struct fbnic_dev *fbd = fbn->fbd;
2308 	int i;
2309 
2310 	for (i = 0; i < fbn->num_napi; i++) {
2311 		struct fbnic_napi_vector *nv = fbn->napi[i];
2312 
2313 		napi_enable(&nv->napi);
2314 
2315 		fbnic_nv_irq_enable(nv);
2316 
2317 		/* Record bit used for NAPI IRQs so we can
2318 		 * set the mask appropriately
2319 		 */
2320 		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
2321 	}
2322 
2323 	/* Force the first interrupt on the device to guarantee
2324 	 * that any packets that may have been enqueued during the
2325 	 * bringup are processed.
2326 	 */
2327 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
2328 		if (!irqs[i])
2329 			continue;
2330 		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
2331 	}
2332 
2333 	fbnic_wrfl(fbd);
2334 }
2335 
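/* Check every Rx triad for buffer-descriptor queues that are nearly empty.
 * A vector whose HPQ or PPQ holds fewer than four pages may no longer be
 * able to complete a packet and raise an interrupt on its own, so unmask
 * and fire its interrupt to get NAPI running and the rings refilled.
 */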
2336 void fbnic_napi_depletion_check(struct net_device *netdev)
2337 {
2338 	struct fbnic_net *fbn = netdev_priv(netdev);
2339 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
2340 	struct fbnic_dev *fbd = fbn->fbd;
2341 	int i, j, t;
2342 
2343 	for (i = 0; i < fbn->num_napi; i++) {
2344 		struct fbnic_napi_vector *nv = fbn->napi[i];
2345 
2346 		/* Find RQs which are completely out of pages */
2347 		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
2348 			/* Assume 4 pages is always enough to fit a packet
2349 			 * and therefore generate a completion and an IRQ.
2350 			 */
2351 			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
2352 			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
2353 				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
2354 		}
2355 	}
2356 
2357 	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
2358 		if (!irqs[i])
2359 			continue;
2360 		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
2361 		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
2362 	}
2363 
2364 	fbnic_wrfl(fbd);
2365 }
2366