1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3
4 #include <linux/bitfield.h>
5 #include <linux/iopoll.h>
6 #include <linux/pci.h>
7 #include <net/netdev_queues.h>
8 #include <net/page_pool/helpers.h>
9
10 #include "fbnic.h"
11 #include "fbnic_csr.h"
12 #include "fbnic_netdev.h"
13 #include "fbnic_txrx.h"
14
15 struct fbnic_xmit_cb {
16 u32 bytecount;
17 u8 desc_count;
18 int hw_head;
19 };
20
21 #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb))
22
fbnic_ring_csr_base(const struct fbnic_ring * ring)23 static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring)
24 {
25 unsigned long csr_base = (unsigned long)ring->doorbell;
26
27 csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1);
28
29 return (u32 __iomem *)csr_base;
30 }
31
fbnic_ring_rd32(struct fbnic_ring * ring,unsigned int csr)32 static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr)
33 {
34 u32 __iomem *csr_base = fbnic_ring_csr_base(ring);
35
36 return readl(csr_base + csr);
37 }
38
/* Write a 32-bit CSR at the given word offset within the ring's window */
static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
{
	writel(val, fbnic_ring_csr_base(ring) + csr);
}
45
fbnic_desc_unused(struct fbnic_ring * ring)46 static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
47 {
48 return (ring->head - ring->tail - 1) & ring->size_mask;
49 }
50
fbnic_desc_used(struct fbnic_ring * ring)51 static unsigned int fbnic_desc_used(struct fbnic_ring *ring)
52 {
53 return (ring->tail - ring->head) & ring->size_mask;
54 }
55
txring_txq(const struct net_device * dev,const struct fbnic_ring * ring)56 static struct netdev_queue *txring_txq(const struct net_device *dev,
57 const struct fbnic_ring *ring)
58 {
59 return netdev_get_tx_queue(dev, ring->q_idx);
60 }
61
fbnic_maybe_stop_tx(const struct net_device * dev,struct fbnic_ring * ring,const unsigned int size)62 static int fbnic_maybe_stop_tx(const struct net_device *dev,
63 struct fbnic_ring *ring,
64 const unsigned int size)
65 {
66 struct netdev_queue *txq = txring_txq(dev, ring);
67 int res;
68
69 res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
70 FBNIC_TX_DESC_WAKEUP);
71
72 return !res;
73 }
74
fbnic_tx_sent_queue(struct sk_buff * skb,struct fbnic_ring * ring)75 static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring)
76 {
77 struct netdev_queue *dev_queue = txring_txq(skb->dev, ring);
78 unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount;
79 bool xmit_more = netdev_xmit_more();
80
81 /* TBD: Request completion more often if xmit_more becomes large */
82
83 return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more);
84 }
85
/* Undo the dma_map_single() recorded in an address/length TWD */
static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd)
{
	u64 raw = le64_to_cpu(*twd);

	dma_unmap_single(dev, FIELD_GET(FBNIC_TWD_ADDR_MASK, raw),
			 FIELD_GET(FBNIC_TWD_LEN_MASK, raw), DMA_TO_DEVICE);
}
97
/* Undo the skb_frag_dma_map() recorded in an address/length TWD */
static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
{
	u64 raw = le64_to_cpu(*twd);

	dma_unmap_page(dev, FIELD_GET(FBNIC_TWD_ADDR_MASK, raw),
		       FIELD_GET(FBNIC_TWD_LEN_MASK, raw), DMA_TO_DEVICE);
}
109
/* Build a little-endian TWD type field for the given type suffix */
#define FBNIC_TWD_TYPE(_type) \
	cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))

/* Encode checksum-offload parameters into the Tx metadata descriptor.
 *
 * Returns false in every path — false means "do not drop" here; frames
 * without CHECKSUM_PARTIAL simply skip the offload fields.
 */
static bool
fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	unsigned int l2len, i3len;

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
		return false;

	l2len = skb_mac_header_len(skb);
	i3len = skb_checksum_start(skb) - skb_network_header(skb);

	/* Offsets and lengths are carried in 2-byte units; the divisions
	 * are exact because fbnic_features_check() clears the csum
	 * feature for odd values.
	 */
	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
					skb->csum_offset / 2));

	*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);

	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
			     FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
	return false;
}
133
134 static void
fbnic_rx_csum(u64 rcd,struct sk_buff * skb,struct fbnic_ring * rcq)135 fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq)
136 {
137 skb_checksum_none_assert(skb);
138
139 if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM)))
140 return;
141
142 if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
143 skb->ip_summed = CHECKSUM_UNNECESSARY;
144 } else {
145 u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd);
146
147 skb->ip_summed = CHECKSUM_COMPLETE;
148 skb->csum = (__force __wsum)csum;
149 }
150 }
151
/* Map the skb's linear data and page fragments for DMA, writing one
 * address/length TWD per segment after the caller's metadata descriptor.
 *
 * Returns true on mapping failure (caller drops the skb) and false on
 * success; on success the doorbell may be rung depending on xmit_more.
 */
static bool
fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	struct device *dev = skb->dev->dev.parent;
	unsigned int tail = ring->tail, first;
	unsigned int size, data_len;
	skb_frag_t *frag;
	dma_addr_t dma;
	__le64 *twd;

	/* Stash the skb in the metadata descriptor's slot so the
	 * completion path can find it again from the ring head.
	 */
	ring->tx_buf[tail] = skb;

	tail++;
	tail &= ring->size_mask;
	first = tail;

	size = skb_headlen(skb);
	data_len = skb->data_len;

	/* Each TWD length field is bounded; oversized segments can't be sent */
	if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
		goto dma_error;

	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		twd = &ring->desc[tail];

		/* Checks the head mapping on the first pass and each
		 * fragment mapping from the loop bottom thereafter.
		 */
		if (dma_mapping_error(dev, dma))
			goto dma_error;

		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
					      FBNIC_TWD_TYPE_AL));

		tail++;
		tail &= ring->size_mask;

		if (!data_len)
			break;

		size = skb_frag_size(frag);
		data_len -= size;

		if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
			goto dma_error;

		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
	}

	/* Mark the final descriptor so the NIC knows the frame ends here */
	*twd |= FBNIC_TWD_TYPE(LAST_AL);

	FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask;

	ring->tail = tail;

	/* Verify there is room for another packet */
	fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);

	if (fbnic_tx_sent_queue(skb, ring)) {
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION);

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(tail, ring->doorbell);
	}

	return false;
dma_error:
	if (net_ratelimit())
		netdev_err(skb->dev, "TX DMA map failed\n");

	/* Walk back over descriptors already written and unmap them;
	 * the entry at 'first' holds the single (linear head) mapping,
	 * everything after it holds page fragment mappings.
	 */
	while (tail != first) {
		tail--;
		tail &= ring->size_mask;
		twd = &ring->desc[tail];
		if (tail == first)
			fbnic_unmap_single_twd(dev, twd);
		else
			fbnic_unmap_page_twd(dev, twd);
	}

	return true;
}
237
/* Minimum Ethernet frame length; shorter frames are zero-padded */
#define FBNIC_MIN_FRAME_LEN 60

/* Queue one skb on a Tx ring.
 *
 * Writes the metadata descriptor, applies checksum offload flags and
 * maps the frame. Returns NETDEV_TX_BUSY only when descriptor space is
 * insufficient; all other failures consume the skb and count a drop.
 */
static netdev_tx_t
fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
{
	__le64 *meta = &ring->desc[ring->tail];
	u16 desc_needed;

	/* skb_put_padto() frees the skb on failure, so the error path
	 * only needs to update the drop counter.
	 */
	if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN))
		goto err_count;

	/* Need: 1 descriptor per page,
	 * + 1 desc for skb_head,
	 * + 2 desc for metadata and timestamp metadata
	 * + 7 desc gap to keep tail from touching head
	 * otherwise try next time
	 */
	desc_needed = skb_shinfo(skb)->nr_frags + 10;
	if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed))
		return NETDEV_TX_BUSY;

	*meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC);

	/* Write all members within DWORD to condense this into 2 4B writes */
	FBNIC_XMIT_CB(skb)->bytecount = skb->len;
	FBNIC_XMIT_CB(skb)->desc_count = 0;

	if (fbnic_tx_offloads(ring, skb, meta))
		goto err_free;

	if (fbnic_tx_map(ring, skb, meta))
		goto err_free;

	return NETDEV_TX_OK;

err_free:
	dev_kfree_skb_any(skb);
err_count:
	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.dropped++;
	u64_stats_update_end(&ring->stats.syncp);
	return NETDEV_TX_OK;
}
281
fbnic_xmit_frame(struct sk_buff * skb,struct net_device * dev)282 netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
283 {
284 struct fbnic_net *fbn = netdev_priv(dev);
285 unsigned int q_map = skb->queue_mapping;
286
287 return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
288 }
289
290 netdev_features_t
fbnic_features_check(struct sk_buff * skb,struct net_device * dev,netdev_features_t features)291 fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
292 netdev_features_t features)
293 {
294 unsigned int l2len, l3len;
295
296 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
297 return features;
298
299 l2len = skb_mac_header_len(skb);
300 l3len = skb_checksum_start(skb) - skb_network_header(skb);
301
302 /* Check header lengths are multiple of 2.
303 * In case of 6in6 we support longer headers (IHLEN + OHLEN)
304 * but keep things simple for now, 512B is plenty.
305 */
306 if ((l2len | l3len | skb->csum_offset) % 2 ||
307 !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
308 !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
309 !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
310 return features & ~NETIF_F_CSUM_MASK;
311
312 return features;
313 }
314
/* Reclaim Tx descriptors up to the head reported by the NIC, unmapping
 * DMA and freeing skbs. When discard is set the packets are counted as
 * drops instead of completions (caller decides; the visible caller
 * passes false).
 */
static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
			     struct fbnic_ring *ring, bool discard,
			     unsigned int hw_head)
{
	u64 total_bytes = 0, total_packets = 0;
	unsigned int head = ring->head;
	struct netdev_queue *txq;
	unsigned int clean_desc;

	/* Descriptors the NIC has consumed since our last pass */
	clean_desc = (hw_head - head) & ring->size_mask;

	while (clean_desc) {
		struct sk_buff *skb = ring->tx_buf[head];
		unsigned int desc_cnt;

		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
		/* Stop if the next packet is not fully completed yet */
		if (desc_cnt > clean_desc)
			break;

		ring->tx_buf[head] = NULL;

		clean_desc -= desc_cnt;

		/* Skip leading descriptors without the AL type bit
		 * (metadata) to reach the first address/length TWD.
		 */
		while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) {
			head++;
			head &= ring->size_mask;
			desc_cnt--;
		}

		/* First AL descriptor holds the linear-data mapping ... */
		fbnic_unmap_single_twd(nv->dev, &ring->desc[head]);
		head++;
		head &= ring->size_mask;
		desc_cnt--;

		/* ... any remaining AL descriptors hold page fragments */
		while (desc_cnt--) {
			fbnic_unmap_page_twd(nv->dev, &ring->desc[head]);
			head++;
			head &= ring->size_mask;
		}

		total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
		total_packets += 1;

		napi_consume_skb(skb, napi_budget);
	}

	/* Nothing reclaimed: leave head and BQL state untouched */
	if (!total_bytes)
		return;

	ring->head = head;

	txq = txring_txq(nv->napi.dev, ring);

	/* Discarded packets still complete against BQL so the queue's
	 * in-flight accounting stays balanced.
	 */
	if (unlikely(discard)) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped += total_packets;
		u64_stats_update_end(&ring->stats.syncp);

		netdev_tx_completed_queue(txq, total_packets, total_bytes);
		return;
	}

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.bytes += total_bytes;
	ring->stats.packets += total_packets;
	u64_stats_update_end(&ring->stats.syncp);

	/* Report completions and wake the queue if it was stopped and
	 * enough descriptors have become free again.
	 */
	netif_txq_completed_wake(txq, total_packets, total_bytes,
				 fbnic_desc_unused(ring),
				 FBNIC_TX_DESC_WAKEUP);
}
386
fbnic_page_pool_init(struct fbnic_ring * ring,unsigned int idx,struct page * page)387 static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
388 struct page *page)
389 {
390 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
391
392 page_pool_fragment_page(page, PAGECNT_BIAS_MAX);
393 rx_buf->pagecnt_bias = PAGECNT_BIAS_MAX;
394 rx_buf->page = page;
395 }
396
fbnic_page_pool_get(struct fbnic_ring * ring,unsigned int idx)397 static struct page *fbnic_page_pool_get(struct fbnic_ring *ring,
398 unsigned int idx)
399 {
400 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
401
402 rx_buf->pagecnt_bias--;
403
404 return rx_buf->page;
405 }
406
/* Release the ring's hold on the page at idx.
 *
 * Returns the remaining bias to the page_pool fragment count; if that
 * drops the count to zero no user is left and the page goes back to the
 * pool (a nonzero budget permits direct recycling from NAPI context).
 */
static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
				  struct fbnic_napi_vector *nv, int budget)
{
	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
	struct page *page = rx_buf->page;

	if (!page_pool_unref_page(page, rx_buf->pagecnt_bias))
		page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget);

	rx_buf->page = NULL;
}
418
/* Clean the Tx work queue of a triad; a negative head0 means the NIC
 * reported no new completions for it.
 */
static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
			    struct fbnic_q_triad *qt, s32 head0)
{
	if (head0 < 0)
		return;

	fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
}
425
/* Process the Tx completion queue for one triad and clean the work
 * queue up to the last head the NIC reported.
 */
static void
fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
		int napi_budget)
{
	struct fbnic_ring *cmpl = &qt->cmpl;
	__le64 *raw_tcd, done;
	u32 head = cmpl->head;
	s32 head0 = -1;

	/* The DONE bit is a phase flag whose expected value flips every
	 * time the ring wraps, so stale descriptors from the previous lap
	 * are never mistaken for new ones. head carries the wrap bit
	 * (size_mask + 1) to derive the current phase.
	 */
	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
	raw_tcd = &cmpl->desc[head & cmpl->size_mask];

	/* Walk the completion queue collecting the heads reported by NIC */
	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
		u64 tcd;

		/* Don't read descriptor contents before the DONE check */
		dma_rmb();

		tcd = le64_to_cpu(*raw_tcd);

		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
		case FBNIC_TCD_TYPE_0:
			/* Only TWQ0 completions are consumed here */
			if (!(tcd & FBNIC_TCD_TWQ1))
				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
						  tcd);
			/* Currently all err status bits are related to
			 * timestamps and as those have yet to be added
			 * they are skipped for now.
			 */
			break;
		default:
			break;
		}

		raw_tcd++;
		head++;
		if (!(head & cmpl->size_mask)) {
			/* Wrapped: flip the expected phase and restart at
			 * the base of the ring.
			 */
			done ^= cpu_to_le64(FBNIC_TCD_DONE);
			raw_tcd = &cmpl->desc[0];
		}
	}

	/* Record the current head/tail of the queue */
	if (cmpl->head != head) {
		cmpl->head = head;
		writel(head & cmpl->size_mask, cmpl->doorbell);
	}

	/* Unmap and free processed buffers */
	fbnic_clean_twq(nv, napi_budget, qt, head0);
}
477
fbnic_clean_bdq(struct fbnic_napi_vector * nv,int napi_budget,struct fbnic_ring * ring,unsigned int hw_head)478 static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
479 struct fbnic_ring *ring, unsigned int hw_head)
480 {
481 unsigned int head = ring->head;
482
483 if (head == hw_head)
484 return;
485
486 do {
487 fbnic_page_pool_drain(ring, head, nv, napi_budget);
488
489 head++;
490 head &= ring->size_mask;
491 } while (head != hw_head);
492
493 ring->head = head;
494 }
495
/* Write the buffer descriptor(s) handing one page to the NIC.
 *
 * The page's DMA address and pool ID are merged into a single BD value.
 * When the system page size exceeds 4K, one page backs
 * FBNIC_BD_FRAG_COUNT consecutive descriptor slots, so both the
 * descriptor pointer and the per-fragment address/ID bits must advance
 * each iteration. The original loop never advanced bdq_desc, leaving
 * all iterations overwriting the first slot on >4K-page systems; with
 * 4K pages (FBNIC_BD_FRAG_COUNT == 1) behavior is unchanged.
 */
static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page)
{
	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
	dma_addr_t dma = page_pool_get_dma_addr(page);
	u64 bd, i = FBNIC_BD_FRAG_COUNT;

	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);

	/* In the case that a page size is larger than 4K we will map a
	 * single page to multiple fragments. The fragments will be
	 * FBNIC_BD_FRAG_COUNT in size and the lower n bits will be used
	 * to indicate the individual fragment IDs.
	 */
	do {
		*bdq_desc++ = cpu_to_le64(bd);
		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
	} while (--i);
}
516
fbnic_fill_bdq(struct fbnic_napi_vector * nv,struct fbnic_ring * bdq)517 static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
518 {
519 unsigned int count = fbnic_desc_unused(bdq);
520 unsigned int i = bdq->tail;
521
522 if (!count)
523 return;
524
525 do {
526 struct page *page;
527
528 page = page_pool_dev_alloc_pages(nv->page_pool);
529 if (!page)
530 break;
531
532 fbnic_page_pool_init(bdq, i, page);
533 fbnic_bd_prep(bdq, i, page);
534
535 i++;
536 i &= bdq->size_mask;
537
538 count--;
539 } while (count);
540
541 if (bdq->tail != i) {
542 bdq->tail = i;
543
544 /* Force DMA writes to flush before writing to tail */
545 dma_wmb();
546
547 writel(i, bdq->doorbell);
548 }
549 }
550
fbnic_hdr_pg_start(unsigned int pg_off)551 static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
552 {
553 /* The headroom of the first header may be larger than FBNIC_RX_HROOM
554 * due to alignment. So account for that by just making the page
555 * offset 0 if we are starting at the first header.
556 */
557 if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
558 pg_off == ALIGN(FBNIC_RX_HROOM, 128))
559 return 0;
560
561 return pg_off - FBNIC_RX_HROOM;
562 }
563
fbnic_hdr_pg_end(unsigned int pg_off,unsigned int len)564 static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
565 {
566 /* Determine the end of the buffer by finding the start of the next
567 * and then subtracting the headroom from that frame.
568 */
569 pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;
570
571 return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
572 }
573
/* Start assembling a new Rx frame: wrap the header buffer described by
 * rcd in the pkt's xdp_buff and reset its payload accounting.
 */
static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
	unsigned char *hdr_start;

	/* data_hard_start should always be NULL when this is called */
	WARN_ON_ONCE(pkt->buff.data_hard_start);

	/* Short-cut the end calculation if we know page is fully consumed */
	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
		     FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len);
	hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off);

	/* xdp headroom spans the HW headroom plus the fixed Rx padding */
	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
	frame_sz = hdr_pg_end - hdr_pg_start;
	xdp_init_buff(&pkt->buff, frame_sz, NULL);
	/* Select the correct fragment of a multi-fragment (>4K) page */
	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
			FBNIC_BD_FRAG_SIZE;

	/* Sync DMA buffer */
	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
				      hdr_pg_start, frame_sz,
				      DMA_BIDIRECTIONAL);

	/* Build frame around buffer */
	hdr_start = page_address(page) + hdr_pg_start;

	xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
			 len - FBNIC_RX_PAD, true);

	/* Reset payload accounting for the frame being assembled */
	pkt->data_truesize = 0;
	pkt->data_len = 0;
	pkt->nr_frags = 0;
}
614
/* Append one payload buffer described by rcd as a fragment of the frame
 * being assembled in pkt.
 */
static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx);
	struct skb_shared_info *shinfo;
	unsigned int truesize;

	/* If the buffer finishes the page, charge everything through its
	 * end; otherwise charge the 128B-aligned length.
	 */
	truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
		   FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);

	/* Select the correct fragment of a multi-fragment (>4K) page */
	pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
		  FBNIC_BD_FRAG_SIZE;

	/* Sync DMA buffer */
	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
				      pg_off, truesize, DMA_BIDIRECTIONAL);

	/* Add page to xdp shared info */
	shinfo = xdp_get_shared_info_from_buff(&pkt->buff);

	/* Accumulate the truesize consumed by payload fragments */
	pkt->data_truesize += truesize;

	__skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len);

	/* Accumulate the payload byte count for this frame */
	pkt->data_len += len;
}
647
fbnic_put_pkt_buff(struct fbnic_napi_vector * nv,struct fbnic_pkt_buff * pkt,int budget)648 static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv,
649 struct fbnic_pkt_buff *pkt, int budget)
650 {
651 struct skb_shared_info *shinfo;
652 struct page *page;
653 int nr_frags;
654
655 if (!pkt->buff.data_hard_start)
656 return;
657
658 shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
659 nr_frags = pkt->nr_frags;
660
661 while (nr_frags--) {
662 page = skb_frag_page(&shinfo->frags[nr_frags]);
663 page_pool_put_full_page(nv->page_pool, page, !!budget);
664 }
665
666 page = virt_to_page(pkt->buff.data_hard_start);
667 page_pool_put_full_page(nv->page_pool, page, !!budget);
668 }
669
/* Convert the assembled pkt buffer into an skb.
 *
 * Returns NULL if napi_build_skb() fails; on success the skb owns the
 * header buffer and all payload fragments and is marked for page-pool
 * recycling.
 */
static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
				       struct fbnic_pkt_buff *pkt)
{
	unsigned int nr_frags = pkt->nr_frags;
	struct skb_shared_info *shinfo;
	unsigned int truesize;
	struct sk_buff *skb;

	/* Truesize spans the buffer start through the end of tailroom */
	truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM -
		   pkt->buff.data_hard_start;

	/* Build frame around buffer */
	skb = napi_build_skb(pkt->buff.data_hard_start, truesize);
	if (unlikely(!skb))
		return NULL;

	/* Push data pointer to start of data, put tail to end of data */
	skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start);
	__skb_put(skb, pkt->buff.data_end - pkt->buff.data);

	/* Add tracking for metadata at the start of the frame */
	skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta);

	/* Add Rx frags */
	if (nr_frags) {
		/* Verify that shared info didn't move */
		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
		WARN_ON(skb_shinfo(skb) != shinfo);

		/* Account for the payload collected in fbnic_add_rx_frag */
		skb->truesize += pkt->data_truesize;
		skb->data_len += pkt->data_len;
		shinfo->nr_frags = nr_frags;
		skb->len += pkt->data_len;
	}

	skb_mark_for_recycle(skb);

	/* Set MAC header specific fields */
	skb->protocol = eth_type_trans(skb, nv->napi.dev);

	return skb;
}
712
/* Report the deepest header layer the RSS hash covers */
static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
{
	if (rcd & FBNIC_RCD_META_L4_TYPE_MASK)
		return PKT_HASH_TYPE_L4;
	if (rcd & FBNIC_RCD_META_L3_TYPE_MASK)
		return PKT_HASH_TYPE_L3;

	return PKT_HASH_TYPE_L2;
}
719
/* Fill skb metadata (checksum, RSS hash, Rx queue) from the completion
 * descriptor before handing the frame to the stack.
 */
static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
				      u64 rcd, struct sk_buff *skb,
				      struct fbnic_q_triad *qt)
{
	struct fbnic_ring *rcq = &qt->cmpl;

	fbnic_rx_csum(rcd, skb, rcq);

	if (nv->napi.dev->features & NETIF_F_RXHASH) {
		u32 hash = FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd);

		skb_set_hash(skb, hash, fbnic_skb_hash_type(rcd));
	}

	skb_record_rx_queue(skb, rcq->q_idx);
}
736
fbnic_rcd_metadata_err(u64 rcd)737 static bool fbnic_rcd_metadata_err(u64 rcd)
738 {
739 return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd);
740 }
741
/* Process up to budget frames from the Rx completion queue.
 *
 * A frame is delivered across several descriptors: a header buffer
 * (HDR_AL), zero or more payload buffers (PAY_AL), optional metadata
 * (OPT_META), and a closing META descriptor that triggers skb delivery.
 * Returns the number of packets handed to the stack.
 */
static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
			   struct fbnic_q_triad *qt, int budget)
{
	unsigned int packets = 0, bytes = 0, dropped = 0;
	struct fbnic_ring *rcq = &qt->cmpl;
	struct fbnic_pkt_buff *pkt;
	s32 head0 = -1, head1 = -1;
	__le64 *raw_rcd, done;
	u32 head = rcq->head;

	/* The DONE bit is a phase flag that flips on every ring wrap;
	 * head carries the wrap bit (size_mask + 1) so the stale phase
	 * can be derived from it.
	 */
	done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0;
	raw_rcd = &rcq->desc[head & rcq->size_mask];
	pkt = rcq->pkt;

	/* Walk the completion queue collecting the heads reported by NIC */
	while (likely(packets < budget)) {
		struct sk_buff *skb = ERR_PTR(-EINVAL);
		u64 rcd;

		/* Descriptor still carries the stale phase: nothing new */
		if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done)
			break;

		/* Don't read descriptor contents before the DONE check */
		dma_rmb();

		rcd = le64_to_cpu(*raw_rcd);

		switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) {
		case FBNIC_RCD_TYPE_HDR_AL:
			/* Header buffer: starts a new frame in pkt */
			head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_pkt_prepare(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_PAY_AL:
			/* Payload buffer: appended to pkt as a fragment */
			head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_add_rx_frag(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_OPT_META:
			/* Only type 0 is currently supported */
			if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
				break;

			/* We currently ignore the action table index */
			break;
		case FBNIC_RCD_TYPE_META:
			/* Closing descriptor: build and deliver the skb */
			if (likely(!fbnic_rcd_metadata_err(rcd)))
				skb = fbnic_build_skb(nv, pkt);

			/* Populate skb and invalidate XDP */
			if (!IS_ERR_OR_NULL(skb)) {
				fbnic_populate_skb_fields(nv, rcd, skb, qt);

				packets++;
				bytes += skb->len;

				napi_gro_receive(&nv->napi, skb);
			} else {
				dropped++;
				fbnic_put_pkt_buff(nv, pkt, 1);
			}

			pkt->buff.data_hard_start = NULL;

			break;
		}

		raw_rcd++;
		head++;
		if (!(head & rcq->size_mask)) {
			/* Wrapped: flip phase and restart at ring base */
			done ^= cpu_to_le64(FBNIC_RCD_DONE);
			raw_rcd = &rcq->desc[0];
		}
	}

	u64_stats_update_begin(&rcq->stats.syncp);
	rcq->stats.packets += packets;
	rcq->stats.bytes += bytes;
	/* Re-add ethernet header length (removed in fbnic_build_skb) */
	rcq->stats.bytes += ETH_HLEN * packets;
	rcq->stats.dropped += dropped;
	u64_stats_update_end(&rcq->stats.syncp);

	/* Unmap and free processed buffers */
	if (head0 >= 0)
		fbnic_clean_bdq(nv, budget, &qt->sub0, head0);
	fbnic_fill_bdq(nv, &qt->sub0);

	if (head1 >= 0)
		fbnic_clean_bdq(nv, budget, &qt->sub1, head1);
	fbnic_fill_bdq(nv, &qt->sub1);

	/* Record the current head/tail of the queue */
	if (rcq->head != head) {
		rcq->head = head;
		writel(head & rcq->size_mask, rcq->doorbell);
	}

	return packets;
}
841
/* Mask this vector's interrupt. The mask-set registers each cover 32
 * vectors, hence indexing by v_idx / 32 and setting bit v_idx % 32.
 */
static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
}
849
/* Re-arm this vector's completion-queue interrupt so the next event
 * raises an IRQ again once NAPI polling has completed.
 */
static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}
858
fbnic_poll(struct napi_struct * napi,int budget)859 static int fbnic_poll(struct napi_struct *napi, int budget)
860 {
861 struct fbnic_napi_vector *nv = container_of(napi,
862 struct fbnic_napi_vector,
863 napi);
864 int i, j, work_done = 0;
865
866 for (i = 0; i < nv->txt_count; i++)
867 fbnic_clean_tcq(nv, &nv->qt[i], budget);
868
869 for (j = 0; j < nv->rxt_count; j++, i++)
870 work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);
871
872 if (work_done >= budget)
873 return budget;
874
875 if (likely(napi_complete_done(napi, work_done)))
876 fbnic_nv_irq_rearm(nv);
877
878 return 0;
879 }
880
/* MSI-X handler: hard-IRQ context, so just kick NAPI — all real work
 * happens in fbnic_poll().
 */
static irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
{
	struct fbnic_napi_vector *nv = data;

	napi_schedule_irqoff(&nv->napi);

	return IRQ_HANDLED;
}
889
fbnic_aggregate_ring_rx_counters(struct fbnic_net * fbn,struct fbnic_ring * rxr)890 static void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
891 struct fbnic_ring *rxr)
892 {
893 struct fbnic_queue_stats *stats = &rxr->stats;
894
895 /* Capture stats from queues before dissasociating them */
896 fbn->rx_stats.bytes += stats->bytes;
897 fbn->rx_stats.packets += stats->packets;
898 fbn->rx_stats.dropped += stats->dropped;
899 }
900
fbnic_aggregate_ring_tx_counters(struct fbnic_net * fbn,struct fbnic_ring * txr)901 static void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
902 struct fbnic_ring *txr)
903 {
904 struct fbnic_queue_stats *stats = &txr->stats;
905
906 /* Capture stats from queues before dissasociating them */
907 fbn->tx_stats.bytes += stats->bytes;
908 fbn->tx_stats.packets += stats->packets;
909 fbn->tx_stats.dropped += stats->dropped;
910 }
911
fbnic_remove_tx_ring(struct fbnic_net * fbn,struct fbnic_ring * txr)912 static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
913 struct fbnic_ring *txr)
914 {
915 if (!(txr->flags & FBNIC_RING_F_STATS))
916 return;
917
918 fbnic_aggregate_ring_tx_counters(fbn, txr);
919
920 /* Remove pointer to the Tx ring */
921 WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
922 fbn->tx[txr->q_idx] = NULL;
923 }
924
fbnic_remove_rx_ring(struct fbnic_net * fbn,struct fbnic_ring * rxr)925 static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
926 struct fbnic_ring *rxr)
927 {
928 if (!(rxr->flags & FBNIC_RING_F_STATS))
929 return;
930
931 fbnic_aggregate_ring_rx_counters(fbn, rxr);
932
933 /* Remove pointer to the Rx ring */
934 WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
935 fbn->rx[rxr->q_idx] = NULL;
936 }
937
/* Tear down one NAPI vector: detach its rings from the netdev, release
 * the IRQ, destroy the page pool, delete the NAPI context, and free it.
 */
static void fbnic_free_napi_vector(struct fbnic_net *fbn,
				   struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;
	int i, j;

	/* Tx triads occupy the front of the qt array ... */
	for (i = 0; i < nv->txt_count; i++) {
		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
	}

	/* ... Rx triads follow, continuing with the same index i */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
	}

	/* Release the IRQ before the NAPI/page pool it would schedule on */
	fbnic_free_irq(fbd, v_idx, nv);
	page_pool_destroy(nv->page_pool);
	netif_napi_del(&nv->napi);
	list_del(&nv->napis);
	kfree(nv);
}
962
fbnic_free_napi_vectors(struct fbnic_net * fbn)963 void fbnic_free_napi_vectors(struct fbnic_net *fbn)
964 {
965 struct fbnic_napi_vector *nv, *temp;
966
967 list_for_each_entry_safe(nv, temp, &fbn->napis, napis)
968 fbnic_free_napi_vector(fbn, nv);
969 }
970
fbnic_name_napi_vector(struct fbnic_napi_vector * nv)971 static void fbnic_name_napi_vector(struct fbnic_napi_vector *nv)
972 {
973 unsigned char *dev_name = nv->napi.dev->name;
974
975 if (!nv->rxt_count)
976 snprintf(nv->name, sizeof(nv->name), "%s-Tx-%u", dev_name,
977 nv->v_idx - FBNIC_NON_NAPI_VECTORS);
978 else
979 snprintf(nv->name, sizeof(nv->name), "%s-TxRx-%u", dev_name,
980 nv->v_idx - FBNIC_NON_NAPI_VECTORS);
981 }
982
/* Pool maps pages for DMA itself and syncs them for the device */
#define FBNIC_PAGE_POOL_FLAGS \
	(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)

/* Create the page pool backing all Rx queue triads of a NAPI vector.
 * Returns 0 on success or the negative errno from page_pool_create().
 */
static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	struct page_pool_params pp_params = {
		.order = 0,
		.flags = FBNIC_PAGE_POOL_FLAGS,
		.pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count,
		.nid = NUMA_NO_NODE,
		.dev = nv->dev,
		.dma_dir = DMA_BIDIRECTIONAL,
		.offset = 0,
		.max_len = PAGE_SIZE
	};
	struct page_pool *pp;

	/* Page pool cannot exceed a size of 32768. This doesn't limit the
	 * pages on the ring but the number we can have cached waiting on
	 * the next use.
	 *
	 * TBD: Can this be reduced further? Would a multiple of
	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
	 * many pages do we need to hold in reserve to get the best return
	 * without hogging too much system memory.
	 */
	if (pp_params.pool_size > 32768)
		pp_params.pool_size = 32768;

	pp = page_pool_create(&pp_params);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	nv->page_pool = pp;

	return 0;
}
1021
/* Initialize the software state of a ring: stats seqcount, doorbell
 * address, queue index, and flags.
 */
static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
			    int q_idx, u8 flags)
{
	u64_stats_init(&ring->stats.syncp);

	ring->flags = flags;
	ring->q_idx = q_idx;
	ring->doorbell = doorbell;
}
1030
/* Allocate and initialize one NAPI vector and its queue triads.
 *
 * Tx triads occupy the front of nv->qt[], Rx triads follow them.
 * On success the vector is linked into fbn->napis, registered with
 * NAPI, has its page pool (if it owns Rx queues) and IRQ set up.
 * Returns 0 or a negative errno; on failure everything acquired so
 * far is unwound before returning.
 */
static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
				   unsigned int v_count, unsigned int v_idx,
				   unsigned int txq_count, unsigned int txq_idx,
				   unsigned int rxq_count, unsigned int rxq_idx)
{
	int txt_count = txq_count, rxt_count = rxq_count;
	u32 __iomem *uc_addr = fbd->uc_addr0;
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *qt;
	int qt_count, err;
	u32 __iomem *db;

	/* A vector with no queues at all makes no sense */
	qt_count = txt_count + rxq_count;
	if (!qt_count)
		return -EINVAL;

	/* If MMIO has already failed there are no rings to initialize */
	if (!uc_addr)
		return -EIO;

	/* Allocate NAPI vector and queue triads (trailing flex array) */
	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
	if (!nv)
		return -ENOMEM;

	/* Record queue triad counts */
	nv->txt_count = txt_count;
	nv->rxt_count = rxt_count;

	/* Provide pointer back to fbnic and MSI-X vectors */
	nv->fbd = fbd;
	nv->v_idx = v_idx;

	/* Tie napi to netdev */
	list_add(&nv->napis, &fbn->napis);
	netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll);

	/* Record IRQ to NAPI struct */
	netif_napi_set_irq(&nv->napi,
			   pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));

	/* Tie nv back to PCIe dev */
	nv->dev = fbd->dev;

	/* Allocate page pool — only vectors that own Rx queues need one */
	if (rxq_count) {
		err = fbnic_alloc_nv_page_pool(fbn, nv);
		if (err)
			goto napi_del;
	}

	/* Initialize vector name */
	fbnic_name_napi_vector(nv);

	/* Request the IRQ for napi vector */
	err = fbnic_request_irq(fbd, v_idx, &fbnic_msix_clean_rings,
				IRQF_SHARED, nv->name, nv);
	if (err)
		goto pp_destroy;

	/* Initialize queue triads */
	qt = nv->qt;

	while (txt_count) {
		/* Configure Tx queue — doorbell lives in the queue's CSR window */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];

		/* Assign Tx queue to netdev if applicable; triads beyond
		 * txq_count are placeholder rings marked disabled with no
		 * netdev queue behind them.
		 */
		if (txq_count > 0) {
			u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;

			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
			fbn->tx[txq_idx] = &qt->sub0;
			txq_count--;
		} else {
			fbnic_ring_init(&qt->sub0, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure Tx completion queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, 0, 0);

		/* Update Tx queue index — queues are strided by vector count */
		txt_count--;
		txq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	while (rxt_count) {
		/* Configure header queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
		fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX);

		/* Configure payload queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
		fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX);

		/* Configure Rx completion queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
		fbn->rx[rxq_idx] = &qt->cmpl;

		/* Update Rx queue index — queues are strided by vector count */
		rxt_count--;
		rxq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	return 0;

pp_destroy:
	/* page_pool is NULL for Tx-only vectors; destroy handles that */
	page_pool_destroy(nv->page_pool);
napi_del:
	netif_napi_del(&nv->napi);
	list_del(&nv->napis);
	kfree(nv);
	return err;
}
1154
fbnic_alloc_napi_vectors(struct fbnic_net * fbn)1155 int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
1156 {
1157 unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
1158 unsigned int num_tx = fbn->num_tx_queues;
1159 unsigned int num_rx = fbn->num_rx_queues;
1160 unsigned int num_napi = fbn->num_napi;
1161 struct fbnic_dev *fbd = fbn->fbd;
1162 int err;
1163
1164 /* Allocate 1 Tx queue per napi vector */
1165 if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
1166 while (num_tx) {
1167 err = fbnic_alloc_napi_vector(fbd, fbn,
1168 num_napi, v_idx,
1169 1, txq_idx, 0, 0);
1170 if (err)
1171 goto free_vectors;
1172
1173 /* Update counts and index */
1174 num_tx--;
1175 txq_idx++;
1176
1177 v_idx++;
1178 }
1179 }
1180
1181 /* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
1182 while (num_rx | num_tx) {
1183 int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
1184 int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx);
1185
1186 err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx,
1187 tqpv, txq_idx, rqpv, rxq_idx);
1188 if (err)
1189 goto free_vectors;
1190
1191 /* Update counts and index */
1192 num_tx -= tqpv;
1193 txq_idx++;
1194
1195 num_rx -= rqpv;
1196 rxq_idx++;
1197
1198 v_idx++;
1199 }
1200
1201 return 0;
1202
1203 free_vectors:
1204 fbnic_free_napi_vectors(fbn);
1205
1206 return -ENOMEM;
1207 }
1208
fbnic_free_ring_resources(struct device * dev,struct fbnic_ring * ring)1209 static void fbnic_free_ring_resources(struct device *dev,
1210 struct fbnic_ring *ring)
1211 {
1212 kvfree(ring->buffer);
1213 ring->buffer = NULL;
1214
1215 /* If size is not set there are no descriptors present */
1216 if (!ring->size)
1217 return;
1218
1219 dma_free_coherent(dev, ring->size, ring->desc, ring->dma);
1220 ring->size_mask = 0;
1221 ring->size = 0;
1222 }
1223
fbnic_alloc_tx_ring_desc(struct fbnic_net * fbn,struct fbnic_ring * txr)1224 static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn,
1225 struct fbnic_ring *txr)
1226 {
1227 struct device *dev = fbn->netdev->dev.parent;
1228 size_t size;
1229
1230 /* Round size up to nearest 4K */
1231 size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096);
1232
1233 txr->desc = dma_alloc_coherent(dev, size, &txr->dma,
1234 GFP_KERNEL | __GFP_NOWARN);
1235 if (!txr->desc)
1236 return -ENOMEM;
1237
1238 /* txq_size should be a power of 2, so mask is just that -1 */
1239 txr->size_mask = fbn->txq_size - 1;
1240 txr->size = size;
1241
1242 return 0;
1243 }
1244
fbnic_alloc_tx_ring_buffer(struct fbnic_ring * txr)1245 static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr)
1246 {
1247 size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1);
1248
1249 txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1250
1251 return txr->tx_buf ? 0 : -ENOMEM;
1252 }
1253
fbnic_alloc_tx_ring_resources(struct fbnic_net * fbn,struct fbnic_ring * txr)1254 static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn,
1255 struct fbnic_ring *txr)
1256 {
1257 struct device *dev = fbn->netdev->dev.parent;
1258 int err;
1259
1260 if (txr->flags & FBNIC_RING_F_DISABLED)
1261 return 0;
1262
1263 err = fbnic_alloc_tx_ring_desc(fbn, txr);
1264 if (err)
1265 return err;
1266
1267 if (!(txr->flags & FBNIC_RING_F_CTX))
1268 return 0;
1269
1270 err = fbnic_alloc_tx_ring_buffer(txr);
1271 if (err)
1272 goto free_desc;
1273
1274 return 0;
1275
1276 free_desc:
1277 fbnic_free_ring_resources(dev, txr);
1278 return err;
1279 }
1280
fbnic_alloc_rx_ring_desc(struct fbnic_net * fbn,struct fbnic_ring * rxr)1281 static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn,
1282 struct fbnic_ring *rxr)
1283 {
1284 struct device *dev = fbn->netdev->dev.parent;
1285 size_t desc_size = sizeof(*rxr->desc);
1286 u32 rxq_size;
1287 size_t size;
1288
1289 switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) {
1290 case FBNIC_QUEUE_BDQ_HPQ_TAIL:
1291 rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT;
1292 desc_size *= FBNIC_BD_FRAG_COUNT;
1293 break;
1294 case FBNIC_QUEUE_BDQ_PPQ_TAIL:
1295 rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT;
1296 desc_size *= FBNIC_BD_FRAG_COUNT;
1297 break;
1298 case FBNIC_QUEUE_RCQ_HEAD:
1299 rxq_size = fbn->rcq_size;
1300 break;
1301 default:
1302 return -EINVAL;
1303 }
1304
1305 /* Round size up to nearest 4K */
1306 size = ALIGN(array_size(desc_size, rxq_size), 4096);
1307
1308 rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma,
1309 GFP_KERNEL | __GFP_NOWARN);
1310 if (!rxr->desc)
1311 return -ENOMEM;
1312
1313 /* rxq_size should be a power of 2, so mask is just that -1 */
1314 rxr->size_mask = rxq_size - 1;
1315 rxr->size = size;
1316
1317 return 0;
1318 }
1319
fbnic_alloc_rx_ring_buffer(struct fbnic_ring * rxr)1320 static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr)
1321 {
1322 size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1);
1323
1324 if (rxr->flags & FBNIC_RING_F_CTX)
1325 size = sizeof(*rxr->rx_buf) * (rxr->size_mask + 1);
1326 else
1327 size = sizeof(*rxr->pkt);
1328
1329 rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1330
1331 return rxr->rx_buf ? 0 : -ENOMEM;
1332 }
1333
fbnic_alloc_rx_ring_resources(struct fbnic_net * fbn,struct fbnic_ring * rxr)1334 static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn,
1335 struct fbnic_ring *rxr)
1336 {
1337 struct device *dev = fbn->netdev->dev.parent;
1338 int err;
1339
1340 err = fbnic_alloc_rx_ring_desc(fbn, rxr);
1341 if (err)
1342 return err;
1343
1344 err = fbnic_alloc_rx_ring_buffer(rxr);
1345 if (err)
1346 goto free_desc;
1347
1348 return 0;
1349
1350 free_desc:
1351 fbnic_free_ring_resources(dev, rxr);
1352 return err;
1353 }
1354
/* Free the ring resources of a whole queue triad, in reverse of the
 * allocation order: completion ring first, then payload and header
 * submission rings. Safe on partially allocated triads.
 */
static void fbnic_free_qt_resources(struct fbnic_net *fbn,
				    struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;

	fbnic_free_ring_resources(dev, &qt->cmpl);
	fbnic_free_ring_resources(dev, &qt->sub1);
	fbnic_free_ring_resources(dev, &qt->sub0);
}
1364
fbnic_alloc_tx_qt_resources(struct fbnic_net * fbn,struct fbnic_q_triad * qt)1365 static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
1366 struct fbnic_q_triad *qt)
1367 {
1368 struct device *dev = fbn->netdev->dev.parent;
1369 int err;
1370
1371 err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0);
1372 if (err)
1373 return err;
1374
1375 err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
1376 if (err)
1377 goto free_sub1;
1378
1379 return 0;
1380
1381 free_sub1:
1382 fbnic_free_ring_resources(dev, &qt->sub0);
1383 return err;
1384 }
1385
/* Allocate resources for an Rx queue triad: header BDQ (sub0),
 * payload BDQ (sub1), and the Rx completion queue (cmpl). On failure
 * the goto chain unwinds whatever was allocated, in reverse order.
 * Returns 0 or a negative errno.
 */
static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
				       struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
	if (err)
		return err;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
	if (err)
		goto free_sub0;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl);
	if (err)
		goto free_sub1;

	return 0;

free_sub1:
	fbnic_free_ring_resources(dev, &qt->sub1);
free_sub0:
	fbnic_free_ring_resources(dev, &qt->sub0);
	return err;
}
1412
fbnic_free_nv_resources(struct fbnic_net * fbn,struct fbnic_napi_vector * nv)1413 static void fbnic_free_nv_resources(struct fbnic_net *fbn,
1414 struct fbnic_napi_vector *nv)
1415 {
1416 int i, j;
1417
1418 /* Free Tx Resources */
1419 for (i = 0; i < nv->txt_count; i++)
1420 fbnic_free_qt_resources(fbn, &nv->qt[i]);
1421
1422 for (j = 0; j < nv->rxt_count; j++, i++)
1423 fbnic_free_qt_resources(fbn, &nv->qt[i]);
1424 }
1425
/* Allocate descriptor rings and buffer state for every queue triad on
 * the NAPI vector. On failure, all triads allocated before the failing
 * one are released in reverse order. Returns 0 or a negative errno.
 */
static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	int i, j, err;

	/* Allocate Tx Resources */
	for (i = 0; i < nv->txt_count; i++) {
		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
		if (err)
			goto free_resources;
	}

	/* Allocate Rx Resources — Rx triads follow the Tx triads in qt[] */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
		if (err)
			goto free_resources;
	}

	return 0;

free_resources:
	/* i indexes the triad that failed; unwind everything before it */
	while (i--)
		fbnic_free_qt_resources(fbn, &nv->qt[i]);
	return err;
}
1452
/* Release the ring resources of every NAPI vector on the netdev */
void fbnic_free_resources(struct fbnic_net *fbn)
{
	struct fbnic_napi_vector *nv;

	list_for_each_entry(nv, &fbn->napis, napis)
		fbnic_free_nv_resources(fbn, nv);
}
1460
/* Allocate ring resources for every NAPI vector on the netdev.
 * On failure, the vectors allocated before the failing one are freed
 * by walking the list back from that entry. Returns 0 or a negative
 * errno (-ENODEV if the list is somehow empty after a failure).
 */
int fbnic_alloc_resources(struct fbnic_net *fbn)
{
	struct fbnic_napi_vector *nv;
	int err = -ENODEV;

	list_for_each_entry(nv, &fbn->napis, napis) {
		err = fbnic_alloc_nv_resources(fbn, nv);
		if (err)
			goto free_resources;
	}

	return 0;

free_resources:
	/* Walk back over the vectors that did allocate successfully */
	list_for_each_entry_continue_reverse(nv, &fbn->napis, napis)
		fbnic_free_nv_resources(fbn, nv);

	return err;
}
1480
fbnic_disable_twq0(struct fbnic_ring * txr)1481 static void fbnic_disable_twq0(struct fbnic_ring *txr)
1482 {
1483 u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL);
1484
1485 twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;
1486
1487 fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
1488 }
1489
/* Disable the Tx completion queue and mask its interrupt source */
static void fbnic_disable_tcq(struct fbnic_ring *txr)
{
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK);
}
1495
fbnic_disable_bdq(struct fbnic_ring * hpq,struct fbnic_ring * ppq)1496 static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
1497 {
1498 u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL);
1499
1500 bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE;
1501
1502 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
1503 }
1504
/* Disable the Rx completion queue and mask its interrupt source */
static void fbnic_disable_rcq(struct fbnic_ring *rxr)
{
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0);
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK);
}
1510
fbnic_napi_disable(struct fbnic_net * fbn)1511 void fbnic_napi_disable(struct fbnic_net *fbn)
1512 {
1513 struct fbnic_napi_vector *nv;
1514
1515 list_for_each_entry(nv, &fbn->napis, napis) {
1516 napi_disable(&nv->napi);
1517
1518 fbnic_nv_irq_disable(nv);
1519 }
1520 }
1521
/* Disable every Tx and Rx queue owned by the netdev, then flush the
 * posted MMIO writes so hardware has observed all the disables before
 * the caller proceeds.
 */
void fbnic_disable(struct fbnic_net *fbn)
{
	struct fbnic_dev *fbd = fbn->fbd;
	struct fbnic_napi_vector *nv;
	int i, j;

	list_for_each_entry(nv, &fbn->napis, napis) {
		/* Disable Tx queue triads */
		for (i = 0; i < nv->txt_count; i++) {
			struct fbnic_q_triad *qt = &nv->qt[i];

			fbnic_disable_twq0(&qt->sub0);
			fbnic_disable_tcq(&qt->cmpl);
		}

		/* Disable Rx queue triads — they follow Tx triads in qt[] */
		for (j = 0; j < nv->rxt_count; j++, i++) {
			struct fbnic_q_triad *qt = &nv->qt[i];

			fbnic_disable_bdq(&qt->sub0, &qt->sub1);
			fbnic_disable_rcq(&qt->cmpl);
		}
	}

	fbnic_wrfl(fbd);
}
1548
/* Put the Tx path into drop mode so outstanding work drains; used as a
 * last resort when waiting for the Tx queues to go idle times out.
 */
static void fbnic_tx_flush(struct fbnic_dev *fbd)
{
	netdev_warn(fbd->netdev, "triggering Tx flush\n");

	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN,
		    FBNIC_TMI_DROP_CTRL_EN);
}
1556
/* Clear the Tx drop-mode bit set by fbnic_tx_flush() */
static void fbnic_tx_flush_off(struct fbnic_dev *fbd)
{
	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0);
}
1561
/* Describes a contiguous run of idle-status registers to poll */
struct fbnic_idle_regs {
	u32 reg_base;	/* CSR offset of the first register in the run */
	u8 reg_cnt;	/* number of consecutive registers to check */
};
1566
fbnic_all_idle(struct fbnic_dev * fbd,const struct fbnic_idle_regs * regs,unsigned int nregs)1567 static bool fbnic_all_idle(struct fbnic_dev *fbd,
1568 const struct fbnic_idle_regs *regs,
1569 unsigned int nregs)
1570 {
1571 unsigned int i, j;
1572
1573 for (i = 0; i < nregs; i++) {
1574 for (j = 0; j < regs[i].reg_cnt; j++) {
1575 if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U)
1576 return false;
1577 }
1578 }
1579 return true;
1580 }
1581
fbnic_idle_dump(struct fbnic_dev * fbd,const struct fbnic_idle_regs * regs,unsigned int nregs,const char * dir,int err)1582 static void fbnic_idle_dump(struct fbnic_dev *fbd,
1583 const struct fbnic_idle_regs *regs,
1584 unsigned int nregs, const char *dir, int err)
1585 {
1586 unsigned int i, j;
1587
1588 netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err);
1589 for (i = 0; i < nregs; i++)
1590 for (j = 0; j < regs[i].reg_cnt; j++)
1591 netdev_err(fbd->netdev, "0x%04x: %08x\n",
1592 regs[i].reg_base + j,
1593 fbnic_rd32(fbd, regs[i].reg_base + j));
1594 }
1595
/* Poll the queue-manager idle registers until both the Tx and Rx sides
 * report fully idle (all register bits set).
 *
 * Tx is polled first; on timeout a drop-mode Tx flush is triggered and
 * the poll is retried once before giving up. @may_fail selects whether
 * a persistent Tx failure aborts immediately or still falls through to
 * the Rx check.
 *
 * Return: 0 when idle, otherwise the error from
 * read_poll_timeout_atomic() (-ETIMEDOUT on timeout).
 */
int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
{
	static const struct fbnic_idle_regs tx[] = {
		{ FBNIC_QM_TWQ_IDLE(0),	FBNIC_QM_TWQ_IDLE_CNT, },
		{ FBNIC_QM_TQS_IDLE(0),	FBNIC_QM_TQS_IDLE_CNT, },
		{ FBNIC_QM_TDE_IDLE(0),	FBNIC_QM_TDE_IDLE_CNT, },
		{ FBNIC_QM_TCQ_IDLE(0),	FBNIC_QM_TCQ_IDLE_CNT, },
	}, rx[] = {
		{ FBNIC_QM_HPQ_IDLE(0),	FBNIC_QM_HPQ_IDLE_CNT, },
		{ FBNIC_QM_PPQ_IDLE(0),	FBNIC_QM_PPQ_IDLE_CNT, },
		{ FBNIC_QM_RCQ_IDLE(0),	FBNIC_QM_RCQ_IDLE_CNT, },
	};
	bool idle;
	int err;

	/* Poll every 2us for up to 500ms */
	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, tx, ARRAY_SIZE(tx));
	if (err == -ETIMEDOUT) {
		/* Force a drain via drop mode and retry the poll */
		fbnic_tx_flush(fbd);
		err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle,
					       2, 500000, false,
					       fbd, tx, ARRAY_SIZE(tx));
		fbnic_tx_flush_off(fbd);
	}
	if (err) {
		fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err);
		if (may_fail)
			return err;
	}

	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, rx, ARRAY_SIZE(rx));
	if (err)
		fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err);
	return err;
}
1632
/* Drop any remaining work on the rings: clean unprocessed Tx
 * descriptors, drain the BDQ page buffers, zero the completion rings,
 * reset BQL state, and detach the queues from their NAPI instances.
 * NOTE(review): looks like this expects the queues to already be
 * disabled and idle — confirm against callers.
 */
void fbnic_flush(struct fbnic_net *fbn)
{
	struct fbnic_napi_vector *nv;

	list_for_each_entry(nv, &fbn->napis, napis) {
		int i, j;

		/* Flush any processed Tx Queue Triads and drop the rest */
		for (i = 0; i < nv->txt_count; i++) {
			struct fbnic_q_triad *qt = &nv->qt[i];
			struct netdev_queue *tx_queue;

			/* Clean the work queues of unprocessed work */
			fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);

			/* Reset completion queue descriptor ring */
			memset(qt->cmpl.desc, 0, qt->cmpl.size);

			/* Nothing else to do if Tx queue is disabled */
			if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
				continue;

			/* Reset BQL associated with Tx queue */
			tx_queue = netdev_get_tx_queue(nv->napi.dev,
						       qt->sub0.q_idx);
			netdev_tx_reset_queue(tx_queue);

			/* Disassociate Tx queue from NAPI */
			netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
					     NETDEV_QUEUE_TYPE_TX, NULL);
		}

		/* Flush any processed Rx Queue Triads and drop the rest */
		for (j = 0; j < nv->rxt_count; j++, i++) {
			struct fbnic_q_triad *qt = &nv->qt[i];

			/* Clean the work queues of unprocessed work */
			fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail);
			fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail);

			/* Reset completion queue descriptor ring */
			memset(qt->cmpl.desc, 0, qt->cmpl.size);

			/* Release any partially assembled packet buffer */
			fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
			qt->cmpl.pkt->buff.data_hard_start = NULL;

			/* Disassociate Rx queue from NAPI */
			netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
					     NETDEV_QUEUE_TYPE_RX, NULL);
		}
	}
}
1685
/* Re-associate every enabled queue with its NAPI instance and
 * repopulate the header/payload BDQ rings with pages for Rx.
 */
void fbnic_fill(struct fbnic_net *fbn)
{
	struct fbnic_napi_vector *nv;

	list_for_each_entry(nv, &fbn->napis, napis) {
		int i, j;

		/* Configure NAPI mapping for Tx */
		for (i = 0; i < nv->txt_count; i++) {
			struct fbnic_q_triad *qt = &nv->qt[i];

			/* Nothing to do if Tx queue is disabled */
			if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
				continue;

			/* Associate Tx queue with NAPI */
			netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
					     NETDEV_QUEUE_TYPE_TX, &nv->napi);
		}

		/* Configure NAPI mapping and populate pages
		 * in the BDQ rings to use for Rx
		 */
		for (j = 0; j < nv->rxt_count; j++, i++) {
			struct fbnic_q_triad *qt = &nv->qt[i];

			/* Associate Rx queue with NAPI */
			netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
					     NETDEV_QUEUE_TYPE_RX, &nv->napi);

			/* Populate the header and payload BDQs */
			fbnic_fill_bdq(nv, &qt->sub0);
			fbnic_fill_bdq(nv, &qt->sub1);
		}
	}
}
1722
/* Program and enable Tx work queue 0: reset, write the ring base
 * address and log2 size, then set the enable bit. A zero size_mask
 * means the ring was never allocated and it is left untouched.
 */
static void fbnic_enable_twq0(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}
1744
/* Program and enable the Tx completion queue: reset, write the ring
 * base and log2 size, hook it up to the vector's interrupt with a
 * half-ring threshold, then enable. Skips unallocated rings.
 */
static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *tcq)
{
	u32 log_size = fls(tcq->size_mask);

	if (!tcq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
	tcq->tail = 0;
	tcq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
}
1773
/* Program and enable the buffer descriptor queues. The header queue is
 * always programmed; the payload queue is only programmed (and its
 * enable bit added) when it was allocated. Both queues are controlled
 * through the single BDQ_CTL register in the hpq CSR window.
 */
static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
{
	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
	u32 log_size;

	/* Reset head/tail */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
	ppq->tail = 0;
	ppq->head = 0;
	hpq->tail = 0;
	hpq->head = 0;

	log_size = fls(hpq->size_mask);

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);

	/* Skip PPQ programming entirely when it was never allocated */
	if (!ppq->size_mask)
		goto write_ctl;

	log_size = fls(ppq->size_mask);

	/* Add enabling of PPQ to BDQ control */
	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);

write_ctl:
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
}
1811
/* Program RDE_CTL0 for the Rx completion queue: immediate drop mode
 * plus the minimum head/tail room requirements for received frames.
 * @nv is currently unused but kept for signature symmetry with the
 * other rcq configuration helpers.
 */
static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
				       struct fbnic_ring *rcq)
{
	u32 drop_mode, rcq_ctl;

	drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;

	/* Specify packet layout */
	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
	    FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
}
1826
/* Program and enable the Rx completion queue: set drop mode and packet
 * layout (pad, header limits, payload offset/cacheline), reset, write
 * ring base and log2 size, attach the vector's interrupt with a
 * half-ring threshold, then enable.
 */
static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *rcq)
{
	u32 log_size = fls(rcq->size_mask);
	u32 rcq_ctl;

	/* Drop mode / headroom / tailroom go into RDE_CTL0 */
	fbnic_config_drop_mode_rcq(nv, rcq);

	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK,
			      FBNIC_RX_MAX_HDR) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
			      FBNIC_RX_PAYLD_OFFSET) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
			      FBNIC_RX_PAYLD_PG_CL);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);

	/* Reset head/tail */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
	rcq->head = 0;
	rcq->tail = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv->v_idx);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, rcq->size_mask / 2);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
}
1864
fbnic_enable(struct fbnic_net * fbn)1865 void fbnic_enable(struct fbnic_net *fbn)
1866 {
1867 struct fbnic_dev *fbd = fbn->fbd;
1868 struct fbnic_napi_vector *nv;
1869 int i, j;
1870
1871 list_for_each_entry(nv, &fbn->napis, napis) {
1872 /* Setup Tx Queue Triads */
1873 for (i = 0; i < nv->txt_count; i++) {
1874 struct fbnic_q_triad *qt = &nv->qt[i];
1875
1876 fbnic_enable_twq0(&qt->sub0);
1877 fbnic_enable_tcq(nv, &qt->cmpl);
1878 }
1879
1880 /* Setup Rx Queue Triads */
1881 for (j = 0; j < nv->rxt_count; j++, i++) {
1882 struct fbnic_q_triad *qt = &nv->qt[i];
1883
1884 fbnic_enable_bdq(&qt->sub0, &qt->sub1);
1885 fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
1886 fbnic_enable_rcq(nv, &qt->cmpl);
1887 }
1888 }
1889
1890 fbnic_wrfl(fbd);
1891 }
1892
fbnic_nv_irq_enable(struct fbnic_napi_vector * nv)1893 static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
1894 {
1895 struct fbnic_dev *fbd = nv->fbd;
1896 u32 val;
1897
1898 val = FBNIC_INTR_CQ_REARM_INTR_UNMASK;
1899
1900 fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
1901 }
1902
/* Enable NAPI polling and unmask the IRQ for every vector, then force
 * a first interrupt on each so anything enqueued during bringup gets
 * processed immediately.
 */
void fbnic_napi_enable(struct fbnic_net *fbn)
{
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	struct fbnic_napi_vector *nv;
	int i;

	list_for_each_entry(nv, &fbn->napis, napis) {
		napi_enable(&nv->napi);

		fbnic_nv_irq_enable(nv);

		/* Record bit used for NAPI IRQs so we can
		 * set the mask appropriately
		 */
		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
	}

	/* Force the first interrupt on the device to guarantee
	 * that any packets that may have been enqueued during the
	 * bringup are processed.
	 */
	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	/* Flush posted writes before returning */
	fbnic_wrfl(fbd);
}
1933
/* Detect Rx BDQs that have (nearly) run out of posted pages and kick
 * the owning vector's IRQ so its NAPI poll can refill them — without
 * pages no completion (and thus no interrupt) would otherwise occur.
 */
void fbnic_napi_depletion_check(struct net_device *netdev)
{
	struct fbnic_net *fbn = netdev_priv(netdev);
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	struct fbnic_napi_vector *nv;
	int i, j;

	list_for_each_entry(nv, &fbn->napis, napis) {
		/* Find RQs which are completely out of pages */
		for (i = nv->txt_count, j = 0; j < nv->rxt_count; j++, i++) {
			/* Assume 4 pages is always enough to fit a packet
			 * and therefore generate a completion and an IRQ.
			 */
			if (fbnic_desc_used(&nv->qt[i].sub0) < 4 ||
			    fbnic_desc_used(&nv->qt[i].sub1) < 4)
				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
		}
	}

	/* Unmask and fire the IRQs of the depleted vectors */
	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	/* Flush posted writes before returning */
	fbnic_wrfl(fbd);
}
1963