1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ 3 4 #include <linux/bitfield.h> 5 #include <linux/iopoll.h> 6 #include <linux/pci.h> 7 #include <net/netdev_queues.h> 8 #include <net/page_pool/helpers.h> 9 10 #include "fbnic.h" 11 #include "fbnic_csr.h" 12 #include "fbnic_netdev.h" 13 #include "fbnic_txrx.h" 14 15 enum { 16 FBNIC_XMIT_CB_TS = 0x01, 17 }; 18 19 struct fbnic_xmit_cb { 20 u32 bytecount; 21 u8 desc_count; 22 u8 flags; 23 int hw_head; 24 }; 25 26 #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb)) 27 28 static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring) 29 { 30 unsigned long csr_base = (unsigned long)ring->doorbell; 31 32 csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1); 33 34 return (u32 __iomem *)csr_base; 35 } 36 37 static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr) 38 { 39 u32 __iomem *csr_base = fbnic_ring_csr_base(ring); 40 41 return readl(csr_base + csr); 42 } 43 44 static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val) 45 { 46 u32 __iomem *csr_base = fbnic_ring_csr_base(ring); 47 48 writel(val, csr_base + csr); 49 } 50 51 /** 52 * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time 53 * @fbn: netdev priv of the FB NIC 54 * @ts40: timestamp read from a descriptor 55 * 56 * Return: u64 value of PHC time in nanoseconds 57 * 58 * Convert truncated 40 bit device timestamp as read from a descriptor 59 * to the full PHC time in nanoseconds. 60 */ 61 static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40) 62 { 63 unsigned int s; 64 u64 time_ns; 65 s64 offset; 66 u8 ts_top; 67 u32 high; 68 69 do { 70 s = u64_stats_fetch_begin(&fbn->time_seq); 71 offset = READ_ONCE(fbn->time_offset); 72 } while (u64_stats_fetch_retry(&fbn->time_seq, s)); 73 74 high = READ_ONCE(fbn->time_high); 75 76 /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */ 77 time_ns = (u64)(high >> 8) << 40 | ts40; 78 79 /* Compare bits 32-39 between periodic reads and ts40, 80 * see if HW clock may have wrapped since last read. We are sure 81 * that periodic reads are always at least ~1 minute behind, so 82 * this logic works perfectly fine. 83 */ 84 ts_top = ts40 >> 32; 85 if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2) 86 time_ns += 1ULL << 40; 87 88 return time_ns + offset; 89 } 90 91 static unsigned int fbnic_desc_unused(struct fbnic_ring *ring) 92 { 93 return (ring->head - ring->tail - 1) & ring->size_mask; 94 } 95 96 static unsigned int fbnic_desc_used(struct fbnic_ring *ring) 97 { 98 return (ring->tail - ring->head) & ring->size_mask; 99 } 100 101 static struct netdev_queue *txring_txq(const struct net_device *dev, 102 const struct fbnic_ring *ring) 103 { 104 return netdev_get_tx_queue(dev, ring->q_idx); 105 } 106 107 static int fbnic_maybe_stop_tx(const struct net_device *dev, 108 struct fbnic_ring *ring, 109 const unsigned int size) 110 { 111 struct netdev_queue *txq = txring_txq(dev, ring); 112 int res; 113 114 res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size, 115 FBNIC_TX_DESC_WAKEUP); 116 117 return !res; 118 } 119 120 static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring) 121 { 122 struct netdev_queue *dev_queue = txring_txq(skb->dev, ring); 123 unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount; 124 bool xmit_more = netdev_xmit_more(); 125 126 /* TBD: Request completion more often if xmit_more becomes large */ 127 128 return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more); 129 } 130 131 static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd) 132 { 133 u64 raw_twd = le64_to_cpu(*twd); 134 unsigned int len; 135 dma_addr_t dma; 136 137 dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd); 138 len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd); 139 140 dma_unmap_single(dev, dma, len, DMA_TO_DEVICE); 141 } 142 143 static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd) 144 { 145 u64 raw_twd = le64_to_cpu(*twd); 146 unsigned int len; 147 dma_addr_t dma; 148 149 dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd); 150 len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd); 151 152 dma_unmap_page(dev, dma, len, DMA_TO_DEVICE); 153 } 154 155 #define FBNIC_TWD_TYPE(_type) \ 156 cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type)) 157 158 static bool fbnic_tx_tstamp(struct sk_buff *skb) 159 { 160 struct fbnic_net *fbn; 161 162 if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) 163 return false; 164 165 fbn = netdev_priv(skb->dev); 166 if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF) 167 return false; 168 169 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 170 FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS; 171 FBNIC_XMIT_CB(skb)->hw_head = -1; 172 173 return true; 174 } 175 176 static bool 177 fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) 178 { 179 unsigned int l2len, i3len; 180 181 if (fbnic_tx_tstamp(skb)) 182 *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS); 183 184 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) 185 return false; 186 187 l2len = skb_mac_header_len(skb); 188 i3len = skb_checksum_start(skb) - skb_network_header(skb); 189 190 *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK, 191 skb->csum_offset / 2)); 192 193 *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); 194 195 *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) | 196 FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2)); 197 return false; 198 } 199 200 static void 201 fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) 202 { 203 skb_checksum_none_assert(skb); 204 205 if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) 206 return; 207 208 if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) { 209 skb->ip_summed = CHECKSUM_UNNECESSARY; 210 } else { 211 u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd); 212 213 skb->ip_summed = CHECKSUM_COMPLETE; 214 skb->csum = (__force __wsum)csum; 215 } 216 } 217 218 static bool 219 fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) 220 { 221 struct device *dev = skb->dev->dev.parent; 222 unsigned int tail = ring->tail, first; 223 unsigned int size, data_len; 224 skb_frag_t *frag; 225 dma_addr_t dma; 226 __le64 *twd; 227 228 ring->tx_buf[tail] = skb; 229 230 tail++; 231 tail &= ring->size_mask; 232 first = tail; 233 234 size = skb_headlen(skb); 235 data_len = skb->data_len; 236 237 if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) 238 goto dma_error; 239 240 dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); 241 242 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 243 twd = &ring->desc[tail]; 244 245 if (dma_mapping_error(dev, dma)) 246 goto dma_error; 247 248 *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) | 249 FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | 250 FIELD_PREP(FBNIC_TWD_TYPE_MASK, 251 FBNIC_TWD_TYPE_AL)); 252 253 tail++; 254 tail &= ring->size_mask; 255 256 if (!data_len) 257 break; 258 259 size = skb_frag_size(frag); 260 data_len -= size; 261 262 if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) 263 goto dma_error; 264 265 dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); 266 } 267 268 *twd |= FBNIC_TWD_TYPE(LAST_AL); 269 270 FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask; 271 272 ring->tail = tail; 273 274 /* Record SW timestamp */ 275 skb_tx_timestamp(skb); 276 277 /* Verify there is room for another packet */ 278 fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC); 279 280 if (fbnic_tx_sent_queue(skb, ring)) { 281 *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION); 282 283 /* Force DMA writes to flush before writing to tail */ 284 dma_wmb(); 285 286 writel(tail, ring->doorbell); 287 } 288 289 return false; 290 dma_error: 291 if (net_ratelimit()) 292 netdev_err(skb->dev, "TX DMA map failed\n"); 293 294 while (tail != first) { 295 tail--; 296 tail &= ring->size_mask; 297 twd = &ring->desc[tail]; 298 if (tail == first) 299 fbnic_unmap_single_twd(dev, twd); 300 else 301 fbnic_unmap_page_twd(dev, twd); 302 } 303 304 return true; 305 } 306 307 #define FBNIC_MIN_FRAME_LEN 60 308 309 static netdev_tx_t 310 fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring) 311 { 312 __le64 *meta = &ring->desc[ring->tail]; 313 u16 desc_needed; 314 315 if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN)) 316 goto err_count; 317 318 /* Need: 1 descriptor per page, 319 * + 1 desc for skb_head, 320 * + 2 desc for metadata and timestamp metadata 321 * + 7 desc gap to keep tail from touching head 322 * otherwise try next time 323 */ 324 desc_needed = skb_shinfo(skb)->nr_frags + 10; 325 if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed)) 326 return NETDEV_TX_BUSY; 327 328 *meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC); 329 330 /* Write all members within DWORD to condense this into 2 4B writes */ 331 FBNIC_XMIT_CB(skb)->bytecount = skb->len; 332 FBNIC_XMIT_CB(skb)->desc_count = 0; 333 334 if (fbnic_tx_offloads(ring, skb, meta)) 335 goto err_free; 336 337 if (fbnic_tx_map(ring, skb, meta)) 338 goto err_free; 339 340 return NETDEV_TX_OK; 341 342 err_free: 343 dev_kfree_skb_any(skb); 344 err_count: 345 u64_stats_update_begin(&ring->stats.syncp); 346 ring->stats.dropped++; 347 u64_stats_update_end(&ring->stats.syncp); 348 return NETDEV_TX_OK; 349 } 350 351 netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev) 352 { 353 struct fbnic_net *fbn = netdev_priv(dev); 354 unsigned int q_map = skb->queue_mapping; 355 356 return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]); 357 } 358 359 netdev_features_t 360 fbnic_features_check(struct sk_buff *skb, struct net_device *dev, 361 netdev_features_t features) 362 { 363 unsigned int l2len, l3len; 364 365 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) 366 return features; 367 368 l2len = skb_mac_header_len(skb); 369 l3len = skb_checksum_start(skb) - skb_network_header(skb); 370 371 /* Check header lengths are multiple of 2. 372 * In case of 6in6 we support longer headers (IHLEN + OHLEN) 373 * but keep things simple for now, 512B is plenty. 374 */ 375 if ((l2len | l3len | skb->csum_offset) % 2 || 376 !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) || 377 !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) || 378 !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2)) 379 return features & ~NETIF_F_CSUM_MASK; 380 381 return features; 382 } 383 384 static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, 385 struct fbnic_ring *ring, bool discard, 386 unsigned int hw_head) 387 { 388 u64 total_bytes = 0, total_packets = 0, ts_lost = 0; 389 unsigned int head = ring->head; 390 struct netdev_queue *txq; 391 unsigned int clean_desc; 392 393 clean_desc = (hw_head - head) & ring->size_mask; 394 395 while (clean_desc) { 396 struct sk_buff *skb = ring->tx_buf[head]; 397 unsigned int desc_cnt; 398 399 desc_cnt = FBNIC_XMIT_CB(skb)->desc_count; 400 if (desc_cnt > clean_desc) 401 break; 402 403 if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) { 404 FBNIC_XMIT_CB(skb)->hw_head = hw_head; 405 if (likely(!discard)) 406 break; 407 ts_lost++; 408 } 409 410 ring->tx_buf[head] = NULL; 411 412 clean_desc -= desc_cnt; 413 414 while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) { 415 head++; 416 head &= ring->size_mask; 417 desc_cnt--; 418 } 419 420 fbnic_unmap_single_twd(nv->dev, &ring->desc[head]); 421 head++; 422 head &= ring->size_mask; 423 desc_cnt--; 424 425 while (desc_cnt--) { 426 fbnic_unmap_page_twd(nv->dev, &ring->desc[head]); 427 head++; 428 head &= ring->size_mask; 429 } 430 431 total_bytes += FBNIC_XMIT_CB(skb)->bytecount; 432 total_packets += 1; 433 434 napi_consume_skb(skb, napi_budget); 435 } 436 437 if (!total_bytes) 438 return; 439 440 ring->head = head; 441 442 txq = txring_txq(nv->napi.dev, ring); 443 444 if (unlikely(discard)) { 445 u64_stats_update_begin(&ring->stats.syncp); 446 ring->stats.dropped += total_packets; 447 ring->stats.ts_lost += ts_lost; 448 u64_stats_update_end(&ring->stats.syncp); 449 450 netdev_tx_completed_queue(txq, total_packets, total_bytes); 451 return; 452 } 453 454 u64_stats_update_begin(&ring->stats.syncp); 455 ring->stats.bytes += total_bytes; 456 ring->stats.packets += total_packets; 457 u64_stats_update_end(&ring->stats.syncp); 458 459 netif_txq_completed_wake(txq, total_packets, total_bytes, 460 fbnic_desc_unused(ring), 461 FBNIC_TX_DESC_WAKEUP); 462 } 463 464 static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, 465 struct fbnic_ring *ring, 466 u64 tcd, int *ts_head, int *head0) 467 { 468 struct skb_shared_hwtstamps hwtstamp; 469 struct fbnic_net *fbn; 470 struct sk_buff *skb; 471 int head; 472 u64 ns; 473 474 head = (*ts_head < 0) ? ring->head : *ts_head; 475 476 do { 477 unsigned int desc_cnt; 478 479 if (head == ring->tail) { 480 if (unlikely(net_ratelimit())) 481 netdev_err(nv->napi.dev, 482 "Tx timestamp without matching packet\n"); 483 return; 484 } 485 486 skb = ring->tx_buf[head]; 487 desc_cnt = FBNIC_XMIT_CB(skb)->desc_count; 488 489 head += desc_cnt; 490 head &= ring->size_mask; 491 } while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)); 492 493 fbn = netdev_priv(nv->napi.dev); 494 ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd)); 495 496 memset(&hwtstamp, 0, sizeof(hwtstamp)); 497 hwtstamp.hwtstamp = ns_to_ktime(ns); 498 499 *ts_head = head; 500 501 FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS; 502 if (*head0 < 0) { 503 head = FBNIC_XMIT_CB(skb)->hw_head; 504 if (head >= 0) 505 *head0 = head; 506 } 507 508 skb_tstamp_tx(skb, &hwtstamp); 509 u64_stats_update_begin(&ring->stats.syncp); 510 ring->stats.ts_packets++; 511 u64_stats_update_end(&ring->stats.syncp); 512 } 513 514 static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx, 515 struct page *page) 516 { 517 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; 518 519 page_pool_fragment_page(page, PAGECNT_BIAS_MAX); 520 rx_buf->pagecnt_bias = PAGECNT_BIAS_MAX; 521 rx_buf->page = page; 522 } 523 524 static struct page *fbnic_page_pool_get(struct fbnic_ring *ring, 525 unsigned int idx) 526 { 527 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; 528 529 rx_buf->pagecnt_bias--; 530 531 return rx_buf->page; 532 } 533 534 static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx, 535 struct fbnic_napi_vector *nv, int budget) 536 { 537 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; 538 struct page *page = rx_buf->page; 539 540 if (!page_pool_unref_page(page, rx_buf->pagecnt_bias)) 541 page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget); 542 543 rx_buf->page = NULL; 544 } 545 546 static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget, 547 struct fbnic_q_triad *qt, s32 ts_head, s32 head0) 548 { 549 if (head0 >= 0) 550 fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0); 551 else if (ts_head >= 0) 552 fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head); 553 } 554 555 static void 556 fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, 557 int napi_budget) 558 { 559 struct fbnic_ring *cmpl = &qt->cmpl; 560 s32 head0 = -1, ts_head = -1; 561 __le64 *raw_tcd, done; 562 u32 head = cmpl->head; 563 564 done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE); 565 raw_tcd = &cmpl->desc[head & cmpl->size_mask]; 566 567 /* Walk the completion queue collecting the heads reported by NIC */ 568 while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) { 569 u64 tcd; 570 571 dma_rmb(); 572 573 tcd = le64_to_cpu(*raw_tcd); 574 575 switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) { 576 case FBNIC_TCD_TYPE_0: 577 if (!(tcd & FBNIC_TCD_TWQ1)) 578 head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK, 579 tcd); 580 /* Currently all err status bits are related to 581 * timestamps and as those have yet to be added 582 * they are skipped for now. 583 */ 584 break; 585 case FBNIC_TCD_TYPE_1: 586 if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1)) 587 break; 588 589 fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0); 590 break; 591 default: 592 break; 593 } 594 595 raw_tcd++; 596 head++; 597 if (!(head & cmpl->size_mask)) { 598 done ^= cpu_to_le64(FBNIC_TCD_DONE); 599 raw_tcd = &cmpl->desc[0]; 600 } 601 } 602 603 /* Record the current head/tail of the queue */ 604 if (cmpl->head != head) { 605 cmpl->head = head; 606 writel(head & cmpl->size_mask, cmpl->doorbell); 607 } 608 609 /* Unmap and free processed buffers */ 610 fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0); 611 } 612 613 static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, 614 struct fbnic_ring *ring, unsigned int hw_head) 615 { 616 unsigned int head = ring->head; 617 618 if (head == hw_head) 619 return; 620 621 do { 622 fbnic_page_pool_drain(ring, head, nv, napi_budget); 623 624 head++; 625 head &= ring->size_mask; 626 } while (head != hw_head); 627 628 ring->head = head; 629 } 630 631 static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) 632 { 633 __le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT]; 634 dma_addr_t dma = page_pool_get_dma_addr(page); 635 u64 bd, i = FBNIC_BD_FRAG_COUNT; 636 637 bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) | 638 FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id); 639 640 /* In the case that a page size is larger than 4K we will map a 641 * single page to multiple fragments. The fragments will be 642 * FBNIC_BD_FRAG_COUNT in size and the lower n bits will be use 643 * to indicate the individual fragment IDs. 644 */ 645 do { 646 *bdq_desc = cpu_to_le64(bd); 647 bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) | 648 FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1); 649 } while (--i); 650 } 651 652 static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) 653 { 654 unsigned int count = fbnic_desc_unused(bdq); 655 unsigned int i = bdq->tail; 656 657 if (!count) 658 return; 659 660 do { 661 struct page *page; 662 663 page = page_pool_dev_alloc_pages(nv->page_pool); 664 if (!page) 665 break; 666 667 fbnic_page_pool_init(bdq, i, page); 668 fbnic_bd_prep(bdq, i, page); 669 670 i++; 671 i &= bdq->size_mask; 672 673 count--; 674 } while (count); 675 676 if (bdq->tail != i) { 677 bdq->tail = i; 678 679 /* Force DMA writes to flush before writing to tail */ 680 dma_wmb(); 681 682 writel(i, bdq->doorbell); 683 } 684 } 685 686 static unsigned int fbnic_hdr_pg_start(unsigned int pg_off) 687 { 688 /* The headroom of the first header may be larger than FBNIC_RX_HROOM 689 * due to alignment. So account for that by just making the page 690 * offset 0 if we are starting at the first header. 691 */ 692 if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM && 693 pg_off == ALIGN(FBNIC_RX_HROOM, 128)) 694 return 0; 695 696 return pg_off - FBNIC_RX_HROOM; 697 } 698 699 static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len) 700 { 701 /* Determine the end of the buffer by finding the start of the next 702 * and then subtracting the headroom from that frame. 703 */ 704 pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM; 705 706 return ALIGN(pg_off, 128) - FBNIC_RX_HROOM; 707 } 708 709 static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, 710 struct fbnic_pkt_buff *pkt, 711 struct fbnic_q_triad *qt) 712 { 713 unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 714 unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); 715 struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx); 716 unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); 717 unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom; 718 unsigned char *hdr_start; 719 720 /* data_hard_start should always be NULL when this is called */ 721 WARN_ON_ONCE(pkt->buff.data_hard_start); 722 723 /* Short-cut the end calculation if we know page is fully consumed */ 724 hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? 725 FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len); 726 hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off); 727 728 headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; 729 frame_sz = hdr_pg_end - hdr_pg_start; 730 xdp_init_buff(&pkt->buff, frame_sz, NULL); 731 hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * 732 FBNIC_BD_FRAG_SIZE; 733 734 /* Sync DMA buffer */ 735 dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), 736 hdr_pg_start, frame_sz, 737 DMA_BIDIRECTIONAL); 738 739 /* Build frame around buffer */ 740 hdr_start = page_address(page) + hdr_pg_start; 741 742 xdp_prepare_buff(&pkt->buff, hdr_start, headroom, 743 len - FBNIC_RX_PAD, true); 744 745 pkt->data_truesize = 0; 746 pkt->data_len = 0; 747 pkt->nr_frags = 0; 748 } 749 750 static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, 751 struct fbnic_pkt_buff *pkt, 752 struct fbnic_q_triad *qt) 753 { 754 unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 755 unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); 756 unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); 757 struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx); 758 struct skb_shared_info *shinfo; 759 unsigned int truesize; 760 761 truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? 762 FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128); 763 764 pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * 765 FBNIC_BD_FRAG_SIZE; 766 767 /* Sync DMA buffer */ 768 dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), 769 pg_off, truesize, DMA_BIDIRECTIONAL); 770 771 /* Add page to xdp shared info */ 772 shinfo = xdp_get_shared_info_from_buff(&pkt->buff); 773 774 /* We use gso_segs to store truesize */ 775 pkt->data_truesize += truesize; 776 777 __skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len); 778 779 /* Store data_len in gso_size */ 780 pkt->data_len += len; 781 } 782 783 static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv, 784 struct fbnic_pkt_buff *pkt, int budget) 785 { 786 struct skb_shared_info *shinfo; 787 struct page *page; 788 int nr_frags; 789 790 if (!pkt->buff.data_hard_start) 791 return; 792 793 shinfo = xdp_get_shared_info_from_buff(&pkt->buff); 794 nr_frags = pkt->nr_frags; 795 796 while (nr_frags--) { 797 page = skb_frag_page(&shinfo->frags[nr_frags]); 798 page_pool_put_full_page(nv->page_pool, page, !!budget); 799 } 800 801 page = virt_to_page(pkt->buff.data_hard_start); 802 page_pool_put_full_page(nv->page_pool, page, !!budget); 803 } 804 805 static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv, 806 struct fbnic_pkt_buff *pkt) 807 { 808 unsigned int nr_frags = pkt->nr_frags; 809 struct skb_shared_info *shinfo; 810 unsigned int truesize; 811 struct sk_buff *skb; 812 813 truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM - 814 pkt->buff.data_hard_start; 815 816 /* Build frame around buffer */ 817 skb = napi_build_skb(pkt->buff.data_hard_start, truesize); 818 if (unlikely(!skb)) 819 return NULL; 820 821 /* Push data pointer to start of data, put tail to end of data */ 822 skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start); 823 __skb_put(skb, pkt->buff.data_end - pkt->buff.data); 824 825 /* Add tracking for metadata at the start of the frame */ 826 skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta); 827 828 /* Add Rx frags */ 829 if (nr_frags) { 830 /* Verify that shared info didn't move */ 831 shinfo = xdp_get_shared_info_from_buff(&pkt->buff); 832 WARN_ON(skb_shinfo(skb) != shinfo); 833 834 skb->truesize += pkt->data_truesize; 835 skb->data_len += pkt->data_len; 836 shinfo->nr_frags = nr_frags; 837 skb->len += pkt->data_len; 838 } 839 840 skb_mark_for_recycle(skb); 841 842 /* Set MAC header specific fields */ 843 skb->protocol = eth_type_trans(skb, nv->napi.dev); 844 845 /* Add timestamp if present */ 846 if (pkt->hwtstamp) 847 skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; 848 849 return skb; 850 } 851 852 static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd) 853 { 854 return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 : 855 (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L3 : 856 PKT_HASH_TYPE_L2; 857 } 858 859 static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd, 860 struct fbnic_pkt_buff *pkt) 861 { 862 struct fbnic_net *fbn; 863 u64 ns, ts; 864 865 if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd)) 866 return; 867 868 fbn = netdev_priv(nv->napi.dev); 869 ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd); 870 ns = fbnic_ts40_to_ns(fbn, ts); 871 872 /* Add timestamp to shared info */ 873 pkt->hwtstamp = ns_to_ktime(ns); 874 } 875 876 static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv, 877 u64 rcd, struct sk_buff *skb, 878 struct fbnic_q_triad *qt) 879 { 880 struct net_device *netdev = nv->napi.dev; 881 struct fbnic_ring *rcq = &qt->cmpl; 882 883 fbnic_rx_csum(rcd, skb, rcq); 884 885 if (netdev->features & NETIF_F_RXHASH) 886 skb_set_hash(skb, 887 FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd), 888 fbnic_skb_hash_type(rcd)); 889 890 skb_record_rx_queue(skb, rcq->q_idx); 891 } 892 893 static bool fbnic_rcd_metadata_err(u64 rcd) 894 { 895 return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd); 896 } 897 898 static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, 899 struct fbnic_q_triad *qt, int budget) 900 { 901 unsigned int packets = 0, bytes = 0, dropped = 0; 902 struct fbnic_ring *rcq = &qt->cmpl; 903 struct fbnic_pkt_buff *pkt; 904 s32 head0 = -1, head1 = -1; 905 __le64 *raw_rcd, done; 906 u32 head = rcq->head; 907 908 done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0; 909 raw_rcd = &rcq->desc[head & rcq->size_mask]; 910 pkt = rcq->pkt; 911 912 /* Walk the completion queue collecting the heads reported by NIC */ 913 while (likely(packets < budget)) { 914 struct sk_buff *skb = ERR_PTR(-EINVAL); 915 u64 rcd; 916 917 if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done) 918 break; 919 920 dma_rmb(); 921 922 rcd = le64_to_cpu(*raw_rcd); 923 924 switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) { 925 case FBNIC_RCD_TYPE_HDR_AL: 926 head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 927 fbnic_pkt_prepare(nv, rcd, pkt, qt); 928 929 break; 930 case FBNIC_RCD_TYPE_PAY_AL: 931 head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 932 fbnic_add_rx_frag(nv, rcd, pkt, qt); 933 934 break; 935 case FBNIC_RCD_TYPE_OPT_META: 936 /* Only type 0 is currently supported */ 937 if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd)) 938 break; 939 940 fbnic_rx_tstamp(nv, rcd, pkt); 941 942 /* We currently ignore the action table index */ 943 break; 944 case FBNIC_RCD_TYPE_META: 945 if (likely(!fbnic_rcd_metadata_err(rcd))) 946 skb = fbnic_build_skb(nv, pkt); 947 948 /* Populate skb and invalidate XDP */ 949 if (!IS_ERR_OR_NULL(skb)) { 950 fbnic_populate_skb_fields(nv, rcd, skb, qt); 951 952 packets++; 953 bytes += skb->len; 954 955 napi_gro_receive(&nv->napi, skb); 956 } else { 957 dropped++; 958 fbnic_put_pkt_buff(nv, pkt, 1); 959 } 960 961 pkt->buff.data_hard_start = NULL; 962 963 break; 964 } 965 966 raw_rcd++; 967 head++; 968 if (!(head & rcq->size_mask)) { 969 done ^= cpu_to_le64(FBNIC_RCD_DONE); 970 raw_rcd = &rcq->desc[0]; 971 } 972 } 973 974 u64_stats_update_begin(&rcq->stats.syncp); 975 rcq->stats.packets += packets; 976 rcq->stats.bytes += bytes; 977 /* Re-add ethernet header length (removed in fbnic_build_skb) */ 978 rcq->stats.bytes += ETH_HLEN * packets; 979 rcq->stats.dropped += dropped; 980 u64_stats_update_end(&rcq->stats.syncp); 981 982 /* Unmap and free processed buffers */ 983 if (head0 >= 0) 984 fbnic_clean_bdq(nv, budget, &qt->sub0, head0); 985 fbnic_fill_bdq(nv, &qt->sub0); 986 987 if (head1 >= 0) 988 fbnic_clean_bdq(nv, budget, &qt->sub1, head1); 989 fbnic_fill_bdq(nv, &qt->sub1); 990 991 /* Record the current head/tail of the queue */ 992 if (rcq->head != head) { 993 rcq->head = head; 994 writel(head & rcq->size_mask, rcq->doorbell); 995 } 996 997 return packets; 998 } 999 1000 static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv) 1001 { 1002 struct fbnic_dev *fbd = nv->fbd; 1003 u32 v_idx = nv->v_idx; 1004 1005 fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32)); 1006 } 1007 1008 static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv) 1009 { 1010 struct fbnic_dev *fbd = nv->fbd; 1011 u32 v_idx = nv->v_idx; 1012 1013 fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx), 1014 FBNIC_INTR_CQ_REARM_INTR_UNMASK); 1015 } 1016 1017 static int fbnic_poll(struct napi_struct *napi, int budget) 1018 { 1019 struct fbnic_napi_vector *nv = container_of(napi, 1020 struct fbnic_napi_vector, 1021 napi); 1022 int i, j, work_done = 0; 1023 1024 for (i = 0; i < nv->txt_count; i++) 1025 fbnic_clean_tcq(nv, &nv->qt[i], budget); 1026 1027 for (j = 0; j < nv->rxt_count; j++, i++) 1028 work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget); 1029 1030 if (work_done >= budget) 1031 return budget; 1032 1033 if (likely(napi_complete_done(napi, work_done))) 1034 fbnic_nv_irq_rearm(nv); 1035 1036 return 0; 1037 } 1038 1039 irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) 1040 { 1041 struct fbnic_napi_vector *nv = *(void **)data; 1042 1043 napi_schedule_irqoff(&nv->napi); 1044 1045 return IRQ_HANDLED; 1046 } 1047 1048 void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, 1049 struct fbnic_ring *rxr) 1050 { 1051 struct fbnic_queue_stats *stats = &rxr->stats; 1052 1053 /* Capture stats from queues before dissasociating them */ 1054 fbn->rx_stats.bytes += stats->bytes; 1055 fbn->rx_stats.packets += stats->packets; 1056 fbn->rx_stats.dropped += stats->dropped; 1057 } 1058 1059 void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, 1060 struct fbnic_ring *txr) 1061 { 1062 struct fbnic_queue_stats *stats = &txr->stats; 1063 1064 /* Capture stats from queues before dissasociating them */ 1065 fbn->tx_stats.bytes += stats->bytes; 1066 fbn->tx_stats.packets += stats->packets; 1067 fbn->tx_stats.dropped += stats->dropped; 1068 fbn->tx_stats.ts_lost += stats->ts_lost; 1069 fbn->tx_stats.ts_packets += stats->ts_packets; 1070 } 1071 1072 static void fbnic_remove_tx_ring(struct fbnic_net *fbn, 1073 struct fbnic_ring *txr) 1074 { 1075 if (!(txr->flags & FBNIC_RING_F_STATS)) 1076 return; 1077 1078 fbnic_aggregate_ring_tx_counters(fbn, txr); 1079 1080 /* Remove pointer to the Tx ring */ 1081 WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr); 1082 fbn->tx[txr->q_idx] = NULL; 1083 } 1084 1085 static void fbnic_remove_rx_ring(struct fbnic_net *fbn, 1086 struct fbnic_ring *rxr) 1087 { 1088 if (!(rxr->flags & FBNIC_RING_F_STATS)) 1089 return; 1090 1091 fbnic_aggregate_ring_rx_counters(fbn, rxr); 1092 1093 /* Remove pointer to the Rx ring */ 1094 WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr); 1095 fbn->rx[rxr->q_idx] = NULL; 1096 } 1097 1098 static void fbnic_free_napi_vector(struct fbnic_net *fbn, 1099 struct fbnic_napi_vector *nv) 1100 { 1101 struct fbnic_dev *fbd = nv->fbd; 1102 int i, j; 1103 1104 for (i = 0; i < nv->txt_count; i++) { 1105 fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0); 1106 fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl); 1107 } 1108 1109 for (j = 0; j < nv->rxt_count; j++, i++) { 1110 fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0); 1111 fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1); 1112 fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl); 1113 } 1114 1115 fbnic_napi_free_irq(fbd, nv); 1116 page_pool_destroy(nv->page_pool); 1117 netif_napi_del(&nv->napi); 1118 fbn->napi[fbnic_napi_idx(nv)] = NULL; 1119 kfree(nv); 1120 } 1121 1122 void fbnic_free_napi_vectors(struct fbnic_net *fbn) 1123 { 1124 int i; 1125 1126 for (i = 0; i < fbn->num_napi; i++) 1127 if (fbn->napi[i]) 1128 fbnic_free_napi_vector(fbn, fbn->napi[i]); 1129 } 1130 1131 #define FBNIC_PAGE_POOL_FLAGS \ 1132 (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) 1133 1134 static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, 1135 struct fbnic_napi_vector *nv) 1136 { 1137 struct page_pool_params pp_params = { 1138 .order = 0, 1139 .flags = FBNIC_PAGE_POOL_FLAGS, 1140 .pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count, 1141 .nid = NUMA_NO_NODE, 1142 .dev = nv->dev, 1143 .dma_dir = DMA_BIDIRECTIONAL, 1144 .offset = 0, 1145 .max_len = PAGE_SIZE 1146 }; 1147 struct page_pool *pp; 1148 1149 /* Page pool cannot exceed a size of 32768. This doesn't limit the 1150 * pages on the ring but the number we can have cached waiting on 1151 * the next use. 1152 * 1153 * TBD: Can this be reduced further? Would a multiple of 1154 * NAPI_POLL_WEIGHT possibly make more sense? The question is how 1155 * may pages do we need to hold in reserve to get the best return 1156 * without hogging too much system memory. 1157 */ 1158 if (pp_params.pool_size > 32768) 1159 pp_params.pool_size = 32768; 1160 1161 pp = page_pool_create(&pp_params); 1162 if (IS_ERR(pp)) 1163 return PTR_ERR(pp); 1164 1165 nv->page_pool = pp; 1166 1167 return 0; 1168 } 1169 1170 static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, 1171 int q_idx, u8 flags) 1172 { 1173 u64_stats_init(&ring->stats.syncp); 1174 ring->doorbell = doorbell; 1175 ring->q_idx = q_idx; 1176 ring->flags = flags; 1177 } 1178 1179 static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, 1180 unsigned int v_count, unsigned int v_idx, 1181 unsigned int txq_count, unsigned int txq_idx, 1182 unsigned int rxq_count, unsigned int rxq_idx) 1183 { 1184 int txt_count = txq_count, rxt_count = rxq_count; 1185 u32 __iomem *uc_addr = fbd->uc_addr0; 1186 struct fbnic_napi_vector *nv; 1187 struct fbnic_q_triad *qt; 1188 int qt_count, err; 1189 u32 __iomem *db; 1190 1191 qt_count = txt_count + rxq_count; 1192 if (!qt_count) 1193 return -EINVAL; 1194 1195 /* If MMIO has already failed there are no rings to initialize */ 1196 if (!uc_addr) 1197 return -EIO; 1198 1199 /* Allocate NAPI vector and queue triads */ 1200 nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL); 1201 if (!nv) 1202 return -ENOMEM; 1203 1204 /* Record queue triad counts */ 1205 nv->txt_count = txt_count; 1206 nv->rxt_count = rxt_count; 1207 1208 /* Provide pointer back to fbnic and MSI-X vectors */ 1209 nv->fbd = fbd; 1210 nv->v_idx = v_idx; 1211 1212 /* Tie napi to netdev */ 1213 fbn->napi[fbnic_napi_idx(nv)] = nv; 1214 netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); 1215 1216 /* Record IRQ to NAPI struct */ 1217 netif_napi_set_irq(&nv->napi, 1218 pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); 1219 1220 /* Tie nv back to PCIe dev */ 1221 nv->dev = fbd->dev; 1222 1223 /* Allocate page pool */ 1224 if (rxq_count) { 1225 err = fbnic_alloc_nv_page_pool(fbn, nv); 1226 if (err) 1227 goto napi_del; 1228 } 1229 1230 /* Request the IRQ for napi vector */ 1231 err = fbnic_napi_request_irq(fbd, nv); 1232 if (err) 1233 goto pp_destroy; 1234 1235 /* Initialize queue triads */ 1236 qt = nv->qt; 1237 1238 while (txt_count) { 1239 /* Configure Tx queue */ 1240 db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL]; 1241 1242 /* Assign Tx queue to netdev if applicable */ 1243 if (txq_count > 0) { 1244 u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; 1245 1246 fbnic_ring_init(&qt->sub0, db, txq_idx, flags); 1247 fbn->tx[txq_idx] = &qt->sub0; 1248 txq_count--; 1249 } else { 1250 fbnic_ring_init(&qt->sub0, db, 0, 1251 FBNIC_RING_F_DISABLED); 1252 } 1253 1254 /* Configure Tx completion queue */ 1255 db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD]; 1256 fbnic_ring_init(&qt->cmpl, db, 0, 0); 1257 1258 /* Update Tx queue index */ 1259 txt_count--; 1260 txq_idx += v_count; 1261 1262 /* Move to next queue triad */ 1263 qt++; 1264 } 1265 1266 while (rxt_count) { 1267 /* Configure header queue */ 1268 db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL]; 1269 fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX); 1270 1271 /* Configure payload queue */ 1272 db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL]; 1273 fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX); 1274 1275 /* Configure Rx completion queue */ 1276 db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD]; 1277 fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS); 1278 fbn->rx[rxq_idx] = &qt->cmpl; 1279 1280 /* Update Rx queue index */ 1281 rxt_count--; 1282 rxq_idx += v_count; 1283 1284 /* Move to next queue triad */ 1285 qt++; 1286 } 1287 1288 return 0; 1289 1290 pp_destroy: 1291 page_pool_destroy(nv->page_pool); 1292 napi_del: 1293 netif_napi_del(&nv->napi); 1294 fbn->napi[fbnic_napi_idx(nv)] = NULL; 1295 kfree(nv); 1296 return err; 1297 } 1298 1299 int fbnic_alloc_napi_vectors(struct fbnic_net *fbn) 1300 { 1301 unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS; 1302 unsigned int num_tx = fbn->num_tx_queues; 1303 unsigned int num_rx = fbn->num_rx_queues; 1304 unsigned int num_napi = fbn->num_napi; 1305 struct fbnic_dev *fbd = fbn->fbd; 1306 int err; 1307 1308 /* Allocate 1 Tx queue per napi vector */ 1309 if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) { 1310 while (num_tx) { 1311 err = fbnic_alloc_napi_vector(fbd, fbn, 1312 num_napi, v_idx, 1313 1, txq_idx, 0, 0); 1314 if (err) 1315 goto free_vectors; 1316 1317 /* Update counts and index */ 1318 num_tx--; 1319 txq_idx++; 1320 1321 v_idx++; 1322 } 1323 } 1324 1325 /* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */ 1326 while (num_rx | num_tx) { 1327 int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx); 1328 int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx); 1329 1330 err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx, 1331 tqpv, txq_idx, rqpv, rxq_idx); 1332 if (err) 1333 goto free_vectors; 1334 1335 /* Update counts and index */ 1336 num_tx -= tqpv; 1337 txq_idx++; 1338 1339 num_rx -= rqpv; 1340 rxq_idx++; 1341 1342 v_idx++; 1343 } 1344 1345 return 0; 1346 1347 free_vectors: 1348 fbnic_free_napi_vectors(fbn); 1349 1350 return -ENOMEM; 1351 } 1352 1353 static void fbnic_free_ring_resources(struct device *dev, 1354 struct fbnic_ring *ring) 1355 { 1356 kvfree(ring->buffer); 1357 ring->buffer = NULL; 1358 1359 /* If size is not set there are no descriptors present */ 1360 if (!ring->size) 1361 return; 1362 1363 dma_free_coherent(dev, ring->size, ring->desc, ring->dma); 1364 ring->size_mask = 0; 1365 ring->size = 0; 1366 } 1367 1368 static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn, 1369 struct fbnic_ring *txr) 1370 { 1371 struct device *dev = fbn->netdev->dev.parent; 1372 size_t size; 1373 1374 /* Round size up to nearest 4K */ 1375 size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096); 1376 1377 txr->desc = dma_alloc_coherent(dev, size, &txr->dma, 1378 GFP_KERNEL | __GFP_NOWARN); 1379 if (!txr->desc) 1380 return -ENOMEM; 1381 1382 /* txq_size should be a power of 2, so mask is just that -1 */ 1383 txr->size_mask = fbn->txq_size - 1; 1384 txr->size = size; 1385 1386 return 0; 1387 } 1388 1389 static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr) 1390 { 1391 size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1); 1392 1393 txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN); 1394 1395 return txr->tx_buf ? 0 : -ENOMEM; 1396 } 1397 1398 static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn, 1399 struct fbnic_ring *txr) 1400 { 1401 struct device *dev = fbn->netdev->dev.parent; 1402 int err; 1403 1404 if (txr->flags & FBNIC_RING_F_DISABLED) 1405 return 0; 1406 1407 err = fbnic_alloc_tx_ring_desc(fbn, txr); 1408 if (err) 1409 return err; 1410 1411 if (!(txr->flags & FBNIC_RING_F_CTX)) 1412 return 0; 1413 1414 err = fbnic_alloc_tx_ring_buffer(txr); 1415 if (err) 1416 goto free_desc; 1417 1418 return 0; 1419 1420 free_desc: 1421 fbnic_free_ring_resources(dev, txr); 1422 return err; 1423 } 1424 1425 static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn, 1426 struct fbnic_ring *rxr) 1427 { 1428 struct device *dev = fbn->netdev->dev.parent; 1429 size_t desc_size = sizeof(*rxr->desc); 1430 u32 rxq_size; 1431 size_t size; 1432 1433 switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) { 1434 case FBNIC_QUEUE_BDQ_HPQ_TAIL: 1435 rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT; 1436 desc_size *= FBNIC_BD_FRAG_COUNT; 1437 break; 1438 case FBNIC_QUEUE_BDQ_PPQ_TAIL: 1439 rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT; 1440 desc_size *= FBNIC_BD_FRAG_COUNT; 1441 break; 1442 case FBNIC_QUEUE_RCQ_HEAD: 1443 rxq_size = fbn->rcq_size; 1444 break; 1445 default: 1446 return -EINVAL; 1447 } 1448 1449 /* Round size up to nearest 4K */ 1450 size = ALIGN(array_size(desc_size, rxq_size), 4096); 1451 1452 rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma, 1453 GFP_KERNEL | __GFP_NOWARN); 1454 if (!rxr->desc) 1455 return -ENOMEM; 1456 1457 /* rxq_size should be a power of 2, so mask is just that -1 */ 1458 rxr->size_mask = rxq_size - 1; 1459 rxr->size = size; 1460 1461 return 0; 1462 } 1463 1464 static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr) 1465 { 1466 size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1); 1467 1468 if (rxr->flags & FBNIC_RING_F_CTX) 1469 size = sizeof(*rxr->rx_buf) * (rxr->size_mask + 1); 1470 else 1471 size = sizeof(*rxr->pkt); 1472 1473 rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN); 1474 1475 return rxr->rx_buf ? 0 : -ENOMEM; 1476 } 1477 1478 static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn, 1479 struct fbnic_ring *rxr) 1480 { 1481 struct device *dev = fbn->netdev->dev.parent; 1482 int err; 1483 1484 err = fbnic_alloc_rx_ring_desc(fbn, rxr); 1485 if (err) 1486 return err; 1487 1488 err = fbnic_alloc_rx_ring_buffer(rxr); 1489 if (err) 1490 goto free_desc; 1491 1492 return 0; 1493 1494 free_desc: 1495 fbnic_free_ring_resources(dev, rxr); 1496 return err; 1497 } 1498 1499 static void fbnic_free_qt_resources(struct fbnic_net *fbn, 1500 struct fbnic_q_triad *qt) 1501 { 1502 struct device *dev = fbn->netdev->dev.parent; 1503 1504 fbnic_free_ring_resources(dev, &qt->cmpl); 1505 fbnic_free_ring_resources(dev, &qt->sub1); 1506 fbnic_free_ring_resources(dev, &qt->sub0); 1507 } 1508 1509 static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, 1510 struct fbnic_q_triad *qt) 1511 { 1512 struct device *dev = fbn->netdev->dev.parent; 1513 int err; 1514 1515 err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0); 1516 if (err) 1517 return err; 1518 1519 err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl); 1520 if (err) 1521 goto free_sub1; 1522 1523 return 0; 1524 1525 free_sub1: 1526 fbnic_free_ring_resources(dev, &qt->sub0); 1527 return err; 1528 } 1529 1530 static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn, 1531 struct fbnic_q_triad *qt) 1532 { 1533 struct device *dev = fbn->netdev->dev.parent; 1534 int err; 1535 1536 err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); 1537 if (err) 1538 return err; 1539 1540 err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1); 1541 if (err) 1542 goto free_sub0; 1543 1544 err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl); 1545 if (err) 1546 goto free_sub1; 1547 1548 return 0; 1549 1550 free_sub1: 1551 fbnic_free_ring_resources(dev, &qt->sub1); 1552 free_sub0: 1553 fbnic_free_ring_resources(dev, &qt->sub0); 1554 return err; 1555 } 1556 1557 static void fbnic_free_nv_resources(struct fbnic_net *fbn, 1558 struct fbnic_napi_vector *nv) 1559 { 1560 int i, j; 1561 1562 /* Free Tx Resources */ 1563 for (i = 0; i < nv->txt_count; i++) 1564 fbnic_free_qt_resources(fbn, &nv->qt[i]); 1565 1566 for (j = 0; j < nv->rxt_count; j++, i++) 1567 fbnic_free_qt_resources(fbn, &nv->qt[i]); 1568 } 1569 1570 static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, 1571 struct fbnic_napi_vector *nv) 1572 { 1573 int i, j, err; 1574 1575 /* Allocate Tx Resources */ 1576 for (i = 0; i < nv->txt_count; i++) { 1577 err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]); 1578 if (err) 1579 goto free_resources; 1580 } 1581 1582 /* Allocate Rx Resources */ 1583 for (j = 0; j < nv->rxt_count; j++, i++) { 1584 err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]); 1585 if (err) 1586 goto free_resources; 1587 } 1588 1589 return 0; 1590 1591 free_resources: 1592 while (i--) 1593 fbnic_free_qt_resources(fbn, &nv->qt[i]); 1594 return err; 1595 } 1596 1597 void fbnic_free_resources(struct fbnic_net *fbn) 1598 { 1599 int i; 1600 1601 for (i = 0; i < fbn->num_napi; i++) 1602 fbnic_free_nv_resources(fbn, fbn->napi[i]); 1603 } 1604 1605 int fbnic_alloc_resources(struct fbnic_net *fbn) 1606 { 1607 int i, err = -ENODEV; 1608 1609 for (i = 0; i < fbn->num_napi; i++) { 1610 err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]); 1611 if (err) 1612 goto free_resources; 1613 } 1614 1615 return 0; 1616 1617 free_resources: 1618 while (i--) 1619 fbnic_free_nv_resources(fbn, fbn->napi[i]); 1620 1621 return err; 1622 } 1623 1624 static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv) 1625 { 1626 int i, j; 1627 1628 /* Associate Tx queue with NAPI */ 1629 for (i = 0; i < nv->txt_count; i++) { 1630 struct fbnic_q_triad *qt = &nv->qt[i]; 1631 1632 netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, 1633 NETDEV_QUEUE_TYPE_TX, &nv->napi); 1634 } 1635 1636 /* Associate Rx queue with NAPI */ 1637 for (j = 0; j < nv->rxt_count; j++, i++) { 1638 struct fbnic_q_triad *qt = &nv->qt[i]; 1639 1640 netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, 1641 NETDEV_QUEUE_TYPE_RX, &nv->napi); 1642 } 1643 } 1644 1645 static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv) 1646 { 1647 int i, j; 1648 1649 /* Disassociate Tx queue from NAPI */ 1650 for (i = 0; i < nv->txt_count; i++) { 1651 struct fbnic_q_triad *qt = &nv->qt[i]; 1652 1653 netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, 1654 NETDEV_QUEUE_TYPE_TX, NULL); 1655 } 1656 1657 /* Disassociate Rx queue from NAPI */ 1658 for (j = 0; j < nv->rxt_count; j++, i++) { 1659 struct fbnic_q_triad *qt = &nv->qt[i]; 1660 1661 netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, 1662 NETDEV_QUEUE_TYPE_RX, NULL); 1663 } 1664 } 1665 1666 int fbnic_set_netif_queues(struct fbnic_net *fbn) 1667 { 1668 int i, err; 1669 1670 err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues, 1671 fbn->num_rx_queues); 1672 if (err) 1673 return err; 1674 1675 for (i = 0; i < fbn->num_napi; i++) 1676 fbnic_set_netif_napi(fbn->napi[i]); 1677 1678 return 0; 1679 } 1680 1681 void fbnic_reset_netif_queues(struct fbnic_net *fbn) 1682 { 1683 int i; 1684 1685 for (i = 0; i < fbn->num_napi; i++) 1686 fbnic_reset_netif_napi(fbn->napi[i]); 1687 } 1688 1689 static void fbnic_disable_twq0(struct fbnic_ring *txr) 1690 { 1691 u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL); 1692 1693 twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE; 1694 1695 fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl); 1696 } 1697 1698 static void fbnic_disable_tcq(struct fbnic_ring *txr) 1699 { 1700 fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0); 1701 fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK); 1702 } 1703 1704 static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) 1705 { 1706 u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL); 1707 1708 bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE; 1709 1710 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl); 1711 } 1712 1713 static void fbnic_disable_rcq(struct fbnic_ring *rxr) 1714 { 1715 fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0); 1716 fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK); 1717 } 1718 1719 void fbnic_napi_disable(struct fbnic_net *fbn) 1720 { 1721 int i; 1722 1723 for (i = 0; i < fbn->num_napi; i++) { 1724 napi_disable(&fbn->napi[i]->napi); 1725 1726 fbnic_nv_irq_disable(fbn->napi[i]); 1727 } 1728 } 1729 1730 void fbnic_disable(struct fbnic_net *fbn) 1731 { 1732 struct fbnic_dev *fbd = fbn->fbd; 1733 int i, j, t; 1734 1735 for (i = 0; i < fbn->num_napi; i++) { 1736 struct fbnic_napi_vector *nv = fbn->napi[i]; 1737 1738 /* Disable Tx queue triads */ 1739 for (t = 0; t < nv->txt_count; t++) { 1740 struct fbnic_q_triad *qt = &nv->qt[t]; 1741 1742 fbnic_disable_twq0(&qt->sub0); 1743 fbnic_disable_tcq(&qt->cmpl); 1744 } 1745 1746 /* Disable Rx queue triads */ 1747 for (j = 0; j < nv->rxt_count; j++, t++) { 1748 struct fbnic_q_triad *qt = &nv->qt[t]; 1749 1750 fbnic_disable_bdq(&qt->sub0, &qt->sub1); 1751 fbnic_disable_rcq(&qt->cmpl); 1752 } 1753 } 1754 1755 fbnic_wrfl(fbd); 1756 } 1757 1758 static void fbnic_tx_flush(struct fbnic_dev *fbd) 1759 { 1760 netdev_warn(fbd->netdev, "triggering Tx flush\n"); 1761 1762 fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 1763 FBNIC_TMI_DROP_CTRL_EN); 1764 } 1765 1766 static void fbnic_tx_flush_off(struct fbnic_dev *fbd) 1767 { 1768 fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0); 1769 } 1770 1771 struct fbnic_idle_regs { 1772 u32 reg_base; 1773 u8 reg_cnt; 1774 }; 1775 1776 static bool fbnic_all_idle(struct fbnic_dev *fbd, 1777 const struct fbnic_idle_regs *regs, 1778 unsigned int nregs) 1779 { 1780 unsigned int i, j; 1781 1782 for (i = 0; i < nregs; i++) { 1783 for (j = 0; j < regs[i].reg_cnt; j++) { 1784 if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U) 1785 return false; 1786 } 1787 } 1788 return true; 1789 } 1790 1791 static void fbnic_idle_dump(struct fbnic_dev *fbd, 1792 const struct fbnic_idle_regs *regs, 1793 unsigned int nregs, const char *dir, int err) 1794 { 1795 unsigned int i, j; 1796 1797 netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err); 1798 for (i = 0; i < nregs; i++) 1799 for (j = 0; j < regs[i].reg_cnt; j++) 1800 netdev_err(fbd->netdev, "0x%04x: %08x\n", 1801 regs[i].reg_base + j, 1802 fbnic_rd32(fbd, regs[i].reg_base + j)); 1803 } 1804 1805 int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail) 1806 { 1807 static const struct fbnic_idle_regs tx[] = { 1808 { FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TWQ_IDLE_CNT, }, 1809 { FBNIC_QM_TQS_IDLE(0), FBNIC_QM_TQS_IDLE_CNT, }, 1810 { FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TDE_IDLE_CNT, }, 1811 { FBNIC_QM_TCQ_IDLE(0), FBNIC_QM_TCQ_IDLE_CNT, }, 1812 }, rx[] = { 1813 { FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_HPQ_IDLE_CNT, }, 1814 { FBNIC_QM_PPQ_IDLE(0), FBNIC_QM_PPQ_IDLE_CNT, }, 1815 { FBNIC_QM_RCQ_IDLE(0), FBNIC_QM_RCQ_IDLE_CNT, }, 1816 }; 1817 bool idle; 1818 int err; 1819 1820 err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000, 1821 false, fbd, tx, ARRAY_SIZE(tx)); 1822 if (err == -ETIMEDOUT) { 1823 fbnic_tx_flush(fbd); 1824 err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 1825 2, 500000, false, 1826 fbd, tx, ARRAY_SIZE(tx)); 1827 fbnic_tx_flush_off(fbd); 1828 } 1829 if (err) { 1830 fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err); 1831 if (may_fail) 1832 return err; 1833 } 1834 1835 err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000, 1836 false, fbd, rx, ARRAY_SIZE(rx)); 1837 if (err) 1838 fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err); 1839 return err; 1840 } 1841 1842 void fbnic_flush(struct fbnic_net *fbn) 1843 { 1844 int i; 1845 1846 for (i = 0; i < fbn->num_napi; i++) { 1847 struct fbnic_napi_vector *nv = fbn->napi[i]; 1848 int j, t; 1849 1850 /* Flush any processed Tx Queue Triads and drop the rest */ 1851 for (t = 0; t < nv->txt_count; t++) { 1852 struct fbnic_q_triad *qt = &nv->qt[t]; 1853 struct netdev_queue *tx_queue; 1854 1855 /* Clean the work queues of unprocessed work */ 1856 fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); 1857 1858 /* Reset completion queue descriptor ring */ 1859 memset(qt->cmpl.desc, 0, qt->cmpl.size); 1860 1861 /* Nothing else to do if Tx queue is disabled */ 1862 if (qt->sub0.flags & FBNIC_RING_F_DISABLED) 1863 continue; 1864 1865 /* Reset BQL associated with Tx queue */ 1866 tx_queue = netdev_get_tx_queue(nv->napi.dev, 1867 qt->sub0.q_idx); 1868 netdev_tx_reset_queue(tx_queue); 1869 } 1870 1871 /* Flush any processed Rx Queue Triads and drop the rest */ 1872 for (j = 0; j < nv->rxt_count; j++, t++) { 1873 struct fbnic_q_triad *qt = &nv->qt[t]; 1874 1875 /* Clean the work queues of unprocessed work */ 1876 fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail); 1877 fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail); 1878 1879 /* Reset completion queue descriptor ring */ 1880 memset(qt->cmpl.desc, 0, qt->cmpl.size); 1881 1882 fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0); 1883 qt->cmpl.pkt->buff.data_hard_start = NULL; 1884 } 1885 } 1886 } 1887 1888 void fbnic_fill(struct fbnic_net *fbn) 1889 { 1890 int i; 1891 1892 for (i = 0; i < fbn->num_napi; i++) { 1893 struct fbnic_napi_vector *nv = fbn->napi[i]; 1894 int j, t; 1895 1896 /* Configure NAPI mapping and populate pages 1897 * in the BDQ rings to use for Rx 1898 */ 1899 for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { 1900 struct fbnic_q_triad *qt = &nv->qt[t]; 1901 1902 /* Populate the header and payload BDQs */ 1903 fbnic_fill_bdq(nv, &qt->sub0); 1904 fbnic_fill_bdq(nv, &qt->sub1); 1905 } 1906 } 1907 } 1908 1909 static void fbnic_enable_twq0(struct fbnic_ring *twq) 1910 { 1911 u32 log_size = fls(twq->size_mask); 1912 1913 if (!twq->size_mask) 1914 return; 1915 1916 /* Reset head/tail */ 1917 fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET); 1918 twq->tail = 0; 1919 twq->head = 0; 1920 1921 /* Store descriptor ring address and size */ 1922 fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma)); 1923 fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma)); 1924 1925 /* Write lower 4 bits of log size as 64K ring size is 0 */ 1926 fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf); 1927 1928 fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); 1929 } 1930 1931 static void fbnic_enable_tcq(struct fbnic_napi_vector *nv, 1932 struct fbnic_ring *tcq) 1933 { 1934 u32 log_size = fls(tcq->size_mask); 1935 1936 if (!tcq->size_mask) 1937 return; 1938 1939 /* Reset head/tail */ 1940 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET); 1941 tcq->tail = 0; 1942 tcq->head = 0; 1943 1944 /* Store descriptor ring address and size */ 1945 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma)); 1946 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma)); 1947 1948 /* Write lower 4 bits of log size as 64K ring size is 0 */ 1949 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf); 1950 1951 /* Store interrupt information for the completion queue */ 1952 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx); 1953 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2); 1954 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0); 1955 1956 /* Enable queue */ 1957 fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE); 1958 } 1959 1960 static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) 1961 { 1962 u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE; 1963 u32 log_size; 1964 1965 /* Reset head/tail */ 1966 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET); 1967 ppq->tail = 0; 1968 ppq->head = 0; 1969 hpq->tail = 0; 1970 hpq->head = 0; 1971 1972 log_size = fls(hpq->size_mask); 1973 1974 /* Store descriptor ring address and size */ 1975 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma)); 1976 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma)); 1977 1978 /* Write lower 4 bits of log size as 64K ring size is 0 */ 1979 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf); 1980 1981 if (!ppq->size_mask) 1982 goto write_ctl; 1983 1984 log_size = fls(ppq->size_mask); 1985 1986 /* Add enabling of PPQ to BDQ control */ 1987 bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE; 1988 1989 /* Store descriptor ring address and size */ 1990 fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma)); 1991 fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma)); 1992 fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf); 1993 1994 write_ctl: 1995 fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl); 1996 } 1997 1998 static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv, 1999 struct fbnic_ring *rcq) 2000 { 2001 u32 drop_mode, rcq_ctl; 2002 2003 drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE; 2004 2005 /* Specify packet layout */ 2006 rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) | 2007 FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) | 2008 FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM); 2009 2010 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl); 2011 } 2012 2013 static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, 2014 struct fbnic_ring *rcq) 2015 { 2016 u32 log_size = fls(rcq->size_mask); 2017 u32 rcq_ctl; 2018 2019 fbnic_config_drop_mode_rcq(nv, rcq); 2020 2021 rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | 2022 FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, 2023 FBNIC_RX_MAX_HDR) | 2024 FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK, 2025 FBNIC_RX_PAYLD_OFFSET) | 2026 FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK, 2027 FBNIC_RX_PAYLD_PG_CL); 2028 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl); 2029 2030 /* Reset head/tail */ 2031 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET); 2032 rcq->head = 0; 2033 rcq->tail = 0; 2034 2035 /* Store descriptor ring address and size */ 2036 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma)); 2037 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma)); 2038 2039 /* Write lower 4 bits of log size as 64K ring size is 0 */ 2040 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf); 2041 2042 /* Store interrupt information for the completion queue */ 2043 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv->v_idx); 2044 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, rcq->size_mask / 2); 2045 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0); 2046 2047 /* Enable queue */ 2048 fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE); 2049 } 2050 2051 void fbnic_enable(struct fbnic_net *fbn) 2052 { 2053 struct fbnic_dev *fbd = fbn->fbd; 2054 int i; 2055 2056 for (i = 0; i < fbn->num_napi; i++) { 2057 struct fbnic_napi_vector *nv = fbn->napi[i]; 2058 int j, t; 2059 2060 /* Setup Tx Queue Triads */ 2061 for (t = 0; t < nv->txt_count; t++) { 2062 struct fbnic_q_triad *qt = &nv->qt[t]; 2063 2064 fbnic_enable_twq0(&qt->sub0); 2065 fbnic_enable_tcq(nv, &qt->cmpl); 2066 } 2067 2068 /* Setup Rx Queue Triads */ 2069 for (j = 0; j < nv->rxt_count; j++, t++) { 2070 struct fbnic_q_triad *qt = &nv->qt[t]; 2071 2072 fbnic_enable_bdq(&qt->sub0, &qt->sub1); 2073 fbnic_config_drop_mode_rcq(nv, &qt->cmpl); 2074 fbnic_enable_rcq(nv, &qt->cmpl); 2075 } 2076 } 2077 2078 fbnic_wrfl(fbd); 2079 } 2080 2081 static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv) 2082 { 2083 struct fbnic_dev *fbd = nv->fbd; 2084 u32 val; 2085 2086 val = FBNIC_INTR_CQ_REARM_INTR_UNMASK; 2087 2088 fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val); 2089 } 2090 2091 void fbnic_napi_enable(struct fbnic_net *fbn) 2092 { 2093 u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {}; 2094 struct fbnic_dev *fbd = fbn->fbd; 2095 int i; 2096 2097 for (i = 0; i < fbn->num_napi; i++) { 2098 struct fbnic_napi_vector *nv = fbn->napi[i]; 2099 2100 napi_enable(&nv->napi); 2101 2102 fbnic_nv_irq_enable(nv); 2103 2104 /* Record bit used for NAPI IRQs so we can 2105 * set the mask appropriately 2106 */ 2107 irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32); 2108 } 2109 2110 /* Force the first interrupt on the device to guarantee 2111 * that any packets that may have been enqueued during the 2112 * bringup are processed. 2113 */ 2114 for (i = 0; i < ARRAY_SIZE(irqs); i++) { 2115 if (!irqs[i]) 2116 continue; 2117 fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]); 2118 } 2119 2120 fbnic_wrfl(fbd); 2121 } 2122 2123 void fbnic_napi_depletion_check(struct net_device *netdev) 2124 { 2125 struct fbnic_net *fbn = netdev_priv(netdev); 2126 u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {}; 2127 struct fbnic_dev *fbd = fbn->fbd; 2128 int i, j, t; 2129 2130 for (i = 0; i < fbn->num_napi; i++) { 2131 struct fbnic_napi_vector *nv = fbn->napi[i]; 2132 2133 /* Find RQs which are completely out of pages */ 2134 for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) { 2135 /* Assume 4 pages is always enough to fit a packet 2136 * and therefore generate a completion and an IRQ. 2137 */ 2138 if (fbnic_desc_used(&nv->qt[t].sub0) < 4 || 2139 fbnic_desc_used(&nv->qt[t].sub1) < 4) 2140 irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32); 2141 } 2142 } 2143 2144 for (i = 0; i < ARRAY_SIZE(irqs); i++) { 2145 if (!irqs[i]) 2146 continue; 2147 fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]); 2148 fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]); 2149 } 2150 2151 fbnic_wrfl(fbd); 2152 } 2153