// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Meta Platforms, Inc. and affiliates. */

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
#include <net/xdp.h>

#include "fbnic.h"
#include "fbnic_csr.h"
#include "fbnic_netdev.h"
#include "fbnic_txrx.h"

enum {
	FBNIC_XDP_PASS = 0,
	FBNIC_XDP_CONSUME,
	FBNIC_XDP_TX,
	FBNIC_XDP_LEN_ERR,
};

enum {
	FBNIC_XMIT_CB_TS	= 0x01,
};

struct fbnic_xmit_cb {
	u32 bytecount;
	u16 gso_segs;
	u8 desc_count;
	u8 flags;
	int hw_head;
};

#define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb))

#define FBNIC_XMIT_NOUNMAP	((void *)1)

static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring)
{
	unsigned long csr_base = (unsigned long)ring->doorbell;

	csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1);

	return (u32 __iomem *)csr_base;
}

static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr)
{
	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);

	return readl(csr_base + csr);
}

static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
{
	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);

	writel(val, csr_base + csr);
}

/**
 * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
 * @fbn: netdev priv of the FB NIC
 * @ts40: timestamp read from a descriptor
 *
 * Return: u64 value of PHC time in nanoseconds
 *
 * Convert truncated 40 bit device timestamp as read from a descriptor
 * to the full PHC time in nanoseconds.
 */
static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
{
	unsigned int s;
	u64 time_ns;
	s64 offset;
	u8 ts_top;
	u32 high;

	do {
		s = u64_stats_fetch_begin(&fbn->time_seq);
		offset = READ_ONCE(fbn->time_offset);
	} while (u64_stats_fetch_retry(&fbn->time_seq, s));

	high = READ_ONCE(fbn->time_high);

	/* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
	time_ns = (u64)(high >> 8) << 40 | ts40;

	/* Compare bits 32-39 between periodic reads and ts40,
	 * see if HW clock may have wrapped since last read. We are sure
	 * that periodic reads are always at least ~1 minute behind, so
	 * this logic works perfectly fine.
	 */
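	/* Illustrative example (hypothetical values): if the periodic read
	 * left bits 39:32 of the clock at 0xff while the descriptor's ts_top
	 * is 0x01, the device clock wrapped bit 40 after that read, so
	 * 1ULL << 40 is added below to keep time_ns monotonic.
	 */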
	ts_top = ts40 >> 32;
	if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
		time_ns += 1ULL << 40;

	return time_ns + offset;
}

static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
{
	return (ring->head - ring->tail - 1) & ring->size_mask;
}

static unsigned int fbnic_desc_used(struct fbnic_ring *ring)
{
	return (ring->tail - ring->head) & ring->size_mask;
}

static struct netdev_queue *txring_txq(const struct net_device *dev,
				       const struct fbnic_ring *ring)
{
	return netdev_get_tx_queue(dev, ring->q_idx);
}

static int fbnic_maybe_stop_tx(const struct net_device *dev,
			       struct fbnic_ring *ring,
			       const unsigned int size)
{
	struct netdev_queue *txq = txring_txq(dev, ring);
	int res;

	res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
				   FBNIC_TX_DESC_WAKEUP);
	if (!res) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.twq.stop++;
		u64_stats_update_end(&ring->stats.syncp);
	}

	return !res;
}

static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring)
{
	struct netdev_queue *dev_queue = txring_txq(skb->dev, ring);
	unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount;
	bool xmit_more = netdev_xmit_more();

	/* TBD: Request completion more often if xmit_more becomes large */

	return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more);
}

static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd)
{
	u64 raw_twd = le64_to_cpu(*twd);
	unsigned int len;
	dma_addr_t dma;

	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);

	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
}

static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
{
	u64 raw_twd = le64_to_cpu(*twd);
	unsigned int len;
	dma_addr_t dma;

	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);

	dma_unmap_page(dev, dma, len, DMA_TO_DEVICE);
}

#define FBNIC_TWD_TYPE(_type) \
	cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))

static bool fbnic_tx_tstamp(struct sk_buff *skb)
{
	struct fbnic_net *fbn;

	if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		return false;

	fbn = netdev_priv(skb->dev);
	if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF)
		return false;

	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
	FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS;
	FBNIC_XMIT_CB(skb)->hw_head = -1;

	return true;
}
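
/* A note on the LSO accounting below: FBNIC_XMIT_CB(skb)->bytecount starts
 * out as skb->len, which counts the headers only once. fbnic_tx_lso() adds
 * (gso_segs - 1) * hdrlen so that BQL and the byte counters reflect the
 * headers the hardware will replicate for every additional segment.
 */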
static bool
fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb,
	     struct skb_shared_info *shinfo, __le64 *meta,
	     unsigned int *l2len, unsigned int *i3len)
{
	unsigned int l3_type, l4_type, l4len, hdrlen;
	unsigned char *l4hdr;
	__be16 payload_len;

	if (unlikely(skb_cow_head(skb, 0)))
		return true;

	if (shinfo->gso_type & SKB_GSO_PARTIAL) {
		l3_type = FBNIC_TWD_L3_TYPE_OTHER;
	} else if (!skb->encapsulation) {
		if (ip_hdr(skb)->version == 4)
			l3_type = FBNIC_TWD_L3_TYPE_IPV4;
		else
			l3_type = FBNIC_TWD_L3_TYPE_IPV6;
	} else {
		unsigned int o3len;

		o3len = skb_inner_network_header(skb) - skb_network_header(skb);
		*i3len -= o3len;
		*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK,
						o3len / 2));
		l3_type = FBNIC_TWD_L3_TYPE_V6V6;
	}

	l4hdr = skb_checksum_start(skb);
	payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data));

	if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
		struct tcphdr *tcph = (struct tcphdr *)l4hdr;

		l4_type = FBNIC_TWD_L4_TYPE_TCP;
		l4len = __tcp_hdrlen((struct tcphdr *)l4hdr);
		csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len);
	} else {
		struct udphdr *udph = (struct udphdr *)l4hdr;

		l4_type = FBNIC_TWD_L4_TYPE_UDP;
		l4len = sizeof(struct udphdr);
		csum_replace_by_diff(&udph->check, (__force __wsum)payload_len);
	}

	hdrlen = (l4hdr - skb->data) + l4len;
	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) |
			     FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) |
			     FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) |
			     FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) |
			     FBNIC_TWD_FLAG_REQ_LSO);

	FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen;
	FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs;

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.twq.lso += shinfo->gso_segs;
	u64_stats_update_end(&ring->stats.syncp);

	return false;
}

static bool
fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int l2len, i3len;

	if (fbnic_tx_tstamp(skb))
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
		return false;

	l2len = skb_mac_header_len(skb);
	i3len = skb_checksum_start(skb) - skb_network_header(skb);

	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
					skb->csum_offset / 2));

	if (shinfo->gso_size) {
		if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len))
			return true;
	} else {
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.twq.csum_partial++;
		u64_stats_update_end(&ring->stats.syncp);
	}

	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
			     FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
	return false;
}

static void
fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq,
	      u64 *csum_cmpl, u64 *csum_none)
{
	skb_checksum_none_assert(skb);

	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
		(*csum_none)++;
		return;
	}

	if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd);

		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = (__force __wsum)csum;
		(*csum_cmpl)++;
	}
}
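
/* Descriptive summary, not from hardware documentation: the caller has
 * already written a metadata descriptor at ring->tail, and fbnic_tx_map()
 * follows it with one TWD_TYPE_AL descriptor for the linear area plus one
 * per fragment, retyping the final descriptor to LAST_AL to mark the end
 * of the frame.
 */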
static bool
fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	struct device *dev = skb->dev->dev.parent;
	unsigned int tail = ring->tail, first;
	unsigned int size, data_len;
	skb_frag_t *frag;
	bool is_net_iov;
	dma_addr_t dma;
	__le64 *twd;

	ring->tx_buf[tail] = skb;

	tail++;
	tail &= ring->size_mask;
	first = tail;

	size = skb_headlen(skb);
	data_len = skb->data_len;

	if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
		goto dma_error;

	is_net_iov = false;
	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		twd = &ring->desc[tail];

		if (dma_mapping_error(dev, dma))
			goto dma_error;

		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
					      FBNIC_TWD_TYPE_AL));
		if (is_net_iov)
			ring->tx_buf[tail] = FBNIC_XMIT_NOUNMAP;

		tail++;
		tail &= ring->size_mask;

		if (!data_len)
			break;

		size = skb_frag_size(frag);
		data_len -= size;

		if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
			goto dma_error;

		is_net_iov = skb_frag_is_net_iov(frag);
		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
	}

	*twd |= FBNIC_TWD_TYPE(LAST_AL);

	FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask;

	ring->tail = tail;

	/* Record SW timestamp */
	skb_tx_timestamp(skb);

	/* Verify there is room for another packet */
	fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);

	if (fbnic_tx_sent_queue(skb, ring)) {
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION);

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(tail, ring->doorbell);
	}

	return false;
dma_error:
	if (net_ratelimit())
		netdev_err(skb->dev, "TX DMA map failed\n");

	while (tail != first) {
		tail--;
		tail &= ring->size_mask;
		twd = &ring->desc[tail];
		if (tail == first)
			fbnic_unmap_single_twd(dev, twd);
		else if (ring->tx_buf[tail] == FBNIC_XMIT_NOUNMAP)
			ring->tx_buf[tail] = NULL;
		else
			fbnic_unmap_page_twd(dev, twd);
	}

	return true;
}

#define FBNIC_MIN_FRAME_LEN	60

static netdev_tx_t
fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
{
	__le64 *meta = &ring->desc[ring->tail];
	u16 desc_needed;

	if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN))
		goto err_count;

	/* Need: 1 descriptor per page,
	 *       + 1 desc for skb_head,
	 *       + 2 desc for metadata and timestamp metadata
	 *       + 7 desc gap to keep tail from touching head
	 * otherwise try next time
	 */
	desc_needed = skb_shinfo(skb)->nr_frags + 10;
	if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed))
		return NETDEV_TX_BUSY;

	*meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC);

	/* Write all members within DWORD to condense this into 2 4B writes */
	FBNIC_XMIT_CB(skb)->bytecount = skb->len;
	FBNIC_XMIT_CB(skb)->gso_segs = 1;
	FBNIC_XMIT_CB(skb)->desc_count = 0;
	FBNIC_XMIT_CB(skb)->flags = 0;

	if (fbnic_tx_offloads(ring, skb, meta))
		goto err_free;

	if (fbnic_tx_map(ring, skb, meta))
		goto err_free;

	return NETDEV_TX_OK;

err_free:
	dev_kfree_skb_any(skb);
err_count:
	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.dropped++;
	u64_stats_update_end(&ring->stats.syncp);
	return NETDEV_TX_OK;
}

netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	unsigned int q_map = skb->queue_mapping;

	return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
}
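
/* Summary of the helper below: fbnic_features_check() calls this only for
 * encapsulated GSO packets. Anything that cannot be shown to be
 * IPv6-in-IPv6 has the tunnel GSO feature bits cleared, leaving the stack
 * to segment the skb (fully or via GSO_PARTIAL) before it reaches the
 * driver.
 */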
static netdev_features_t
fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev,
			       netdev_features_t features, unsigned int l3len)
{
	netdev_features_t skb_gso_features;
	struct ipv6hdr *ip6_hdr;
	unsigned char l4_hdr;
	unsigned int start;
	__be16 frag_off;

	/* Require MANGLEID for GSO_PARTIAL of IPv4.
	 * In theory we could support TSO with single, innermost v4 header
	 * by pretending everything before it is L2, but that needs to be
	 * parsed case by case.. so leaving it for when the need arises.
	 */
	if (!(features & NETIF_F_TSO_MANGLEID))
		features &= ~NETIF_F_TSO;

	skb_gso_features = skb_shinfo(skb)->gso_type;
	skb_gso_features <<= NETIF_F_GSO_SHIFT;

	/* We'd only clear the native GSO features, so don't bother validating
	 * if the match can only be on those supported thru GSO_PARTIAL.
	 */
	if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES))
		return features;

	/* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice
	 * to fall back to partial for this, or any failure below.
	 * This is just an optimization, UDPv4 will be caught later on.
	 */
	if (skb_gso_features & NETIF_F_TSO)
		return features & ~FBNIC_TUN_GSO_FEATURES;

	/* Inner headers multiple of 2 */
	if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2)
		return features & ~FBNIC_TUN_GSO_FEATURES;

	/* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */
	ip6_hdr = ipv6_hdr(skb);
	if (ip6_hdr->version != 6)
		return features & ~FBNIC_TUN_GSO_FEATURES;

	l4_hdr = ip6_hdr->nexthdr;
	start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr);
	start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off);
	if (frag_off || l4_hdr != IPPROTO_IPV6 ||
	    skb->data + start != skb_inner_network_header(skb))
		return features & ~FBNIC_TUN_GSO_FEATURES;

	return features;
}

netdev_features_t
fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
		     netdev_features_t features)
{
	unsigned int l2len, l3len;

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
		return features;

	l2len = skb_mac_header_len(skb);
	l3len = skb_checksum_start(skb) - skb_network_header(skb);

	/* Check header lengths are multiple of 2.
	 * In case of 6in6 we support longer headers (IHLEN + OHLEN)
	 * but keep things simple for now, 512B is plenty.
	 */
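	/* When any of these checks fail, clearing NETIF_F_CSUM_MASK and
	 * NETIF_F_GSO_MASK below makes the core checksum and segment this
	 * skb in software instead of handing it to the device.
	 */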
	if ((l2len | l3len | skb->csum_offset) % 2 ||
	    !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
	    !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
	    !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	if (likely(!skb->encapsulation) || !skb_is_gso(skb))
		return features;

	return fbnic_features_check_encap_gso(skb, dev, features, l3len);
}

static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
			     struct fbnic_ring *ring, bool discard,
			     unsigned int hw_head)
{
	u64 total_bytes = 0, total_packets = 0, ts_lost = 0;
	unsigned int head = ring->head;
	struct netdev_queue *txq;
	unsigned int clean_desc;

	clean_desc = (hw_head - head) & ring->size_mask;

	while (clean_desc) {
		struct sk_buff *skb = ring->tx_buf[head];
		unsigned int desc_cnt;

		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
		if (desc_cnt > clean_desc)
			break;

		if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) {
			FBNIC_XMIT_CB(skb)->hw_head = hw_head;
			if (likely(!discard))
				break;
			ts_lost++;
		}

		ring->tx_buf[head] = NULL;

		clean_desc -= desc_cnt;

		while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) {
			head++;
			head &= ring->size_mask;
			desc_cnt--;
		}

		fbnic_unmap_single_twd(nv->dev, &ring->desc[head]);
		head++;
		head &= ring->size_mask;
		desc_cnt--;

		while (desc_cnt--) {
			if (ring->tx_buf[head] != FBNIC_XMIT_NOUNMAP)
				fbnic_unmap_page_twd(nv->dev,
						     &ring->desc[head]);
			else
				ring->tx_buf[head] = NULL;
			head++;
			head &= ring->size_mask;
		}

		total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
		total_packets += FBNIC_XMIT_CB(skb)->gso_segs;

		napi_consume_skb(skb, napi_budget);
	}

	if (!total_bytes)
		return;

	ring->head = head;

	txq = txring_txq(nv->napi.dev, ring);

	if (unlikely(discard)) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped += total_packets;
		ring->stats.twq.ts_lost += ts_lost;
		u64_stats_update_end(&ring->stats.syncp);

		netdev_tx_completed_queue(txq, total_packets, total_bytes);
		return;
	}

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.bytes += total_bytes;
	ring->stats.packets += total_packets;
	u64_stats_update_end(&ring->stats.syncp);

	if (!netif_txq_completed_wake(txq, total_packets, total_bytes,
				      fbnic_desc_unused(ring),
				      FBNIC_TX_DESC_WAKEUP)) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.twq.wake++;
		u64_stats_update_end(&ring->stats.syncp);
	}
}

static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct,
			     struct fbnic_ring *ring, bool discard,
			     unsigned int hw_head)
{
	u64 total_bytes = 0, total_packets = 0;
	unsigned int head = ring->head;

	while (hw_head != head) {
		struct page *page;
		u64 twd;

		if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL))))
			goto next_desc;

		twd = le64_to_cpu(ring->desc[head]);
		page = ring->tx_buf[head];

		/* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives
		 * us one increment per packet, with no branches.
		 */
		total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) -
				 FBNIC_TWD_TYPE_AL;
		total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd);

		page_pool_put_page(page->pp, page, -1, pp_allow_direct);
next_desc:
		head++;
		head &= ring->size_mask;
	}

	if (!total_bytes)
		return;

	ring->head = head;

	if (discard) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped += total_packets;
		u64_stats_update_end(&ring->stats.syncp);
		return;
	}

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.bytes += total_bytes;
	ring->stats.packets += total_packets;
	u64_stats_update_end(&ring->stats.syncp);
}

static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
			    struct fbnic_ring *ring,
			    u64 tcd, int *ts_head, int *head0)
{
	struct skb_shared_hwtstamps hwtstamp;
	struct fbnic_net *fbn;
	struct sk_buff *skb;
	int head;
	u64 ns;

	head = (*ts_head < 0) ? ring->head : *ts_head;

	do {
		unsigned int desc_cnt;

		if (head == ring->tail) {
			if (unlikely(net_ratelimit()))
				netdev_err(nv->napi.dev,
					   "Tx timestamp without matching packet\n");
			return;
		}

		skb = ring->tx_buf[head];
		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;

		head += desc_cnt;
		head &= ring->size_mask;
	} while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS));

	fbn = netdev_priv(nv->napi.dev);
	ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd));

	memset(&hwtstamp, 0, sizeof(hwtstamp));
	hwtstamp.hwtstamp = ns_to_ktime(ns);

	*ts_head = head;

	FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS;
	if (*head0 < 0) {
		head = FBNIC_XMIT_CB(skb)->hw_head;
		if (head >= 0)
			*head0 = head;
	}

	skb_tstamp_tx(skb, &hwtstamp);
	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.twq.ts_packets++;
	u64_stats_update_end(&ring->stats.syncp);
}

static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
				 netmem_ref netmem)
{
	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];

	page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX);
	rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX;
	rx_buf->netmem = netmem;
}

static struct page *
fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx)
{
	struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx];

	rx_buf->pagecnt_bias--;

	/* sub0 is always fed system pages, from the NAPI-level page_pool */
	return netmem_to_page(rx_buf->netmem);
}

static netmem_ref
fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx)
{
	struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx];

	rx_buf->pagecnt_bias--;

	return rx_buf->netmem;
}

static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
				  int budget)
{
	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
	netmem_ref netmem = rx_buf->netmem;

	if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias))
		page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1,
					     !!budget);

	rx_buf->netmem = 0;
}

static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
			    struct fbnic_q_triad *qt, s32 ts_head, s32 head0,
			    s32 head1)
{
	if (head0 >= 0)
		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
	else if (ts_head >= 0)
		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
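
	/* TWQ1 (XDP) cleanup hands pages back to the page_pool, which this
	 * driver only does from a NAPI poll with a non-zero budget (likely
	 * because direct recycling is only safe in that context). With a
	 * zero budget the reported head is parked in deferred_head and
	 * fbnic_clean_tcq() resumes from it on a later poll.
	 */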
	if (head1 >= 0) {
		qt->cmpl.deferred_head = -1;
		if (napi_budget)
			fbnic_clean_twq1(nv, true, &qt->sub1, false, head1);
		else
			qt->cmpl.deferred_head = head1;
	}
}

static void
fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
		int napi_budget)
{
	struct fbnic_ring *cmpl = &qt->cmpl;
	s32 head1 = cmpl->deferred_head;
	s32 head0 = -1, ts_head = -1;
	__le64 *raw_tcd, done;
	u32 head = cmpl->head;

	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
	raw_tcd = &cmpl->desc[head & cmpl->size_mask];

	/* Walk the completion queue collecting the heads reported by NIC */
	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
		u64 tcd;

		dma_rmb();

		tcd = le64_to_cpu(*raw_tcd);

		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
		case FBNIC_TCD_TYPE_0:
			if (tcd & FBNIC_TCD_TWQ1)
				head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK,
						  tcd);
			else
				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
						  tcd);
			/* Currently all err status bits are related to
			 * timestamps and as those have yet to be added
			 * they are skipped for now.
			 */
			break;
		case FBNIC_TCD_TYPE_1:
			if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
				break;

			fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
			break;
		default:
			break;
		}

		raw_tcd++;
		head++;
		if (!(head & cmpl->size_mask)) {
			done ^= cpu_to_le64(FBNIC_TCD_DONE);
			raw_tcd = &cmpl->desc[0];
		}
	}

	/* Record the current head/tail of the queue */
	if (cmpl->head != head) {
		cmpl->head = head;
		writel(head & cmpl->size_mask, cmpl->doorbell);
	}

	/* Unmap and free processed buffers */
	fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1);
}

static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head,
			    int napi_budget)
{
	unsigned int head = ring->head;

	if (head == hw_head)
		return;

	do {
		fbnic_page_pool_drain(ring, head, napi_budget);

		head++;
		head &= ring->size_mask;
	} while (head != hw_head);

	ring->head = head;
}

static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem)
{
	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
	dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
	u64 bd, i = FBNIC_BD_FRAG_COUNT;

	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);

	/* In the case that a page size is larger than 4K we will map a
	 * single page to multiple fragments. The fragments will be
	 * FBNIC_BD_FRAG_COUNT in size and the lower n bits will be used
	 * to indicate the individual fragment IDs.
	 */
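	/* Illustrative example, assuming 4K hardware buffer fragments: with
	 * a 64K PAGE_SIZE, FBNIC_BD_FRAG_COUNT would be 16 and the loop
	 * below posts 16 descriptors for this one page, stepping the
	 * descriptor address and fragment ID fields by one unit per
	 * iteration.
	 */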
	do {
		*bdq_desc = cpu_to_le64(bd);
		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
	} while (--i);
}

static void fbnic_fill_bdq(struct fbnic_ring *bdq)
{
	unsigned int count = fbnic_desc_unused(bdq);
	unsigned int i = bdq->tail;

	if (!count)
		return;

	do {
		netmem_ref netmem;

		netmem = page_pool_dev_alloc_netmems(bdq->page_pool);
		if (!netmem) {
			u64_stats_update_begin(&bdq->stats.syncp);
			bdq->stats.bdq.alloc_failed++;
			u64_stats_update_end(&bdq->stats.syncp);

			break;
		}

		fbnic_page_pool_init(bdq, i, netmem);
		fbnic_bd_prep(bdq, i, netmem);

		i++;
		i &= bdq->size_mask;

		count--;
	} while (count);

	if (bdq->tail != i) {
		bdq->tail = i;

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(i, bdq->doorbell);
	}
}

static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
{
	/* The headroom of the first header may be larger than FBNIC_RX_HROOM
	 * due to alignment. So account for that by just making the page
	 * offset 0 if we are starting at the first header.
	 */
	if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
	    pg_off == ALIGN(FBNIC_RX_HROOM, 128))
		return 0;

	return pg_off - FBNIC_RX_HROOM;
}

static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
{
	/* Determine the end of the buffer by finding the start of the next
	 * and then subtracting the headroom from that frame.
	 */
	pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;

	return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
}

static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
	unsigned char *hdr_start;

	/* data_hard_start should always be NULL when this is called */
	WARN_ON_ONCE(pkt->buff.data_hard_start);

	/* Short-cut the end calculation if we know page is fully consumed */
	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
		     FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len);
	hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off);

	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
	frame_sz = hdr_pg_end - hdr_pg_start;
	xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
			FBNIC_BD_FRAG_SIZE;

	/* Sync DMA buffer */
	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
				      hdr_pg_start, frame_sz,
				      DMA_BIDIRECTIONAL);

	/* Build frame around buffer */
	hdr_start = page_address(page) + hdr_pg_start;
	net_prefetch(pkt->buff.data);
	xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
			 len - FBNIC_RX_PAD, true);

	pkt->hwtstamp = 0;
	pkt->add_frag_failed = false;
}

static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx);
	unsigned int truesize;
	bool added;

	truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
		   FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);

	pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
		  FBNIC_BD_FRAG_SIZE;

	/* Sync DMA buffer */
	page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem,
					  pg_off, truesize);

	added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize);
	if (unlikely(!added)) {
		pkt->add_frag_failed = true;
		netdev_err_once(nv->napi.dev,
				"Failed to add fragment to xdp_buff\n");
	}
}

static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt,
			       struct fbnic_pkt_buff *pkt, int budget)
{
	struct page *page;

	if (!pkt->buff.data_hard_start)
		return;

	if (xdp_buff_has_frags(&pkt->buff)) {
		struct skb_shared_info *shinfo;
		netmem_ref netmem;
		int nr_frags;

		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
		nr_frags = shinfo->nr_frags;

		while (nr_frags--) {
			netmem = skb_frag_netmem(&shinfo->frags[nr_frags]);
			page_pool_put_full_netmem(qt->sub1.page_pool, netmem,
						  !!budget);
		}
	}

	page = virt_to_page(pkt->buff.data_hard_start);
	page_pool_put_full_page(qt->sub0.page_pool, page, !!budget);
}

static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
				       struct fbnic_pkt_buff *pkt)
{
	struct sk_buff *skb;

	skb = xdp_build_skb_from_buff(&pkt->buff);
	if (!skb)
		return NULL;

	/* Add timestamp if present */
	if (pkt->hwtstamp)
		skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;

	return skb;
}

static long fbnic_pkt_tx(struct fbnic_napi_vector *nv,
			 struct fbnic_pkt_buff *pkt)
{
	struct fbnic_ring *ring = &nv->qt[0].sub1;
	int size, offset, nsegs = 1, data_len = 0;
	unsigned int tail = ring->tail;
	struct skb_shared_info *shinfo;
	skb_frag_t *frag = NULL;
	struct page *page;
	dma_addr_t dma;
	__le64 *twd;

	if (unlikely(xdp_buff_has_frags(&pkt->buff))) {
		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
		nsegs += shinfo->nr_frags;
		data_len = shinfo->xdp_frags_size;
		frag = &shinfo->frags[0];
	}
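
	/* XDP_TX needs one descriptor per segment (head plus each frag).
	 * If the XDP submission ring is too full the frame is simply
	 * dropped and counted, since there is no queue to stop here the
	 * way fbnic_maybe_stop_tx() does on the normal Tx path.
	 */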
	if (fbnic_desc_unused(ring) < nsegs) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped++;
		u64_stats_update_end(&ring->stats.syncp);
		return -FBNIC_XDP_CONSUME;
	}

	page = virt_to_page(pkt->buff.data_hard_start);
	offset = offset_in_page(pkt->buff.data);
	dma = page_pool_get_dma_addr(page);

	size = pkt->buff.data_end - pkt->buff.data;

	while (nsegs--) {
		dma_sync_single_range_for_device(nv->dev, dma, offset, size,
						 DMA_BIDIRECTIONAL);
		dma += offset;

		ring->tx_buf[tail] = page;

		twd = &ring->desc[tail];
		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
					      FBNIC_TWD_TYPE_AL));

		tail++;
		tail &= ring->size_mask;

		if (!data_len)
			break;

		offset = skb_frag_off(frag);
		page = skb_frag_page(frag);
		dma = page_pool_get_dma_addr(page);

		size = skb_frag_size(frag);
		data_len -= size;
		frag++;
	}

	*twd |= FBNIC_TWD_TYPE(LAST_AL);

	ring->tail = tail;

	return -FBNIC_XDP_TX;
}

static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv,
				  unsigned int pkt_tail)
{
	struct fbnic_ring *ring = &nv->qt[0].sub1;

	/* Force DMA writes to flush before writing to tail */
	dma_wmb();

	writel(pkt_tail, ring->doorbell);
}

static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
				     struct fbnic_pkt_buff *pkt)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	struct bpf_prog *xdp_prog;
	int act;

	xdp_prog = READ_ONCE(fbn->xdp_prog);
	if (!xdp_prog)
		goto xdp_pass;

	/* Should never happen, config paths enforce HDS threshold > MTU */
	if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
		return ERR_PTR(-FBNIC_XDP_LEN_ERR);

	act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
	switch (act) {
	case XDP_PASS:
xdp_pass:
		return fbnic_build_skb(nv, pkt);
	case XDP_TX:
		return ERR_PTR(fbnic_pkt_tx(nv, pkt));
	default:
		bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(nv->napi.dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		break;
	}

	return ERR_PTR(-FBNIC_XDP_CONSUME);
}

static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
{
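	/* Report the most specific hash type the descriptor indicates:
	 * L4 if an L4 type is present, then L3, otherwise L2.
	 */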
	return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
	       (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L3 :
						     PKT_HASH_TYPE_L2;
}

static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
			    struct fbnic_pkt_buff *pkt)
{
	struct fbnic_net *fbn;
	u64 ns, ts;

	if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd))
		return;

	fbn = netdev_priv(nv->napi.dev);
	ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd);
	ns = fbnic_ts40_to_ns(fbn, ts);

	/* Add timestamp to shared info */
	pkt->hwtstamp = ns_to_ktime(ns);
}

static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
				      u64 rcd, struct sk_buff *skb,
				      struct fbnic_q_triad *qt,
				      u64 *csum_cmpl, u64 *csum_none)
{
	struct net_device *netdev = nv->napi.dev;
	struct fbnic_ring *rcq = &qt->cmpl;

	fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none);

	if (netdev->features & NETIF_F_RXHASH)
		skb_set_hash(skb,
			     FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd),
			     fbnic_skb_hash_type(rcd));

	skb_record_rx_queue(skb, rcq->q_idx);
}

static bool fbnic_rcd_metadata_err(u64 rcd)
{
	return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd);
}

static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
			   struct fbnic_q_triad *qt, int budget)
{
	unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
	u64 csum_complete = 0, csum_none = 0, length_errors = 0;
	s32 head0 = -1, head1 = -1, pkt_tail = -1;
	struct fbnic_ring *rcq = &qt->cmpl;
	struct fbnic_pkt_buff *pkt;
	__le64 *raw_rcd, done;
	u32 head = rcq->head;

	done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0;
	raw_rcd = &rcq->desc[head & rcq->size_mask];
	pkt = rcq->pkt;

	/* Walk the completion queue collecting the heads reported by NIC */
	while (likely(packets < budget)) {
		struct sk_buff *skb = ERR_PTR(-EINVAL);
		u32 pkt_bytes;
		u64 rcd;

		if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done)
			break;

		dma_rmb();

		rcd = le64_to_cpu(*raw_rcd);

		switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) {
		case FBNIC_RCD_TYPE_HDR_AL:
			head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_pkt_prepare(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_PAY_AL:
			head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_add_rx_frag(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_OPT_META:
			/* Only type 0 is currently supported */
			if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
				break;

			fbnic_rx_tstamp(nv, rcd, pkt);

			/* We currently ignore the action table index */
			break;
		case FBNIC_RCD_TYPE_META:
			if (likely(!fbnic_rcd_metadata_err(rcd) &&
				   !pkt->add_frag_failed)) {
				pkt_bytes = xdp_get_buff_len(&pkt->buff);
				skb = fbnic_run_xdp(nv, pkt);
			}

			/* Populate skb and invalidate XDP */
			if (!IS_ERR_OR_NULL(skb)) {
				fbnic_populate_skb_fields(nv, rcd, skb, qt,
							  &csum_complete,
							  &csum_none);
				napi_gro_receive(&nv->napi, skb);
			} else if (skb == ERR_PTR(-FBNIC_XDP_TX)) {
				pkt_tail = nv->qt[0].sub1.tail;
			} else if (PTR_ERR(skb) == -FBNIC_XDP_CONSUME) {
				fbnic_put_pkt_buff(qt, pkt, 1);
			} else {
				if (!skb)
					alloc_failed++;

				if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR))
					length_errors++;
				else
					dropped++;

				fbnic_put_pkt_buff(qt, pkt, 1);
				goto next_dont_count;
			}

			packets++;
			bytes += pkt_bytes;
next_dont_count:
			pkt->buff.data_hard_start = NULL;

			break;
		}

		raw_rcd++;
		head++;
		if (!(head & rcq->size_mask)) {
			done ^= cpu_to_le64(FBNIC_RCD_DONE);
			raw_rcd = &rcq->desc[0];
		}
	}

	u64_stats_update_begin(&rcq->stats.syncp);
	rcq->stats.packets += packets;
	rcq->stats.bytes += bytes;
	rcq->stats.dropped += dropped;
	rcq->stats.rx.alloc_failed += alloc_failed;
	rcq->stats.rx.csum_complete += csum_complete;
	rcq->stats.rx.csum_none += csum_none;
	rcq->stats.rx.length_errors += length_errors;
	u64_stats_update_end(&rcq->stats.syncp);

	if (pkt_tail >= 0)
		fbnic_pkt_commit_tail(nv, pkt_tail);

	/* Unmap and free processed buffers */
	if (head0 >= 0)
		fbnic_clean_bdq(&qt->sub0, head0, budget);
	fbnic_fill_bdq(&qt->sub0);

	if (head1 >= 0)
		fbnic_clean_bdq(&qt->sub1, head1, budget);
	fbnic_fill_bdq(&qt->sub1);

	/* Record the current head/tail of the queue */
	if (rcq->head != head) {
		rcq->head = head;
		writel(head & rcq->size_mask, rcq->doorbell);
	}

	return packets;
}

static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
}

static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}

static int fbnic_poll(struct napi_struct *napi, int budget)
{
	struct fbnic_napi_vector *nv = container_of(napi,
						    struct fbnic_napi_vector,
						    napi);
	int i, j, work_done = 0;

	for (i = 0; i < nv->txt_count; i++)
		fbnic_clean_tcq(nv, &nv->qt[i], budget);

	for (j = 0; j < nv->rxt_count; j++, i++)
		work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);

	if (work_done >= budget)
		return budget;

	if (likely(napi_complete_done(napi, work_done)))
		fbnic_nv_irq_rearm(nv);

	return work_done;
}

irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
{
	struct fbnic_napi_vector *nv = *(void **)data;

	napi_schedule_irqoff(&nv->napi);

	return IRQ_HANDLED;
}

void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
				      struct fbnic_ring *rxr)
{
	struct fbnic_queue_stats *stats = &rxr->stats;

	/* Capture stats from queues before disassociating them */
	fbn->rx_stats.bytes += stats->bytes;
	fbn->rx_stats.packets += stats->packets;
	fbn->rx_stats.dropped += stats->dropped;
	fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
	fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
	fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
	fbn->rx_stats.rx.length_errors += stats->rx.length_errors;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4);
}
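
/* The BUILD_BUG_ON() checks in these aggregation helpers count the u64
 * members of the corresponding stats struct, so adding a new counter to
 * the struct without aggregating it here fails the build.
 */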

void fbnic_aggregate_ring_bdq_counters(struct fbnic_net *fbn,
				       struct fbnic_ring *bdq)
{
	struct fbnic_queue_stats *stats = &bdq->stats;

	/* Capture stats from queues before disassociating them */
	fbn->bdq_stats.bdq.alloc_failed += stats->bdq.alloc_failed;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->rx_stats.bdq) / 8 != 1);
}

void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
				      struct fbnic_ring *txr)
{
	struct fbnic_queue_stats *stats = &txr->stats;

	/* Capture stats from queues before disassociating them */
	fbn->tx_stats.bytes += stats->bytes;
	fbn->tx_stats.packets += stats->packets;
	fbn->tx_stats.dropped += stats->dropped;
	fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
	fbn->tx_stats.twq.lso += stats->twq.lso;
	fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
	fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
	fbn->tx_stats.twq.stop += stats->twq.stop;
	fbn->tx_stats.twq.wake += stats->twq.wake;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
}

void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn,
				       struct fbnic_ring *xdpr)
{
	struct fbnic_queue_stats *stats = &xdpr->stats;

	if (!(xdpr->flags & FBNIC_RING_F_STATS))
		return;

	/* Capture stats from queues before disassociating them */
	fbn->tx_stats.dropped += stats->dropped;
	fbn->tx_stats.bytes += stats->bytes;
	fbn->tx_stats.packets += stats->packets;
}

static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
				 struct fbnic_ring *txr)
{
	if (!(txr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_tx_counters(fbn, txr);

	/* Remove pointer to the Tx ring */
	WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
	fbn->tx[txr->q_idx] = NULL;
}

static void fbnic_remove_xdp_ring(struct fbnic_net *fbn,
				  struct fbnic_ring *xdpr)
{
	if (!(xdpr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_xdp_counters(fbn, xdpr);

	/* Remove pointer to the Tx ring */
	WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr);
	fbn->tx[xdpr->q_idx] = NULL;
}

static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
				 struct fbnic_ring *rxr)
{
	if (!(rxr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_rx_counters(fbn, rxr);

	/* Remove pointer to the Rx ring */
	WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
	fbn->rx[rxr->q_idx] = NULL;
}

static void fbnic_remove_bdq_ring(struct fbnic_net *fbn,
				  struct fbnic_ring *bdq)
{
	if (!(bdq->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_bdq_counters(fbn, bdq);
}

static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt)
{
	page_pool_destroy(qt->sub0.page_pool);
	page_pool_destroy(qt->sub1.page_pool);
}

static void fbnic_free_napi_vector(struct fbnic_net *fbn,
				   struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	int i, j;

	for (i = 0; i < nv->txt_count; i++) {
		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
	}

	for (j = 0; j < nv->rxt_count; j++, i++) {
		fbnic_remove_bdq_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_bdq_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
	}

	fbnic_napi_free_irq(fbd, nv);
	netif_napi_del_locked(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
}

void fbnic_free_napi_vectors(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		if (fbn->napi[i])
			fbnic_free_napi_vector(fbn, fbn->napi[i]);
}

static int
fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt,
			  unsigned int rxq_idx)
{
	struct page_pool_params pp_params = {
		.order = 0,
		.flags = PP_FLAG_DMA_MAP |
			 PP_FLAG_DMA_SYNC_DEV,
		.pool_size = fbn->hpq_size + fbn->ppq_size,
		.nid = NUMA_NO_NODE,
		.dev = fbn->netdev->dev.parent,
		.dma_dir = DMA_BIDIRECTIONAL,
		.offset = 0,
		.max_len = PAGE_SIZE,
		.netdev = fbn->netdev,
		.queue_idx = rxq_idx,
	};
	struct page_pool *pp;

	/* Page pool cannot exceed a size of 32768. This doesn't limit the
	 * pages on the ring but the number we can have cached waiting on
	 * the next use.
	 *
	 * TBD: Can this be reduced further? Would a multiple of
	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
	 * many pages do we need to hold in reserve to get the best return
	 * without hogging too much system memory.
	 */
	if (pp_params.pool_size > 32768)
		pp_params.pool_size = 32768;

	pp = page_pool_create(&pp_params);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	qt->sub0.page_pool = pp;
	if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) {
		pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM;
		pp_params.dma_dir = DMA_FROM_DEVICE;

		pp = page_pool_create(&pp_params);
		if (IS_ERR(pp))
			goto err_destroy_sub0;
	} else {
		page_pool_get(pp);
	}
	qt->sub1.page_pool = pp;

	return 0;

err_destroy_sub0:
	page_pool_destroy(qt->sub0.page_pool);
	return PTR_ERR(pp);
}

static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
			    int q_idx, u8 flags)
{
	u64_stats_init(&ring->stats.syncp);
	ring->doorbell = doorbell;
	ring->q_idx = q_idx;
	ring->flags = flags;
	ring->deferred_head = -1;
}

static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
				   unsigned int v_count, unsigned int v_idx,
				   unsigned int txq_count, unsigned int txq_idx,
				   unsigned int rxq_count, unsigned int rxq_idx)
{
	int txt_count = txq_count, rxt_count = rxq_count;
	u32 __iomem *uc_addr = fbd->uc_addr0;
	int xdp_count = 0, qt_count, err;
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *qt;
	u32 __iomem *db;

	/* We need to reserve at least one Tx Queue Triad for an XDP ring */
	if (rxq_count) {
		xdp_count = 1;
		if (!txt_count)
			txt_count = 1;
	}

	qt_count = txt_count + rxq_count;
	if (!qt_count)
		return -EINVAL;

	/* If MMIO has already failed there are no rings to initialize */
	if (!uc_addr)
		return -EIO;

	/* Allocate NAPI vector and queue triads */
	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
	if (!nv)
		return -ENOMEM;

	/* Record queue triad counts */
	nv->txt_count = txt_count;
	nv->rxt_count = rxt_count;

	/* Provide pointer back to fbnic and MSI-X vectors */
	nv->fbd = fbd;
	nv->v_idx = v_idx;

	/* Tie napi to netdev */
	fbn->napi[fbnic_napi_idx(nv)] = nv;
	netif_napi_add_config_locked(fbn->netdev, &nv->napi, fbnic_poll,
				     fbnic_napi_idx(nv));

	/* Record IRQ to NAPI struct */
	netif_napi_set_irq_locked(&nv->napi,
				  pci_irq_vector(to_pci_dev(fbd->dev),
						 nv->v_idx));

	/* Tie nv back to PCIe dev */
	nv->dev = fbd->dev;

	/* Request the IRQ for napi vector */
	err = fbnic_napi_request_irq(fbd, nv);
	if (err)
		goto napi_del;

	/* Initialize queue triads */
	qt = nv->qt;

	while (txt_count) {
		u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;

		/* Configure Tx queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];

		/* Assign Tx queue to netdev if applicable */
		if (txq_count > 0) {
			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
			fbn->tx[txq_idx] = &qt->sub0;
			txq_count--;
		} else {
			fbnic_ring_init(&qt->sub0, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure XDP queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL];

		/* Assign XDP queue to netdev if applicable
		 *
		 * The setup for this is in itself a bit different.
		 * 1. We only need one XDP Tx queue per NAPI vector.
		 * 2. We associate it to the first Rx queue index.
		 * 3. The hardware side is associated based on the Tx Queue.
		 * 4. The netdev queue is offset by FBNIC_MAX_TXQs.
		 */
		if (xdp_count > 0) {
			unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx;

			fbnic_ring_init(&qt->sub1, db, xdp_idx, flags);
			fbn->tx[xdp_idx] = &qt->sub1;
			xdp_count--;
		} else {
			fbnic_ring_init(&qt->sub1, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure Tx completion queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, 0, 0);

		/* Update Tx queue index */
		txt_count--;
		txq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	while (rxt_count) {
		/* Configure header queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
		fbnic_ring_init(&qt->sub0, db, 0,
				FBNIC_RING_F_CTX | FBNIC_RING_F_STATS);

		/* Configure payload queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
		fbnic_ring_init(&qt->sub1, db, 0,
				FBNIC_RING_F_CTX | FBNIC_RING_F_STATS);

		/* Configure Rx completion queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
		fbn->rx[rxq_idx] = &qt->cmpl;

		/* Update Rx queue index */
		rxt_count--;
		rxq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	return 0;

napi_del:
	netif_napi_del_locked(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
	return err;
}

int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
{
	unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
	unsigned int num_tx = fbn->num_tx_queues;
	unsigned int num_rx = fbn->num_rx_queues;
	unsigned int num_napi = fbn->num_napi;
	struct fbnic_dev *fbd = fbn->fbd;
	int err;

	/* Allocate 1 Tx queue per napi vector */
	if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
		while (num_tx) {
			err = fbnic_alloc_napi_vector(fbd, fbn,
						      num_napi, v_idx,
						      1, txq_idx, 0, 0);
			if (err)
				goto free_vectors;

			/* Update counts and index */
			num_tx--;
			txq_idx++;

			v_idx++;
		}
	}

	/* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
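	/* Hypothetical example: with 4 Tx and 8 Rx queues spread over 8
	 * NAPI vectors, the first four vectors each get a Tx/Rx pair (tqpv
	 * and rqpv both 1) and the remaining four end up Rx-only once
	 * num_tx reaches zero.
	 */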
	while (num_rx | num_tx) {
		int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
		int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx);

		err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx,
					      tqpv, txq_idx, rqpv, rxq_idx);
		if (err)
			goto free_vectors;

		/* Update counts and index */
		num_tx -= tqpv;
		txq_idx++;

		num_rx -= rqpv;
		rxq_idx++;

		v_idx++;
	}

	return 0;

free_vectors:
	fbnic_free_napi_vectors(fbn);

	return -ENOMEM;
}

static void fbnic_free_ring_resources(struct device *dev,
				      struct fbnic_ring *ring)
{
	kvfree(ring->buffer);
	ring->buffer = NULL;

	/* If size is not set there are no descriptors present */
	if (!ring->size)
		return;

	dma_free_coherent(dev, ring->size, ring->desc, ring->dma);
	ring->size_mask = 0;
	ring->size = 0;
}

static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn,
				    struct fbnic_ring *txr)
{
	struct device *dev = fbn->netdev->dev.parent;
	size_t size;

	/* Round size up to nearest 4K */
	size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096);

	txr->desc = dma_alloc_coherent(dev, size, &txr->dma,
				       GFP_KERNEL | __GFP_NOWARN);
	if (!txr->desc)
		return -ENOMEM;

	/* txq_size should be a power of 2, so mask is just that -1 */
	txr->size_mask = fbn->txq_size - 1;
	txr->size = size;

	return 0;
}

static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr)
{
	size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1);

	txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	return txr->tx_buf ? 0 : -ENOMEM;
}

static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn,
					 struct fbnic_ring *txr)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	if (txr->flags & FBNIC_RING_F_DISABLED)
		return 0;

	err = fbnic_alloc_tx_ring_desc(fbn, txr);
	if (err)
		return err;

	if (!(txr->flags & FBNIC_RING_F_CTX))
		return 0;

	err = fbnic_alloc_tx_ring_buffer(txr);
	if (err)
		goto free_desc;

	return 0;

free_desc:
	fbnic_free_ring_resources(dev, txr);
	return err;
}

static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn,
				    struct fbnic_ring *rxr)
{
	struct device *dev = fbn->netdev->dev.parent;
	size_t desc_size = sizeof(*rxr->desc);
	u32 rxq_size;
	size_t size;

	switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) {
	case FBNIC_QUEUE_BDQ_HPQ_TAIL:
		rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT;
		desc_size *= FBNIC_BD_FRAG_COUNT;
		break;
	case FBNIC_QUEUE_BDQ_PPQ_TAIL:
		rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT;
		desc_size *= FBNIC_BD_FRAG_COUNT;
		break;
	case FBNIC_QUEUE_RCQ_HEAD:
		rxq_size = fbn->rcq_size;
		break;
	default:
		return -EINVAL;
	}

	/* Round size up to nearest 4K */
	size = ALIGN(array_size(desc_size, rxq_size), 4096);

	rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma,
				       GFP_KERNEL | __GFP_NOWARN);
	if (!rxr->desc)
		return -ENOMEM;

	/* rxq_size should be a power of 2, so mask is just that -1 */
	rxr->size_mask = rxq_size - 1;
	rxr->size = size;

	return 0;
}
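
/* Note: fbnic_alloc_rx_ring_desc() above distinguishes the HPQ, PPQ and
 * RCQ flavors purely by the doorbell offset within the queue's CSR block,
 * since all three share struct fbnic_ring.
 */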

static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr)
{
	size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1);

	if (rxr->flags & FBNIC_RING_F_CTX)
		size = sizeof(*rxr->rx_buf) * (rxr->size_mask + 1);
	else
		size = sizeof(*rxr->pkt);

	rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	return rxr->rx_buf ? 0 : -ENOMEM;
}

static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn,
					 struct fbnic_ring *rxr)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_rx_ring_desc(fbn, rxr);
	if (err)
		return err;

	err = fbnic_alloc_rx_ring_buffer(rxr);
	if (err)
		goto free_desc;

	return 0;

free_desc:
	fbnic_free_ring_resources(dev, rxr);
	return err;
}

static void fbnic_free_qt_resources(struct fbnic_net *fbn,
				    struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;

	fbnic_free_ring_resources(dev, &qt->cmpl);
	fbnic_free_ring_resources(dev, &qt->sub1);
	fbnic_free_ring_resources(dev, &qt->sub0);

	if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) {
		xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
		xdp_rxq_info_unreg(&qt->xdp_rxq);
		fbnic_free_qt_page_pools(qt);
	}
}

static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
				       struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0);
	if (err)
		return err;

	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1);
	if (err)
		goto free_sub0;

	err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
	if (err)
		goto free_sub1;

	return 0;

free_sub1:
	fbnic_free_ring_resources(dev, &qt->sub1);
free_sub0:
	fbnic_free_ring_resources(dev, &qt->sub0);
	return err;
}

static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
				       struct fbnic_napi_vector *nv,
				       struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx);
	if (err)
		return err;

	err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx,
			       nv->napi.napi_id);
	if (err)
		goto free_page_pools;

	err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL,
					 qt->sub0.page_pool);
	if (err)
		goto unreg_rxq;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
	if (err)
		goto unreg_mm;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
	if (err)
		goto free_sub0;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl);
	if (err)
		goto free_sub1;

	return 0;

free_sub1:
	fbnic_free_ring_resources(dev, &qt->sub1);
free_sub0:
	fbnic_free_ring_resources(dev, &qt->sub0);
unreg_mm:
	xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
unreg_rxq:
	xdp_rxq_info_unreg(&qt->xdp_rxq);
free_page_pools:
	fbnic_free_qt_page_pools(qt);
	return err;
}

static void fbnic_free_nv_resources(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	int i;

	for (i = 0; i < nv->txt_count + nv->rxt_count; i++)
		fbnic_free_qt_resources(fbn, &nv->qt[i]);
}

static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	int i, j, err;

	/* Allocate Tx Resources */
	for (i = 0; i < nv->txt_count; i++) {
		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
goto free_qt_resources; 2068 } 2069 2070 /* Allocate Rx Resources */ 2071 for (j = 0; j < nv->rxt_count; j++, i++) { 2072 err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]); 2073 if (err) 2074 goto free_qt_resources; 2075 } 2076 2077 return 0; 2078 2079 free_qt_resources: 2080 while (i--) 2081 fbnic_free_qt_resources(fbn, &nv->qt[i]); 2082 return err; 2083 } 2084 2085 void fbnic_free_resources(struct fbnic_net *fbn) 2086 { 2087 int i; 2088 2089 for (i = 0; i < fbn->num_napi; i++) 2090 fbnic_free_nv_resources(fbn, fbn->napi[i]); 2091 } 2092 2093 int fbnic_alloc_resources(struct fbnic_net *fbn) 2094 { 2095 int i, err = -ENODEV; 2096 2097 for (i = 0; i < fbn->num_napi; i++) { 2098 err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]); 2099 if (err) 2100 goto free_resources; 2101 } 2102 2103 return 0; 2104 2105 free_resources: 2106 while (i--) 2107 fbnic_free_nv_resources(fbn, fbn->napi[i]); 2108 2109 return err; 2110 } 2111 2112 static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv) 2113 { 2114 int i, j; 2115 2116 /* Associate Tx queue with NAPI */ 2117 for (i = 0; i < nv->txt_count; i++) { 2118 struct fbnic_q_triad *qt = &nv->qt[i]; 2119 2120 netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, 2121 NETDEV_QUEUE_TYPE_TX, &nv->napi); 2122 } 2123 2124 /* Associate Rx queue with NAPI */ 2125 for (j = 0; j < nv->rxt_count; j++, i++) { 2126 struct fbnic_q_triad *qt = &nv->qt[i]; 2127 2128 netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, 2129 NETDEV_QUEUE_TYPE_RX, &nv->napi); 2130 } 2131 } 2132 2133 static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv) 2134 { 2135 int i, j; 2136 2137 /* Disassociate Tx queue from NAPI */ 2138 for (i = 0; i < nv->txt_count; i++) { 2139 struct fbnic_q_triad *qt = &nv->qt[i]; 2140 2141 netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, 2142 NETDEV_QUEUE_TYPE_TX, NULL); 2143 } 2144 2145 /* Disassociate Rx queue from NAPI */ 2146 for (j = 0; j < nv->rxt_count; j++, i++) { 2147 struct fbnic_q_triad *qt = &nv->qt[i]; 2148 2149 netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, 2150 NETDEV_QUEUE_TYPE_RX, NULL); 2151 } 2152 } 2153 2154 int fbnic_set_netif_queues(struct fbnic_net *fbn) 2155 { 2156 int i, err; 2157 2158 err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues, 2159 fbn->num_rx_queues); 2160 if (err) 2161 return err; 2162 2163 for (i = 0; i < fbn->num_napi; i++) 2164 fbnic_set_netif_napi(fbn->napi[i]); 2165 2166 return 0; 2167 } 2168 2169 void fbnic_reset_netif_queues(struct fbnic_net *fbn) 2170 { 2171 int i; 2172 2173 for (i = 0; i < fbn->num_napi; i++) 2174 fbnic_reset_netif_napi(fbn->napi[i]); 2175 } 2176 2177 static void fbnic_disable_twq0(struct fbnic_ring *txr) 2178 { 2179 u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL); 2180 2181 twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE; 2182 2183 fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl); 2184 } 2185 2186 static void fbnic_disable_twq1(struct fbnic_ring *txr) 2187 { 2188 u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL); 2189 2190 twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE; 2191 2192 fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl); 2193 } 2194 2195 static void fbnic_disable_tcq(struct fbnic_ring *txr) 2196 { 2197 fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0); 2198 fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK); 2199 } 2200 2201 static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) 2202 { 2203 u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL); 2204 2205 bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE; 2206 2207 
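	/* Both BDQs are controlled through the BDQ control register in the
	 * HPQ's CSR space (see fbnic_enable_bdq(), which ORs the PPQ enable
	 * bit into this same register), so only the HPQ ring is written here
	 * and @ppq is not touched in this path.
	 */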
fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl); 2208 } 2209 2210 static void fbnic_disable_rcq(struct fbnic_ring *rxr) 2211 { 2212 fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0); 2213 fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK); 2214 } 2215 2216 void fbnic_napi_disable(struct fbnic_net *fbn) 2217 { 2218 int i; 2219 2220 for (i = 0; i < fbn->num_napi; i++) { 2221 napi_disable_locked(&fbn->napi[i]->napi); 2222 2223 fbnic_nv_irq_disable(fbn->napi[i]); 2224 } 2225 } 2226 2227 static void __fbnic_nv_disable(struct fbnic_napi_vector *nv) 2228 { 2229 int i, t; 2230 2231 /* Disable Tx queue triads */ 2232 for (t = 0; t < nv->txt_count; t++) { 2233 struct fbnic_q_triad *qt = &nv->qt[t]; 2234 2235 fbnic_disable_twq0(&qt->sub0); 2236 fbnic_disable_twq1(&qt->sub1); 2237 fbnic_disable_tcq(&qt->cmpl); 2238 } 2239 2240 /* Disable Rx queue triads */ 2241 for (i = 0; i < nv->rxt_count; i++, t++) { 2242 struct fbnic_q_triad *qt = &nv->qt[t]; 2243 2244 fbnic_disable_bdq(&qt->sub0, &qt->sub1); 2245 fbnic_disable_rcq(&qt->cmpl); 2246 } 2247 } 2248 2249 static void 2250 fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) 2251 { 2252 __fbnic_nv_disable(nv); 2253 fbnic_wrfl(fbn->fbd); 2254 } 2255 2256 void fbnic_disable(struct fbnic_net *fbn) 2257 { 2258 struct fbnic_dev *fbd = fbn->fbd; 2259 int i; 2260 2261 for (i = 0; i < fbn->num_napi; i++) 2262 __fbnic_nv_disable(fbn->napi[i]); 2263 2264 fbnic_wrfl(fbd); 2265 } 2266 2267 static void fbnic_tx_flush(struct fbnic_dev *fbd) 2268 { 2269 netdev_warn(fbd->netdev, "triggering Tx flush\n"); 2270 2271 fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 2272 FBNIC_TMI_DROP_CTRL_EN); 2273 } 2274 2275 static void fbnic_tx_flush_off(struct fbnic_dev *fbd) 2276 { 2277 fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0); 2278 } 2279 2280 struct fbnic_idle_regs { 2281 u32 reg_base; 2282 u8 reg_cnt; 2283 }; 2284 2285 static bool fbnic_all_idle(struct fbnic_dev *fbd, 2286 const struct fbnic_idle_regs *regs, 2287 unsigned int nregs) 2288 { 2289 unsigned int i, j; 2290 2291 for (i = 0; i < nregs; i++) { 2292 for (j = 0; j < regs[i].reg_cnt; j++) { 2293 if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U) 2294 return false; 2295 } 2296 } 2297 return true; 2298 } 2299 2300 static void fbnic_idle_dump(struct fbnic_dev *fbd, 2301 const struct fbnic_idle_regs *regs, 2302 unsigned int nregs, const char *dir, int err) 2303 { 2304 unsigned int i, j; 2305 2306 netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err); 2307 for (i = 0; i < nregs; i++) 2308 for (j = 0; j < regs[i].reg_cnt; j++) 2309 netdev_err(fbd->netdev, "0x%04x: %08x\n", 2310 regs[i].reg_base + j, 2311 fbnic_rd32(fbd, regs[i].reg_base + j)); 2312 } 2313 2314 int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail) 2315 { 2316 static const struct fbnic_idle_regs tx[] = { 2317 { FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TWQ_IDLE_CNT, }, 2318 { FBNIC_QM_TQS_IDLE(0), FBNIC_QM_TQS_IDLE_CNT, }, 2319 { FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TDE_IDLE_CNT, }, 2320 { FBNIC_QM_TCQ_IDLE(0), FBNIC_QM_TCQ_IDLE_CNT, }, 2321 }, rx[] = { 2322 { FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_HPQ_IDLE_CNT, }, 2323 { FBNIC_QM_PPQ_IDLE(0), FBNIC_QM_PPQ_IDLE_CNT, }, 2324 { FBNIC_QM_RCQ_IDLE(0), FBNIC_QM_RCQ_IDLE_CNT, }, 2325 }; 2326 bool idle; 2327 int err; 2328 2329 err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000, 2330 false, fbd, tx, ARRAY_SIZE(tx)); 2331 if (err == -ETIMEDOUT) { 2332 fbnic_tx_flush(fbd); 2333 err = read_poll_timeout_atomic(fbnic_all_idle, idle, 
idle, 2334 2, 500000, false, 2335 fbd, tx, ARRAY_SIZE(tx)); 2336 fbnic_tx_flush_off(fbd); 2337 } 2338 if (err) { 2339 fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err); 2340 if (may_fail) 2341 return err; 2342 } 2343 2344 err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000, 2345 false, fbd, rx, ARRAY_SIZE(rx)); 2346 if (err) 2347 fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err); 2348 return err; 2349 } 2350 2351 static int 2352 fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx) 2353 { 2354 static const unsigned int tx_regs[] = { 2355 FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0), 2356 FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0), 2357 }, rx_regs[] = { 2358 FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0), 2359 FBNIC_QM_RCQ_IDLE(0), 2360 }; 2361 struct fbnic_dev *fbd = fbn->fbd; 2362 unsigned int val, mask, off; 2363 const unsigned int *regs; 2364 unsigned int reg_cnt; 2365 int i, err; 2366 2367 regs = rx ? rx_regs : tx_regs; 2368 reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs); 2369 2370 off = idx / 32; 2371 mask = BIT(idx % 32); 2372 2373 for (i = 0; i < reg_cnt; i++) { 2374 err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask, 2375 2, 500000, false, 2376 fbd, regs[i] + off); 2377 if (err) { 2378 netdev_err(fbd->netdev, 2379 "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n", 2380 rx ? "Rx" : "Tx", idx, regs[i] + off, i, 2381 val, mask); 2382 return err; 2383 } 2384 } 2385 2386 return 0; 2387 } 2388 2389 static void fbnic_nv_flush(struct fbnic_napi_vector *nv) 2390 { 2391 int j, t; 2392 2393 /* Flush any processed Tx Queue Triads and drop the rest */ 2394 for (t = 0; t < nv->txt_count; t++) { 2395 struct fbnic_q_triad *qt = &nv->qt[t]; 2396 struct netdev_queue *tx_queue; 2397 2398 /* Clean the work queues of unprocessed work */ 2399 fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); 2400 fbnic_clean_twq1(nv, false, &qt->sub1, true, 2401 qt->sub1.tail); 2402 2403 /* Reset completion queue descriptor ring */ 2404 memset(qt->cmpl.desc, 0, qt->cmpl.size); 2405 2406 /* Nothing else to do if Tx queue is disabled */ 2407 if (qt->sub0.flags & FBNIC_RING_F_DISABLED) 2408 continue; 2409 2410 /* Reset BQL associated with Tx queue */ 2411 tx_queue = netdev_get_tx_queue(nv->napi.dev, 2412 qt->sub0.q_idx); 2413 netdev_tx_reset_queue(tx_queue); 2414 } 2415 2416 /* Flush any processed Rx Queue Triads and drop the rest */ 2417 for (j = 0; j < nv->rxt_count; j++, t++) { 2418 struct fbnic_q_triad *qt = &nv->qt[t]; 2419 2420 /* Clean the work queues of unprocessed work */ 2421 fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0); 2422 fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0); 2423 2424 /* Reset completion queue descriptor ring */ 2425 memset(qt->cmpl.desc, 0, qt->cmpl.size); 2426 2427 fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0); 2428 memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff)); 2429 } 2430 } 2431 2432 void fbnic_flush(struct fbnic_net *fbn) 2433 { 2434 int i; 2435 2436 for (i = 0; i < fbn->num_napi; i++) 2437 fbnic_nv_flush(fbn->napi[i]); 2438 } 2439 2440 static void fbnic_nv_fill(struct fbnic_napi_vector *nv) 2441 { 2442 int j, t; 2443 2444 /* Configure NAPI mapping and populate pages 2445 * in the BDQ rings to use for Rx 2446 */ 2447 for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { 2448 struct fbnic_q_triad *qt = &nv->qt[t]; 2449 2450 /* Populate the header and payload BDQs */ 2451 fbnic_fill_bdq(&qt->sub0); 2452 fbnic_fill_bdq(&qt->sub1); 2453 } 2454 } 2455 2456 void fbnic_fill(struct fbnic_net *fbn) 2457 { 2458 int i; 

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_nv_fill(fbn->napi[i]);
}

static void fbnic_enable_twq0(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}

static void fbnic_enable_twq1(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}

static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *tcq)
{
	u32 log_size = fls(tcq->size_mask);

	if (!tcq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
	tcq->tail = 0;
	tcq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
}
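/* Illustrative sketch (not part of the driver flow above): the queue size
 * CSRs written in the enable helpers take log2 of the ring size, and only
 * the low 4 bits are kept, so a 64K entry ring encodes as 0. For example,
 * size_mask = 1023 gives fls(1023) = 10, while size_mask = 0xffff gives
 * fls() = 16 and a written value of 16 & 0xf = 0. The helper name below is
 * hypothetical and only demonstrates the encoding.
 */
static u32 __maybe_unused fbnic_ring_log_size(const struct fbnic_ring *ring)
{
	return fls(ring->size_mask) & 0xf;
}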
static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
{
	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
	u32 log_size;

	/* Reset head/tail */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
	ppq->tail = 0;
	ppq->head = 0;
	hpq->tail = 0;
	hpq->head = 0;

	log_size = fls(hpq->size_mask);

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);

	if (!ppq->size_mask)
		goto write_ctl;

	log_size = fls(ppq->size_mask);

	/* Add enabling of PPQ to BDQ control */
	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);

write_ctl:
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
}

static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
				       struct fbnic_ring *rcq)
{
	u32 drop_mode, rcq_ctl;

	drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;

	/* Specify packet layout */
	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
		  FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
		  FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
}

static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc)
{
	u32 threshold;

	/* Set the threshold to half the ring size if rx_frames
	 * is not configured
	 */
	threshold = rx_desc ? : rcq->size_mask / 2;

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv_idx);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, threshold);
}

void fbnic_config_txrx_usecs(struct fbnic_napi_vector *nv, u32 arm)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	struct fbnic_dev *fbd = nv->fbd;
	u32 val = arm;

	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT, fbn->rx_usecs) |
	       FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT_UPD_EN;
	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT, fbn->tx_usecs) |
	       FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT_UPD_EN;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
}

void fbnic_config_rx_frames(struct fbnic_napi_vector *nv)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	int i;

	for (i = nv->txt_count; i < nv->rxt_count + nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		fbnic_config_rim_threshold(&qt->cmpl, nv->v_idx,
					   fbn->rx_max_frames *
					   FBNIC_MIN_RXD_PER_FRAME);
	}
}
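/* Worked example for the interrupt moderation setup above (illustrative):
 * rx_max_frames is scaled by FBNIC_MIN_RXD_PER_FRAME, the minimum number of
 * RCQ descriptors a completed frame is expected to consume, before being
 * handed to fbnic_config_rim_threshold(), so the threshold is programmed in
 * descriptors rather than frames. With rx_max_frames == 0 the "?:" fallback
 * arms the interrupt at half of the completion ring instead.
 */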
static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *rcq)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	u32 log_size = fls(rcq->size_mask);
	u32 hds_thresh = fbn->hds_thresh;
	u32 rcq_ctl = 0;

	fbnic_config_drop_mode_rcq(nv, rcq);

	/* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should
	 * be split at L4. It would also result in the frames being split at
	 * L2/L3 depending on the frame size.
	 */
	if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) {
		rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT;
		hds_thresh = FBNIC_HDR_BYTES_MIN;
	}

	rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
			      FBNIC_RX_PAYLD_OFFSET) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
			      FBNIC_RX_PAYLD_PG_CL);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);

	/* Reset head/tail */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
	rcq->head = 0;
	rcq->tail = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_config_rim_threshold(rcq, nv->v_idx, fbn->rx_max_frames *
						   FBNIC_MIN_RXD_PER_FRAME);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
}

static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Setup Tx Queue Triads */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_enable_twq0(&qt->sub0);
		fbnic_enable_twq1(&qt->sub1);
		fbnic_enable_tcq(nv, &qt->cmpl);
	}

	/* Setup Rx Queue Triads */
	for (j = 0; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		page_pool_enable_direct_recycling(qt->sub0.page_pool,
						  &nv->napi);
		page_pool_enable_direct_recycling(qt->sub1.page_pool,
						  &nv->napi);

		fbnic_enable_bdq(&qt->sub0, &qt->sub1);
		fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
		fbnic_enable_rcq(nv, &qt->cmpl);
	}
}

static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
{
	__fbnic_nv_enable(nv);
	fbnic_wrfl(fbn->fbd);
}

void fbnic_enable(struct fbnic_net *fbn)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		__fbnic_nv_enable(fbn->napi[i]);

	fbnic_wrfl(fbd);
}

static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
{
	fbnic_config_txrx_usecs(nv, FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}
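/* Note on the helper above (descriptive): the single FBNIC_INTR_CQ_REARM
 * write serves two purposes. fbnic_nv_irq_enable() passes
 * FBNIC_INTR_CQ_REARM_INTR_UNMASK so the vector is unmasked at the same
 * time the Tx/Rx usec timeouts are refreshed, while a caller that only
 * wants to update the coalescing timeouts can presumably pass 0 for @arm
 * and leave the mask state untouched.
 */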
void fbnic_napi_enable(struct fbnic_net *fbn)
{
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		napi_enable_locked(&nv->napi);

		fbnic_nv_irq_enable(nv);

		/* Record bit used for NAPI IRQs so we can
		 * set the mask appropriately
		 */
		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
	}

	/* Force the first interrupt on the device to guarantee
	 * that any packets that may have been enqueued during the
	 * bringup are processed.
	 */
	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	fbnic_wrfl(fbd);
}

void fbnic_napi_depletion_check(struct net_device *netdev)
{
	struct fbnic_net *fbn = netdev_priv(netdev);
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	int i, j, t;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		/* Find RQs which are completely out of pages */
		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
			/* Assume 4 pages is always enough to fit a packet
			 * and therefore generate a completion and an IRQ.
			 */
			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
		}
	}

	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	fbnic_wrfl(fbd);
}

static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	const struct fbnic_q_triad *real;
	struct fbnic_q_triad *qt = qmem;
	struct fbnic_napi_vector *nv;

	if (!netif_running(dev))
		return fbnic_alloc_qt_page_pools(fbn, qt, idx);

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx,
			real->sub0.flags);
	fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx,
			real->sub1.flags);
	fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx,
			real->cmpl.flags);

	return fbnic_alloc_rx_qt_resources(fbn, nv, qt);
}

static void fbnic_queue_mem_free(struct net_device *dev, void *qmem)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_q_triad *qt = qmem;

	if (!netif_running(dev))
		fbnic_free_qt_page_pools(qt);
	else
		fbnic_free_qt_resources(fbn, qt);
}

static void __fbnic_nv_restart(struct fbnic_net *fbn,
			       struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	fbnic_nv_enable(fbn, nv);
	fbnic_nv_fill(nv);

	napi_enable_locked(&nv->napi);
	fbnic_nv_irq_enable(nv);
	fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32));
	fbnic_wrfl(fbd);

	for (i = 0; i < nv->txt_count; i++)
		netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx);
}

static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *real;

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	fbnic_aggregate_ring_bdq_counters(fbn, &real->sub0);
	fbnic_aggregate_ring_bdq_counters(fbn, &real->sub1);
	fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl);

	memcpy(real, qmem, sizeof(*real));

	__fbnic_nv_restart(fbn, nv);

	return 0;
}
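/* Descriptive note: fbnic_queue_start() folds the old ring's BDQ and RCQ
 * counters into the netdev-level totals before memcpy() swaps the freshly
 * allocated triad from @qmem into the live fbn->rx[] slot, so per-queue
 * statistics are preserved across a queue restart. The NAPI vector is then
 * brought back up via __fbnic_nv_restart().
 */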
static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	const struct fbnic_q_triad *real;
	struct fbnic_napi_vector *nv;
	int i, t;
	int err;

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	napi_disable_locked(&nv->napi);
	fbnic_nv_irq_disable(nv);

	for (i = 0; i < nv->txt_count; i++)
		netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx);
	fbnic_nv_disable(fbn, nv);

	for (t = 0; t < nv->txt_count + nv->rxt_count; t++) {
		err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count,
					    nv->qt[t].sub0.q_idx);
		if (err)
			goto err_restart;
	}

	fbnic_synchronize_irq(fbn->fbd, nv->v_idx);
	fbnic_nv_flush(nv);

	page_pool_disable_direct_recycling(real->sub0.page_pool);
	page_pool_disable_direct_recycling(real->sub1.page_pool);

	memcpy(qmem, real, sizeof(*real));

	return 0;

err_restart:
	__fbnic_nv_restart(fbn, nv);
	return err;
}

const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct fbnic_q_triad),
	.ndo_queue_mem_alloc	= fbnic_queue_mem_alloc,
	.ndo_queue_mem_free	= fbnic_queue_mem_free,
	.ndo_queue_start	= fbnic_queue_start,
	.ndo_queue_stop		= fbnic_queue_stop,
};
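/* Note on the ops above (illustrative summary, not driver code): the core
 * queue restart path is expected to allocate new queue memory via
 * .ndo_queue_mem_alloc(), stop the live queue with .ndo_queue_stop() (which
 * copies the old triad out into qmem), start the new one with
 * .ndo_queue_start(), and finally release the old memory through
 * .ndo_queue_mem_free(). fbnic_queue_stop() restarts the NAPI vector on
 * failure, so a failed stop leaves the original queue running.
 */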