1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ 3 4 #include <linux/bitfield.h> 5 #include <linux/bpf.h> 6 #include <linux/bpf_trace.h> 7 #include <linux/iopoll.h> 8 #include <linux/pci.h> 9 #include <net/netdev_queues.h> 10 #include <net/page_pool/helpers.h> 11 #include <net/tcp.h> 12 #include <net/xdp.h> 13 14 #include "fbnic.h" 15 #include "fbnic_csr.h" 16 #include "fbnic_netdev.h" 17 #include "fbnic_txrx.h" 18 19 enum { 20 FBNIC_XDP_PASS = 0, 21 FBNIC_XDP_CONSUME, 22 FBNIC_XDP_TX, 23 FBNIC_XDP_LEN_ERR, 24 }; 25 26 enum { 27 FBNIC_XMIT_CB_TS = 0x01, 28 }; 29 30 struct fbnic_xmit_cb { 31 u32 bytecount; 32 u16 gso_segs; 33 u8 desc_count; 34 u8 flags; 35 int hw_head; 36 }; 37 38 #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb)) 39 40 #define FBNIC_XMIT_NOUNMAP ((void *)1) 41 42 static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring) 43 { 44 unsigned long csr_base = (unsigned long)ring->doorbell; 45 46 csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1); 47 48 return (u32 __iomem *)csr_base; 49 } 50 51 static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr) 52 { 53 u32 __iomem *csr_base = fbnic_ring_csr_base(ring); 54 55 return readl(csr_base + csr); 56 } 57 58 static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val) 59 { 60 u32 __iomem *csr_base = fbnic_ring_csr_base(ring); 61 62 writel(val, csr_base + csr); 63 } 64 65 /** 66 * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time 67 * @fbn: netdev priv of the FB NIC 68 * @ts40: timestamp read from a descriptor 69 * 70 * Return: u64 value of PHC time in nanoseconds 71 * 72 * Convert truncated 40 bit device timestamp as read from a descriptor 73 * to the full PHC time in nanoseconds. 74 */ 75 static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40) 76 { 77 unsigned int s; 78 u64 time_ns; 79 s64 offset; 80 u8 ts_top; 81 u32 high; 82 83 do { 84 s = u64_stats_fetch_begin(&fbn->time_seq); 85 offset = READ_ONCE(fbn->time_offset); 86 } while (u64_stats_fetch_retry(&fbn->time_seq, s)); 87 88 high = READ_ONCE(fbn->time_high); 89 90 /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */ 91 time_ns = (u64)(high >> 8) << 40 | ts40; 92 93 /* Compare bits 32-39 between periodic reads and ts40, 94 * see if HW clock may have wrapped since last read. We are sure 95 * that periodic reads are always at least ~1 minute behind, so 96 * this logic works perfectly fine. 
97 */ 98 ts_top = ts40 >> 32; 99 if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2) 100 time_ns += 1ULL << 40; 101 102 return time_ns + offset; 103 } 104 105 static unsigned int fbnic_desc_unused(struct fbnic_ring *ring) 106 { 107 return (ring->head - ring->tail - 1) & ring->size_mask; 108 } 109 110 static unsigned int fbnic_desc_used(struct fbnic_ring *ring) 111 { 112 return (ring->tail - ring->head) & ring->size_mask; 113 } 114 115 static struct netdev_queue *txring_txq(const struct net_device *dev, 116 const struct fbnic_ring *ring) 117 { 118 return netdev_get_tx_queue(dev, ring->q_idx); 119 } 120 121 static int fbnic_maybe_stop_tx(const struct net_device *dev, 122 struct fbnic_ring *ring, 123 const unsigned int size) 124 { 125 struct netdev_queue *txq = txring_txq(dev, ring); 126 int res; 127 128 res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size, 129 FBNIC_TX_DESC_WAKEUP); 130 if (!res) { 131 u64_stats_update_begin(&ring->stats.syncp); 132 ring->stats.twq.stop++; 133 u64_stats_update_end(&ring->stats.syncp); 134 } 135 136 return !res; 137 } 138 139 static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring) 140 { 141 struct netdev_queue *dev_queue = txring_txq(skb->dev, ring); 142 unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount; 143 bool xmit_more = netdev_xmit_more(); 144 145 /* TBD: Request completion more often if xmit_more becomes large */ 146 147 return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more); 148 } 149 150 static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd) 151 { 152 u64 raw_twd = le64_to_cpu(*twd); 153 unsigned int len; 154 dma_addr_t dma; 155 156 dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd); 157 len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd); 158 159 dma_unmap_single(dev, dma, len, DMA_TO_DEVICE); 160 } 161 162 static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd) 163 { 164 u64 raw_twd = le64_to_cpu(*twd); 165 unsigned int len; 166 dma_addr_t dma; 167 168 dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd); 169 len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd); 170 171 dma_unmap_page(dev, dma, len, DMA_TO_DEVICE); 172 } 173 174 #define FBNIC_TWD_TYPE(_type) \ 175 cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type)) 176 177 static bool fbnic_tx_tstamp(struct sk_buff *skb) 178 { 179 struct fbnic_net *fbn; 180 181 if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) 182 return false; 183 184 fbn = netdev_priv(skb->dev); 185 if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF) 186 return false; 187 188 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 189 FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS; 190 FBNIC_XMIT_CB(skb)->hw_head = -1; 191 192 return true; 193 } 194 195 static bool 196 fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb, 197 struct skb_shared_info *shinfo, __le64 *meta, 198 unsigned int *l2len, unsigned int *i3len) 199 { 200 unsigned int l3_type, l4_type, l4len, hdrlen; 201 unsigned char *l4hdr; 202 __be16 payload_len; 203 204 if (unlikely(skb_cow_head(skb, 0))) 205 return true; 206 207 if (shinfo->gso_type & SKB_GSO_PARTIAL) { 208 l3_type = FBNIC_TWD_L3_TYPE_OTHER; 209 } else if (!skb->encapsulation) { 210 if (ip_hdr(skb)->version == 4) 211 l3_type = FBNIC_TWD_L3_TYPE_IPV4; 212 else 213 l3_type = FBNIC_TWD_L3_TYPE_IPV6; 214 } else { 215 unsigned int o3len; 216 217 o3len = skb_inner_network_header(skb) - skb_network_header(skb); 218 *i3len -= o3len; 219 *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK, 220 o3len / 2)); 221 l3_type = 
FBNIC_TWD_L3_TYPE_V6V6; 222 } 223 224 l4hdr = skb_checksum_start(skb); 225 payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data)); 226 227 if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { 228 struct tcphdr *tcph = (struct tcphdr *)l4hdr; 229 230 l4_type = FBNIC_TWD_L4_TYPE_TCP; 231 l4len = __tcp_hdrlen((struct tcphdr *)l4hdr); 232 csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len); 233 } else { 234 struct udphdr *udph = (struct udphdr *)l4hdr; 235 236 l4_type = FBNIC_TWD_L4_TYPE_UDP; 237 l4len = sizeof(struct udphdr); 238 csum_replace_by_diff(&udph->check, (__force __wsum)payload_len); 239 } 240 241 hdrlen = (l4hdr - skb->data) + l4len; 242 *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) | 243 FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) | 244 FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) | 245 FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) | 246 FBNIC_TWD_FLAG_REQ_LSO); 247 248 FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen; 249 FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs; 250 251 u64_stats_update_begin(&ring->stats.syncp); 252 ring->stats.twq.lso += shinfo->gso_segs; 253 u64_stats_update_end(&ring->stats.syncp); 254 255 return false; 256 } 257 258 static bool 259 fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) 260 { 261 struct skb_shared_info *shinfo = skb_shinfo(skb); 262 unsigned int l2len, i3len; 263 264 if (fbnic_tx_tstamp(skb)) 265 *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS); 266 267 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) 268 return false; 269 270 l2len = skb_mac_header_len(skb); 271 i3len = skb_checksum_start(skb) - skb_network_header(skb); 272 273 *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK, 274 skb->csum_offset / 2)); 275 276 if (shinfo->gso_size) { 277 if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len)) 278 return true; 279 } else { 280 *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); 281 u64_stats_update_begin(&ring->stats.syncp); 282 ring->stats.twq.csum_partial++; 283 u64_stats_update_end(&ring->stats.syncp); 284 } 285 286 *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) | 287 FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2)); 288 return false; 289 } 290 291 static void 292 fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq, 293 u64 *csum_cmpl, u64 *csum_none) 294 { 295 skb_checksum_none_assert(skb); 296 297 if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) { 298 (*csum_none)++; 299 return; 300 } 301 302 if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) { 303 skb->ip_summed = CHECKSUM_UNNECESSARY; 304 } else { 305 u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd); 306 307 skb->ip_summed = CHECKSUM_COMPLETE; 308 skb->csum = (__force __wsum)csum; 309 (*csum_cmpl)++; 310 } 311 } 312 313 static bool 314 fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) 315 { 316 struct device *dev = skb->dev->dev.parent; 317 unsigned int tail = ring->tail, first; 318 unsigned int size, data_len; 319 skb_frag_t *frag; 320 bool is_net_iov; 321 dma_addr_t dma; 322 __le64 *twd; 323 324 ring->tx_buf[tail] = skb; 325 326 tail++; 327 tail &= ring->size_mask; 328 first = tail; 329 330 size = skb_headlen(skb); 331 data_len = skb->data_len; 332 333 if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) 334 goto dma_error; 335 336 is_net_iov = false; 337 dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); 338 339 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 340 twd = &ring->desc[tail]; 341 342 if 
(dma_mapping_error(dev, dma)) 343 goto dma_error; 344 345 *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) | 346 FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | 347 FIELD_PREP(FBNIC_TWD_TYPE_MASK, 348 FBNIC_TWD_TYPE_AL)); 349 if (is_net_iov) 350 ring->tx_buf[tail] = FBNIC_XMIT_NOUNMAP; 351 352 tail++; 353 tail &= ring->size_mask; 354 355 if (!data_len) 356 break; 357 358 size = skb_frag_size(frag); 359 data_len -= size; 360 361 if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) 362 goto dma_error; 363 364 is_net_iov = skb_frag_is_net_iov(frag); 365 dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); 366 } 367 368 *twd |= FBNIC_TWD_TYPE(LAST_AL); 369 370 FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask; 371 372 ring->tail = tail; 373 374 /* Record SW timestamp */ 375 skb_tx_timestamp(skb); 376 377 /* Verify there is room for another packet */ 378 fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC); 379 380 if (fbnic_tx_sent_queue(skb, ring)) { 381 *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION); 382 383 /* Force DMA writes to flush before writing to tail */ 384 dma_wmb(); 385 386 writel(tail, ring->doorbell); 387 } 388 389 return false; 390 dma_error: 391 if (net_ratelimit()) 392 netdev_err(skb->dev, "TX DMA map failed\n"); 393 394 while (tail != first) { 395 tail--; 396 tail &= ring->size_mask; 397 twd = &ring->desc[tail]; 398 if (tail == first) 399 fbnic_unmap_single_twd(dev, twd); 400 else if (ring->tx_buf[tail] == FBNIC_XMIT_NOUNMAP) 401 ring->tx_buf[tail] = NULL; 402 else 403 fbnic_unmap_page_twd(dev, twd); 404 } 405 406 return true; 407 } 408 409 #define FBNIC_MIN_FRAME_LEN 60 410 411 static netdev_tx_t 412 fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring) 413 { 414 __le64 *meta = &ring->desc[ring->tail]; 415 u16 desc_needed; 416 417 if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN)) 418 goto err_count; 419 420 /* Need: 1 descriptor per page, 421 * + 1 desc for skb_head, 422 * + 2 desc for metadata and timestamp metadata 423 * + 7 desc gap to keep tail from touching head 424 * otherwise try next time 425 */ 426 desc_needed = skb_shinfo(skb)->nr_frags + 10; 427 if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed)) 428 return NETDEV_TX_BUSY; 429 430 *meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC); 431 432 /* Write all members within DWORD to condense this into 2 4B writes */ 433 FBNIC_XMIT_CB(skb)->bytecount = skb->len; 434 FBNIC_XMIT_CB(skb)->gso_segs = 1; 435 FBNIC_XMIT_CB(skb)->desc_count = 0; 436 FBNIC_XMIT_CB(skb)->flags = 0; 437 438 if (fbnic_tx_offloads(ring, skb, meta)) 439 goto err_free; 440 441 if (fbnic_tx_map(ring, skb, meta)) 442 goto err_free; 443 444 return NETDEV_TX_OK; 445 446 err_free: 447 dev_kfree_skb_any(skb); 448 err_count: 449 u64_stats_update_begin(&ring->stats.syncp); 450 ring->stats.dropped++; 451 u64_stats_update_end(&ring->stats.syncp); 452 return NETDEV_TX_OK; 453 } 454 455 netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev) 456 { 457 struct fbnic_net *fbn = netdev_priv(dev); 458 unsigned int q_map = skb->queue_mapping; 459 460 return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]); 461 } 462 463 static netdev_features_t 464 fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev, 465 netdev_features_t features, unsigned int l3len) 466 { 467 netdev_features_t skb_gso_features; 468 struct ipv6hdr *ip6_hdr; 469 unsigned char l4_hdr; 470 unsigned int start; 471 __be16 frag_off; 472 473 /* Require MANGLEID for GSO_PARTIAL of IPv4. 
474 * In theory we could support TSO with single, innermost v4 header 475 * by pretending everything before it is L2, but that needs to be 476 * parsed case by case.. so leaving it for when the need arises. 477 */ 478 if (!(features & NETIF_F_TSO_MANGLEID)) 479 features &= ~NETIF_F_TSO; 480 481 skb_gso_features = skb_shinfo(skb)->gso_type; 482 skb_gso_features <<= NETIF_F_GSO_SHIFT; 483 484 /* We'd only clear the native GSO features, so don't bother validating 485 * if the match can only be on those supported thru GSO_PARTIAL. 486 */ 487 if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES)) 488 return features; 489 490 /* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice 491 * to fall back to partial for this, or any failure below. 492 * This is just an optimization, UDPv4 will be caught later on. 493 */ 494 if (skb_gso_features & NETIF_F_TSO) 495 return features & ~FBNIC_TUN_GSO_FEATURES; 496 497 /* Inner headers multiple of 2 */ 498 if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2) 499 return features & ~FBNIC_TUN_GSO_FEATURES; 500 501 /* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */ 502 ip6_hdr = ipv6_hdr(skb); 503 if (ip6_hdr->version != 6) 504 return features & ~FBNIC_TUN_GSO_FEATURES; 505 506 l4_hdr = ip6_hdr->nexthdr; 507 start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr); 508 start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off); 509 if (frag_off || l4_hdr != IPPROTO_IPV6 || 510 skb->data + start != skb_inner_network_header(skb)) 511 return features & ~FBNIC_TUN_GSO_FEATURES; 512 513 return features; 514 } 515 516 netdev_features_t 517 fbnic_features_check(struct sk_buff *skb, struct net_device *dev, 518 netdev_features_t features) 519 { 520 unsigned int l2len, l3len; 521 522 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) 523 return features; 524 525 l2len = skb_mac_header_len(skb); 526 l3len = skb_checksum_start(skb) - skb_network_header(skb); 527 528 /* Check header lengths are multiple of 2. 529 * In case of 6in6 we support longer headers (IHLEN + OHLEN) 530 * but keep things simple for now, 512B is plenty. 
531 */ 532 if ((l2len | l3len | skb->csum_offset) % 2 || 533 !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) || 534 !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) || 535 !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2)) 536 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); 537 538 if (likely(!skb->encapsulation) || !skb_is_gso(skb)) 539 return features; 540 541 return fbnic_features_check_encap_gso(skb, dev, features, l3len); 542 } 543 544 static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, 545 struct fbnic_ring *ring, bool discard, 546 unsigned int hw_head) 547 { 548 u64 total_bytes = 0, total_packets = 0, ts_lost = 0; 549 unsigned int head = ring->head; 550 struct netdev_queue *txq; 551 unsigned int clean_desc; 552 553 clean_desc = (hw_head - head) & ring->size_mask; 554 555 while (clean_desc) { 556 struct sk_buff *skb = ring->tx_buf[head]; 557 unsigned int desc_cnt; 558 559 desc_cnt = FBNIC_XMIT_CB(skb)->desc_count; 560 if (desc_cnt > clean_desc) 561 break; 562 563 if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) { 564 FBNIC_XMIT_CB(skb)->hw_head = hw_head; 565 if (likely(!discard)) 566 break; 567 ts_lost++; 568 } 569 570 ring->tx_buf[head] = NULL; 571 572 clean_desc -= desc_cnt; 573 574 while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) { 575 head++; 576 head &= ring->size_mask; 577 desc_cnt--; 578 } 579 580 fbnic_unmap_single_twd(nv->dev, &ring->desc[head]); 581 head++; 582 head &= ring->size_mask; 583 desc_cnt--; 584 585 while (desc_cnt--) { 586 if (ring->tx_buf[head] != FBNIC_XMIT_NOUNMAP) 587 fbnic_unmap_page_twd(nv->dev, 588 &ring->desc[head]); 589 else 590 ring->tx_buf[head] = NULL; 591 head++; 592 head &= ring->size_mask; 593 } 594 595 total_bytes += FBNIC_XMIT_CB(skb)->bytecount; 596 total_packets += FBNIC_XMIT_CB(skb)->gso_segs; 597 598 napi_consume_skb(skb, napi_budget); 599 } 600 601 if (!total_bytes) 602 return; 603 604 ring->head = head; 605 606 txq = txring_txq(nv->napi.dev, ring); 607 608 if (unlikely(discard)) { 609 u64_stats_update_begin(&ring->stats.syncp); 610 ring->stats.dropped += total_packets; 611 ring->stats.twq.ts_lost += ts_lost; 612 u64_stats_update_end(&ring->stats.syncp); 613 614 netdev_tx_completed_queue(txq, total_packets, total_bytes); 615 return; 616 } 617 618 u64_stats_update_begin(&ring->stats.syncp); 619 ring->stats.bytes += total_bytes; 620 ring->stats.packets += total_packets; 621 u64_stats_update_end(&ring->stats.syncp); 622 623 if (!netif_txq_completed_wake(txq, total_packets, total_bytes, 624 fbnic_desc_unused(ring), 625 FBNIC_TX_DESC_WAKEUP)) { 626 u64_stats_update_begin(&ring->stats.syncp); 627 ring->stats.twq.wake++; 628 u64_stats_update_end(&ring->stats.syncp); 629 } 630 } 631 632 static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct, 633 struct fbnic_ring *ring, bool discard, 634 unsigned int hw_head) 635 { 636 u64 total_bytes = 0, total_packets = 0; 637 unsigned int head = ring->head; 638 639 while (hw_head != head) { 640 struct page *page; 641 u64 twd; 642 643 if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL)))) 644 goto next_desc; 645 646 twd = le64_to_cpu(ring->desc[head]); 647 page = ring->tx_buf[head]; 648 649 /* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives 650 * us one increment per packet, with no branches. 
651 */ 652 total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) - 653 FBNIC_TWD_TYPE_AL; 654 total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd); 655 656 page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, -1, 657 pp_allow_direct); 658 next_desc: 659 head++; 660 head &= ring->size_mask; 661 } 662 663 if (!total_bytes) 664 return; 665 666 ring->head = head; 667 668 if (discard) { 669 u64_stats_update_begin(&ring->stats.syncp); 670 ring->stats.dropped += total_packets; 671 u64_stats_update_end(&ring->stats.syncp); 672 return; 673 } 674 675 u64_stats_update_begin(&ring->stats.syncp); 676 ring->stats.bytes += total_bytes; 677 ring->stats.packets += total_packets; 678 u64_stats_update_end(&ring->stats.syncp); 679 } 680 681 static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, 682 struct fbnic_ring *ring, 683 u64 tcd, int *ts_head, int *head0) 684 { 685 struct skb_shared_hwtstamps hwtstamp; 686 struct fbnic_net *fbn; 687 struct sk_buff *skb; 688 int head; 689 u64 ns; 690 691 head = (*ts_head < 0) ? ring->head : *ts_head; 692 693 do { 694 unsigned int desc_cnt; 695 696 if (head == ring->tail) { 697 if (unlikely(net_ratelimit())) 698 netdev_err(nv->napi.dev, 699 "Tx timestamp without matching packet\n"); 700 return; 701 } 702 703 skb = ring->tx_buf[head]; 704 desc_cnt = FBNIC_XMIT_CB(skb)->desc_count; 705 706 head += desc_cnt; 707 head &= ring->size_mask; 708 } while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)); 709 710 fbn = netdev_priv(nv->napi.dev); 711 ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd)); 712 713 memset(&hwtstamp, 0, sizeof(hwtstamp)); 714 hwtstamp.hwtstamp = ns_to_ktime(ns); 715 716 *ts_head = head; 717 718 FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS; 719 if (*head0 < 0) { 720 head = FBNIC_XMIT_CB(skb)->hw_head; 721 if (head >= 0) 722 *head0 = head; 723 } 724 725 skb_tstamp_tx(skb, &hwtstamp); 726 u64_stats_update_begin(&ring->stats.syncp); 727 ring->stats.twq.ts_packets++; 728 u64_stats_update_end(&ring->stats.syncp); 729 } 730 731 static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx, 732 netmem_ref netmem) 733 { 734 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; 735 736 page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX); 737 rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX; 738 rx_buf->netmem = netmem; 739 } 740 741 static struct page * 742 fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx) 743 { 744 struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx]; 745 746 rx_buf->pagecnt_bias--; 747 748 /* sub0 is always fed system pages, from the NAPI-level page_pool */ 749 return netmem_to_page(rx_buf->netmem); 750 } 751 752 static netmem_ref 753 fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx) 754 { 755 struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx]; 756 757 rx_buf->pagecnt_bias--; 758 759 return rx_buf->netmem; 760 } 761 762 static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx, 763 int budget) 764 { 765 struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; 766 netmem_ref netmem = rx_buf->netmem; 767 768 if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias)) 769 page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1, 770 !!budget); 771 772 rx_buf->netmem = 0; 773 } 774 775 static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget, 776 struct fbnic_q_triad *qt, s32 ts_head, s32 head0, 777 s32 head1) 778 { 779 if (head0 >= 0) 780 fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0); 781 else if (ts_head >= 0) 782 fbnic_clean_twq0(nv, 
napi_budget, &qt->sub0, false, ts_head);

	if (head1 >= 0) {
		qt->cmpl.deferred_head = -1;
		if (napi_budget)
			fbnic_clean_twq1(nv, true, &qt->sub1, false, head1);
		else
			qt->cmpl.deferred_head = head1;
	}
}

static void
fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
		int napi_budget)
{
	struct fbnic_ring *cmpl = &qt->cmpl;
	s32 head1 = cmpl->deferred_head;
	s32 head0 = -1, ts_head = -1;
	__le64 *raw_tcd, done;
	u32 head = cmpl->head;

	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
	raw_tcd = &cmpl->desc[head & cmpl->size_mask];

	/* Walk the completion queue collecting the heads reported by NIC */
	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
		u64 tcd;

		dma_rmb();

		tcd = le64_to_cpu(*raw_tcd);

		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
		case FBNIC_TCD_TYPE_0:
			if (tcd & FBNIC_TCD_TWQ1)
				head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK,
						  tcd);
			else
				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
						  tcd);
			/* Currently all err status bits are related to
			 * timestamps and as those have yet to be added
			 * they are skipped for now.
			 */
			break;
		case FBNIC_TCD_TYPE_1:
			if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
				break;

			fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
			break;
		default:
			break;
		}

		raw_tcd++;
		head++;
		if (!(head & cmpl->size_mask)) {
			done ^= cpu_to_le64(FBNIC_TCD_DONE);
			raw_tcd = &cmpl->desc[0];
		}
	}

	/* Record the current head/tail of the queue */
	if (cmpl->head != head) {
		cmpl->head = head;
		writel(head & cmpl->size_mask, cmpl->doorbell);
	}

	/* Unmap and free processed buffers */
	fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1);
}

static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head,
			    int napi_budget)
{
	unsigned int head = ring->head;

	if (head == hw_head)
		return;

	do {
		fbnic_page_pool_drain(ring, head, napi_budget);

		head++;
		head &= ring->size_mask;
	} while (head != hw_head);

	ring->head = head;
}
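/* Illustrative sketch (not driver code) of what fbnic_bd_prep() below
 * emits for a single buffer page when PAGE_SIZE is larger than the 4K
 * fragment size. Assuming, for this example only, FBNIC_BD_FRAG_COUNT == 4:
 *
 *	bdq->desc[id * 4 + 0]: page DMA address,          ID = id, frag 0
 *	bdq->desc[id * 4 + 1]: address + 1 fragment step, ID = id, frag 1
 *	bdq->desc[id * 4 + 2]: address + 2 fragment steps, ID = id, frag 2
 *	bdq->desc[id * 4 + 3]: address + 3 fragment steps, ID = id, frag 3
 *
 * Each loop iteration adds 1 to both the descriptor address field and the
 * descriptor ID field, so the low bits of the ID end up naming the
 * fragment within the page while the upper bits keep the page ID.
 */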
static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem)
{
	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
	dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
	u64 bd, i = FBNIC_BD_FRAG_COUNT;

	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);

	/* In the case that a page size is larger than 4K we will map a
	 * single page to multiple fragments. The fragments will be
	 * FBNIC_BD_FRAG_SIZE in size and the lower n bits of the ID will be
	 * used to indicate the individual fragment IDs.
	 */
	do {
		*bdq_desc = cpu_to_le64(bd);
		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
		bdq_desc++;
	} while (--i);
}

static void fbnic_fill_bdq(struct fbnic_ring *bdq)
{
	unsigned int count = fbnic_desc_unused(bdq);
	unsigned int i = bdq->tail;

	if (!count)
		return;

	do {
		netmem_ref netmem;

		netmem = page_pool_dev_alloc_netmems(bdq->page_pool);
		if (!netmem) {
			u64_stats_update_begin(&bdq->stats.syncp);
			bdq->stats.bdq.alloc_failed++;
			u64_stats_update_end(&bdq->stats.syncp);

			break;
		}

		fbnic_page_pool_init(bdq, i, netmem);
		fbnic_bd_prep(bdq, i, netmem);

		i++;
		i &= bdq->size_mask;

		count--;
	} while (count);

	if (bdq->tail != i) {
		bdq->tail = i;

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(i, bdq->doorbell);
	}
}

static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
{
	/* The headroom of the first header may be larger than FBNIC_RX_HROOM
	 * due to alignment. So account for that by just making the page
	 * offset 0 if we are starting at the first header.
	 */
	if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
	    pg_off == ALIGN(FBNIC_RX_HROOM, 128))
		return 0;

	return pg_off - FBNIC_RX_HROOM;
}

static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
{
	/* Determine the end of the buffer by finding the start of the next
	 * frame and then subtracting the headroom from that frame.
	 */
	pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;

	return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
}

static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
	unsigned char *hdr_start;

	/* data_hard_start should always be NULL when this is called */
	WARN_ON_ONCE(pkt->buff.data_hard_start);

	/* Short-cut the end calculation if we know page is fully consumed */
	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
973 FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len); 974 hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off); 975 976 headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; 977 frame_sz = hdr_pg_end - hdr_pg_start; 978 xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq); 979 hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * 980 FBNIC_BD_FRAG_SIZE; 981 982 /* Sync DMA buffer */ 983 dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), 984 hdr_pg_start, frame_sz, 985 DMA_BIDIRECTIONAL); 986 987 /* Build frame around buffer */ 988 hdr_start = page_address(page) + hdr_pg_start; 989 net_prefetch(pkt->buff.data); 990 xdp_prepare_buff(&pkt->buff, hdr_start, headroom, 991 len - FBNIC_RX_PAD, true); 992 993 pkt->hwtstamp = 0; 994 pkt->add_frag_failed = false; 995 } 996 997 static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, 998 struct fbnic_pkt_buff *pkt, 999 struct fbnic_q_triad *qt) 1000 { 1001 unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 1002 unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); 1003 unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); 1004 netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx); 1005 unsigned int truesize; 1006 bool added; 1007 1008 truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? 1009 FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128); 1010 1011 pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * 1012 FBNIC_BD_FRAG_SIZE; 1013 1014 /* Sync DMA buffer */ 1015 page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem, 1016 pg_off, truesize); 1017 1018 added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize); 1019 if (unlikely(!added)) { 1020 pkt->add_frag_failed = true; 1021 netdev_err_once(nv->napi.dev, 1022 "Failed to add fragment to xdp_buff\n"); 1023 } 1024 } 1025 1026 static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt, 1027 struct fbnic_pkt_buff *pkt, int budget) 1028 { 1029 struct page *page; 1030 1031 if (!pkt->buff.data_hard_start) 1032 return; 1033 1034 if (xdp_buff_has_frags(&pkt->buff)) { 1035 struct skb_shared_info *shinfo; 1036 netmem_ref netmem; 1037 int nr_frags; 1038 1039 shinfo = xdp_get_shared_info_from_buff(&pkt->buff); 1040 nr_frags = shinfo->nr_frags; 1041 1042 while (nr_frags--) { 1043 netmem = skb_frag_netmem(&shinfo->frags[nr_frags]); 1044 page_pool_put_full_netmem(qt->sub1.page_pool, netmem, 1045 !!budget); 1046 } 1047 } 1048 1049 page = virt_to_page(pkt->buff.data_hard_start); 1050 page_pool_put_full_page(qt->sub0.page_pool, page, !!budget); 1051 } 1052 1053 static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv, 1054 struct fbnic_pkt_buff *pkt) 1055 { 1056 struct sk_buff *skb; 1057 1058 skb = xdp_build_skb_from_buff(&pkt->buff); 1059 if (!skb) 1060 return NULL; 1061 1062 /* Add timestamp if present */ 1063 if (pkt->hwtstamp) 1064 skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; 1065 1066 return skb; 1067 } 1068 1069 static long fbnic_pkt_tx(struct fbnic_napi_vector *nv, 1070 struct fbnic_pkt_buff *pkt) 1071 { 1072 struct fbnic_ring *ring = &nv->qt[0].sub1; 1073 int size, offset, nsegs = 1, data_len = 0; 1074 unsigned int tail = ring->tail; 1075 struct skb_shared_info *shinfo; 1076 skb_frag_t *frag = NULL; 1077 struct page *page; 1078 dma_addr_t dma; 1079 __le64 *twd; 1080 1081 if (unlikely(xdp_buff_has_frags(&pkt->buff))) { 1082 shinfo = xdp_get_shared_info_from_buff(&pkt->buff); 1083 nsegs += shinfo->nr_frags; 1084 data_len = shinfo->xdp_frags_size; 1085 frag = &shinfo->frags[0]; 1086 } 1087 1088 if (fbnic_desc_unused(ring) < nsegs) { 
1089 u64_stats_update_begin(&ring->stats.syncp); 1090 ring->stats.dropped++; 1091 u64_stats_update_end(&ring->stats.syncp); 1092 return -FBNIC_XDP_CONSUME; 1093 } 1094 1095 page = virt_to_page(pkt->buff.data_hard_start); 1096 offset = offset_in_page(pkt->buff.data); 1097 dma = page_pool_get_dma_addr(page); 1098 1099 size = pkt->buff.data_end - pkt->buff.data; 1100 1101 while (nsegs--) { 1102 dma_sync_single_range_for_device(nv->dev, dma, offset, size, 1103 DMA_BIDIRECTIONAL); 1104 dma += offset; 1105 1106 ring->tx_buf[tail] = page; 1107 1108 twd = &ring->desc[tail]; 1109 *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) | 1110 FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | 1111 FIELD_PREP(FBNIC_TWD_TYPE_MASK, 1112 FBNIC_TWD_TYPE_AL)); 1113 1114 tail++; 1115 tail &= ring->size_mask; 1116 1117 if (!data_len) 1118 break; 1119 1120 offset = skb_frag_off(frag); 1121 page = skb_frag_page(frag); 1122 dma = page_pool_get_dma_addr(page); 1123 1124 size = skb_frag_size(frag); 1125 data_len -= size; 1126 frag++; 1127 } 1128 1129 *twd |= FBNIC_TWD_TYPE(LAST_AL); 1130 1131 ring->tail = tail; 1132 1133 return -FBNIC_XDP_TX; 1134 } 1135 1136 static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv, 1137 unsigned int pkt_tail) 1138 { 1139 struct fbnic_ring *ring = &nv->qt[0].sub1; 1140 1141 /* Force DMA writes to flush before writing to tail */ 1142 dma_wmb(); 1143 1144 writel(pkt_tail, ring->doorbell); 1145 } 1146 1147 static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv, 1148 struct fbnic_pkt_buff *pkt) 1149 { 1150 struct fbnic_net *fbn = netdev_priv(nv->napi.dev); 1151 struct bpf_prog *xdp_prog; 1152 int act; 1153 1154 xdp_prog = READ_ONCE(fbn->xdp_prog); 1155 if (!xdp_prog) 1156 goto xdp_pass; 1157 1158 /* Should never happen, config paths enforce HDS threshold > MTU */ 1159 if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags) 1160 return ERR_PTR(-FBNIC_XDP_LEN_ERR); 1161 1162 act = bpf_prog_run_xdp(xdp_prog, &pkt->buff); 1163 switch (act) { 1164 case XDP_PASS: 1165 xdp_pass: 1166 return fbnic_build_skb(nv, pkt); 1167 case XDP_TX: 1168 return ERR_PTR(fbnic_pkt_tx(nv, pkt)); 1169 default: 1170 bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act); 1171 fallthrough; 1172 case XDP_ABORTED: 1173 trace_xdp_exception(nv->napi.dev, xdp_prog, act); 1174 fallthrough; 1175 case XDP_DROP: 1176 break; 1177 } 1178 1179 return ERR_PTR(-FBNIC_XDP_CONSUME); 1180 } 1181 1182 static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd) 1183 { 1184 return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 : 1185 (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? 
PKT_HASH_TYPE_L3 : 1186 PKT_HASH_TYPE_L2; 1187 } 1188 1189 static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd, 1190 struct fbnic_pkt_buff *pkt) 1191 { 1192 struct fbnic_net *fbn; 1193 u64 ns, ts; 1194 1195 if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd)) 1196 return; 1197 1198 fbn = netdev_priv(nv->napi.dev); 1199 ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd); 1200 ns = fbnic_ts40_to_ns(fbn, ts); 1201 1202 /* Add timestamp to shared info */ 1203 pkt->hwtstamp = ns_to_ktime(ns); 1204 } 1205 1206 static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv, 1207 u64 rcd, struct sk_buff *skb, 1208 struct fbnic_q_triad *qt, 1209 u64 *csum_cmpl, u64 *csum_none) 1210 { 1211 struct net_device *netdev = nv->napi.dev; 1212 struct fbnic_ring *rcq = &qt->cmpl; 1213 1214 fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none); 1215 1216 if (netdev->features & NETIF_F_RXHASH) 1217 skb_set_hash(skb, 1218 FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd), 1219 fbnic_skb_hash_type(rcd)); 1220 1221 skb_record_rx_queue(skb, rcq->q_idx); 1222 } 1223 1224 static bool fbnic_rcd_metadata_err(u64 rcd) 1225 { 1226 return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd); 1227 } 1228 1229 static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, 1230 struct fbnic_q_triad *qt, int budget) 1231 { 1232 unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0; 1233 u64 csum_complete = 0, csum_none = 0, length_errors = 0; 1234 s32 head0 = -1, head1 = -1, pkt_tail = -1; 1235 struct fbnic_ring *rcq = &qt->cmpl; 1236 struct fbnic_pkt_buff *pkt; 1237 __le64 *raw_rcd, done; 1238 u32 head = rcq->head; 1239 1240 done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0; 1241 raw_rcd = &rcq->desc[head & rcq->size_mask]; 1242 pkt = rcq->pkt; 1243 1244 /* Walk the completion queue collecting the heads reported by NIC */ 1245 while (likely(packets < budget)) { 1246 struct sk_buff *skb = ERR_PTR(-EINVAL); 1247 u32 pkt_bytes; 1248 u64 rcd; 1249 1250 if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done) 1251 break; 1252 1253 dma_rmb(); 1254 1255 rcd = le64_to_cpu(*raw_rcd); 1256 1257 switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) { 1258 case FBNIC_RCD_TYPE_HDR_AL: 1259 head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 1260 fbnic_pkt_prepare(nv, rcd, pkt, qt); 1261 1262 break; 1263 case FBNIC_RCD_TYPE_PAY_AL: 1264 head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); 1265 fbnic_add_rx_frag(nv, rcd, pkt, qt); 1266 1267 break; 1268 case FBNIC_RCD_TYPE_OPT_META: 1269 /* Only type 0 is currently supported */ 1270 if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd)) 1271 break; 1272 1273 fbnic_rx_tstamp(nv, rcd, pkt); 1274 1275 /* We currently ignore the action table index */ 1276 break; 1277 case FBNIC_RCD_TYPE_META: 1278 if (likely(!fbnic_rcd_metadata_err(rcd) && 1279 !pkt->add_frag_failed)) { 1280 pkt_bytes = xdp_get_buff_len(&pkt->buff); 1281 skb = fbnic_run_xdp(nv, pkt); 1282 } 1283 1284 /* Populate skb and invalidate XDP */ 1285 if (!IS_ERR_OR_NULL(skb)) { 1286 fbnic_populate_skb_fields(nv, rcd, skb, qt, 1287 &csum_complete, 1288 &csum_none); 1289 napi_gro_receive(&nv->napi, skb); 1290 } else if (skb == ERR_PTR(-FBNIC_XDP_TX)) { 1291 pkt_tail = nv->qt[0].sub1.tail; 1292 } else if (PTR_ERR(skb) == -FBNIC_XDP_CONSUME) { 1293 fbnic_put_pkt_buff(qt, pkt, 1); 1294 } else { 1295 if (!skb) 1296 alloc_failed++; 1297 1298 if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) 1299 length_errors++; 1300 else 1301 dropped++; 1302 1303 fbnic_put_pkt_buff(qt, pkt, 1); 1304 goto next_dont_count; 1305 } 1306 1307 packets++; 1308 bytes += 
pkt_bytes;
next_dont_count:
			pkt->buff.data_hard_start = NULL;

			break;
		}

		raw_rcd++;
		head++;
		if (!(head & rcq->size_mask)) {
			done ^= cpu_to_le64(FBNIC_RCD_DONE);
			raw_rcd = &rcq->desc[0];
		}
	}

	u64_stats_update_begin(&rcq->stats.syncp);
	rcq->stats.packets += packets;
	rcq->stats.bytes += bytes;
	rcq->stats.dropped += dropped;
	rcq->stats.rx.alloc_failed += alloc_failed;
	rcq->stats.rx.csum_complete += csum_complete;
	rcq->stats.rx.csum_none += csum_none;
	rcq->stats.rx.length_errors += length_errors;
	u64_stats_update_end(&rcq->stats.syncp);

	if (pkt_tail >= 0)
		fbnic_pkt_commit_tail(nv, pkt_tail);

	/* Unmap and free processed buffers */
	if (head0 >= 0)
		fbnic_clean_bdq(&qt->sub0, head0, budget);
	fbnic_fill_bdq(&qt->sub0);

	if (head1 >= 0)
		fbnic_clean_bdq(&qt->sub1, head1, budget);
	fbnic_fill_bdq(&qt->sub1);

	/* Record the current head/tail of the queue */
	if (rcq->head != head) {
		rcq->head = head;
		writel(head & rcq->size_mask, rcq->doorbell);
	}

	return packets;
}

static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
}

static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}

static int fbnic_poll(struct napi_struct *napi, int budget)
{
	struct fbnic_napi_vector *nv = container_of(napi,
						    struct fbnic_napi_vector,
						    napi);
	int i, j, work_done = 0;

	for (i = 0; i < nv->txt_count; i++)
		fbnic_clean_tcq(nv, &nv->qt[i], budget);

	for (j = 0; j < nv->rxt_count; j++, i++)
		work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);

	if (work_done >= budget)
		return budget;

	if (likely(napi_complete_done(napi, work_done)))
		fbnic_nv_irq_rearm(nv);

	return work_done;
}

irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
{
	struct fbnic_napi_vector *nv = *(void **)data;

	napi_schedule_irqoff(&nv->napi);

	return IRQ_HANDLED;
}

void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
				      struct fbnic_ring *rxr)
{
	struct fbnic_queue_stats *stats = &rxr->stats;

	/* Capture stats from queues before disassociating them */
	fbn->rx_stats.bytes += stats->bytes;
	fbn->rx_stats.packets += stats->packets;
	fbn->rx_stats.dropped += stats->dropped;
	fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
	fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
	fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
	fbn->rx_stats.rx.length_errors += stats->rx.length_errors;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4);
}

void fbnic_aggregate_ring_bdq_counters(struct fbnic_net *fbn,
				       struct fbnic_ring *bdq)
{
	struct fbnic_queue_stats *stats = &bdq->stats;

	/* Capture stats from queues before disassociating them */
	fbn->bdq_stats.bdq.alloc_failed += stats->bdq.alloc_failed;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->rx_stats.bdq) / 8 != 1);
}
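/* Note on the BUILD_BUG_ON() checks in the aggregation helpers above and
 * below: the per-queue stat sub-structs are assumed to contain nothing but
 * u64 counters, so sizeof(...) / 8 counts their fields. Adding a counter
 * without aggregating it here changes that count and breaks the build.
 * For illustration only (the field name is invented), a new Rx counter
 * would need both of these lines updated:
 *
 *	fbn->rx_stats.rx.new_counter += stats->rx.new_counter;
 *	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 5);
 */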
void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
				      struct fbnic_ring *txr)
{
	struct fbnic_queue_stats *stats = &txr->stats;

	/* Capture stats from queues before disassociating them */
	fbn->tx_stats.bytes += stats->bytes;
	fbn->tx_stats.packets += stats->packets;
	fbn->tx_stats.dropped += stats->dropped;
	fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
	fbn->tx_stats.twq.lso += stats->twq.lso;
	fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
	fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
	fbn->tx_stats.twq.stop += stats->twq.stop;
	fbn->tx_stats.twq.wake += stats->twq.wake;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
}

void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn,
				       struct fbnic_ring *xdpr)
{
	struct fbnic_queue_stats *stats = &xdpr->stats;

	if (!(xdpr->flags & FBNIC_RING_F_STATS))
		return;

	/* Capture stats from queues before disassociating them */
	fbn->tx_stats.dropped += stats->dropped;
	fbn->tx_stats.bytes += stats->bytes;
	fbn->tx_stats.packets += stats->packets;
}

static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
				 struct fbnic_ring *txr)
{
	if (!(txr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_tx_counters(fbn, txr);

	/* Remove pointer to the Tx ring */
	WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
	fbn->tx[txr->q_idx] = NULL;
}

static void fbnic_remove_xdp_ring(struct fbnic_net *fbn,
				  struct fbnic_ring *xdpr)
{
	if (!(xdpr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_xdp_counters(fbn, xdpr);

	/* Remove pointer to the Tx ring */
	WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr);
	fbn->tx[xdpr->q_idx] = NULL;
}

static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
				 struct fbnic_ring *rxr)
{
	if (!(rxr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_rx_counters(fbn, rxr);

	/* Remove pointer to the Rx ring */
	WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
	fbn->rx[rxr->q_idx] = NULL;
}

static void fbnic_remove_bdq_ring(struct fbnic_net *fbn,
				  struct fbnic_ring *bdq)
{
	if (!(bdq->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_bdq_counters(fbn, bdq);
}

static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt)
{
	page_pool_destroy(qt->sub0.page_pool);
	page_pool_destroy(qt->sub1.page_pool);
}

static void fbnic_free_napi_vector(struct fbnic_net *fbn,
				   struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	int i, j;

	for (i = 0; i < nv->txt_count; i++) {
		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
	}

	for (j = 0; j < nv->rxt_count; j++, i++) {
		fbnic_remove_bdq_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_bdq_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
	}

	fbnic_napi_free_irq(fbd, nv);
	netif_napi_del_locked(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
}

void fbnic_free_napi_vectors(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		if (fbn->napi[i])
			fbnic_free_napi_vector(fbn, fbn->napi[i]);
}
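/* Teardown summary (descriptive only): fbnic_free_napi_vector() above
 * detaches each ring from the netdev, aggregating its stats via the
 * fbnic_remove_*_ring() helpers first, then releases the vector's IRQ,
 * deletes the NAPI instance and clears its fbn->napi[] slot.
 * fbnic_free_napi_vectors() skips NULL slots, which also lets it serve as
 * the unwind path for a partially completed fbnic_alloc_napi_vectors()
 * (see the free_vectors label further down).
 */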
static int
fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt,
			  unsigned int rxq_idx)
{
	struct page_pool_params pp_params = {
		.order = 0,
		.flags = PP_FLAG_DMA_MAP |
			 PP_FLAG_DMA_SYNC_DEV,
		.pool_size = fbn->hpq_size + fbn->ppq_size,
		.nid = NUMA_NO_NODE,
		.dev = fbn->netdev->dev.parent,
		.dma_dir = DMA_BIDIRECTIONAL,
		.offset = 0,
		.max_len = PAGE_SIZE,
		.netdev = fbn->netdev,
		.queue_idx = rxq_idx,
	};
	struct page_pool *pp;

	/* Page pool cannot exceed a size of 32768. This doesn't limit the
	 * pages on the ring but the number we can have cached waiting on
	 * the next use.
	 *
	 * TBD: Can this be reduced further? Would a multiple of
	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
	 * many pages do we need to hold in reserve to get the best return
	 * without hogging too much system memory.
	 */
	if (pp_params.pool_size > 32768)
		pp_params.pool_size = 32768;

	pp = page_pool_create(&pp_params);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	qt->sub0.page_pool = pp;
	if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) {
		pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM;
		pp_params.dma_dir = DMA_FROM_DEVICE;

		pp = page_pool_create(&pp_params);
		if (IS_ERR(pp))
			goto err_destroy_sub0;
	} else {
		page_pool_get(pp);
	}
	qt->sub1.page_pool = pp;

	return 0;

err_destroy_sub0:
	page_pool_destroy(qt->sub0.page_pool);
	return PTR_ERR(pp);
}

static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
			    int q_idx, u8 flags)
{
	u64_stats_init(&ring->stats.syncp);
	ring->doorbell = doorbell;
	ring->q_idx = q_idx;
	ring->flags = flags;
	ring->deferred_head = -1;
}
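/* Descriptive note on the layout built by fbnic_alloc_napi_vector() below:
 * each NAPI vector owns txt_count Tx triads followed by rxt_count Rx
 * triads in nv->qt[]:
 *
 *	Tx triad: sub0 = TWQ0 (skb Tx), sub1 = TWQ1 (XDP Tx), cmpl = TCQ
 *	Rx triad: sub0 = HPQ (header BDQ), sub1 = PPQ (payload BDQ),
 *		  cmpl = RCQ
 *
 * The XDP Tx ring is registered in fbn->tx[] at index
 * FBNIC_MAX_TXQS + rxq_idx so it cannot collide with the regular Tx queue
 * indices.
 */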
static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
				   unsigned int v_count, unsigned int v_idx,
				   unsigned int txq_count, unsigned int txq_idx,
				   unsigned int rxq_count, unsigned int rxq_idx)
{
	int txt_count = txq_count, rxt_count = rxq_count;
	u32 __iomem *uc_addr = fbd->uc_addr0;
	int xdp_count = 0, qt_count, err;
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *qt;
	u32 __iomem *db;

	/* We need to reserve at least one Tx Queue Triad for an XDP ring */
	if (rxq_count) {
		xdp_count = 1;
		if (!txt_count)
			txt_count = 1;
	}

	qt_count = txt_count + rxq_count;
	if (!qt_count)
		return -EINVAL;

	/* If MMIO has already failed there are no rings to initialize */
	if (!uc_addr)
		return -EIO;

	/* Allocate NAPI vector and queue triads */
	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
	if (!nv)
		return -ENOMEM;

	/* Record queue triad counts */
	nv->txt_count = txt_count;
	nv->rxt_count = rxt_count;

	/* Provide pointer back to fbnic and MSI-X vectors */
	nv->fbd = fbd;
	nv->v_idx = v_idx;

	/* Tie napi to netdev */
	fbn->napi[fbnic_napi_idx(nv)] = nv;
	netif_napi_add_config_locked(fbn->netdev, &nv->napi, fbnic_poll,
				     fbnic_napi_idx(nv));

	/* Record IRQ to NAPI struct */
	netif_napi_set_irq_locked(&nv->napi,
				  pci_irq_vector(to_pci_dev(fbd->dev),
						 nv->v_idx));

	/* Tie nv back to PCIe dev */
	nv->dev = fbd->dev;

	/* Request the IRQ for napi vector */
	err = fbnic_napi_request_irq(fbd, nv);
	if (err)
		goto napi_del;

	/* Initialize queue triads */
	qt = nv->qt;

	while (txt_count) {
		u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;

		/* Configure Tx queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];

		/* Assign Tx queue to netdev if applicable */
		if (txq_count > 0) {
			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
			fbn->tx[txq_idx] = &qt->sub0;
			txq_count--;
		} else {
			fbnic_ring_init(&qt->sub0, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure XDP queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL];

		/* Assign XDP queue to netdev if applicable
		 *
		 * The setup for this is in itself a bit different.
		 * 1. We only need one XDP Tx queue per NAPI vector.
		 * 2. We associate it to the first Rx queue index.
		 * 3. The hardware side is associated based on the Tx Queue.
		 * 4. The netdev queue is offset by FBNIC_MAX_TXQs.
		 */
		if (xdp_count > 0) {
			unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx;

			fbnic_ring_init(&qt->sub1, db, xdp_idx, flags);
			fbn->tx[xdp_idx] = &qt->sub1;
			xdp_count--;
		} else {
			fbnic_ring_init(&qt->sub1, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure Tx completion queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, 0, 0);

		/* Update Tx queue index */
		txt_count--;
		txq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	while (rxt_count) {
		/* Configure header queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
		fbnic_ring_init(&qt->sub0, db, 0,
				FBNIC_RING_F_CTX | FBNIC_RING_F_STATS);

		/* Configure payload queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
		fbnic_ring_init(&qt->sub1, db, 0,
				FBNIC_RING_F_CTX | FBNIC_RING_F_STATS);

		/* Configure Rx completion queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
		fbn->rx[rxq_idx] = &qt->cmpl;

		/* Update Rx queue index */
		rxt_count--;
		rxq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	return 0;

napi_del:
	netif_napi_del_locked(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
	return err;
}

int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
{
	unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
	unsigned int num_tx = fbn->num_tx_queues;
	unsigned int num_rx = fbn->num_rx_queues;
	unsigned int num_napi = fbn->num_napi;
	struct fbnic_dev *fbd = fbn->fbd;
	int err;

	/* Allocate 1 Tx queue per napi vector */
	if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
		while (num_tx) {
			err = fbnic_alloc_napi_vector(fbd, fbn,
						      num_napi, v_idx,
						      1, txq_idx, 0, 0);
			if (err)
				goto free_vectors;

			/* Update counts and index */
			num_tx--;
			txq_idx++;

			v_idx++;
		}
	}

	/* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
	while (num_rx | num_tx) {
		int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
		int rqpv = DIV_ROUND_UP(num_rx,
num_napi - rxq_idx); 1790 1791 err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx, 1792 tqpv, txq_idx, rqpv, rxq_idx); 1793 if (err) 1794 goto free_vectors; 1795 1796 /* Update counts and index */ 1797 num_tx -= tqpv; 1798 txq_idx++; 1799 1800 num_rx -= rqpv; 1801 rxq_idx++; 1802 1803 v_idx++; 1804 } 1805 1806 return 0; 1807 1808 free_vectors: 1809 fbnic_free_napi_vectors(fbn); 1810 1811 return err; 1812 } 1813 1814 static void fbnic_free_ring_resources(struct device *dev, 1815 struct fbnic_ring *ring) 1816 { 1817 kvfree(ring->buffer); 1818 ring->buffer = NULL; 1819 1820 /* If size is not set there are no descriptors present */ 1821 if (!ring->size) 1822 return; 1823 1824 dma_free_coherent(dev, ring->size, ring->desc, ring->dma); 1825 ring->size_mask = 0; 1826 ring->size = 0; 1827 } 1828 1829 static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn, 1830 struct fbnic_ring *txr) 1831 { 1832 struct device *dev = fbn->netdev->dev.parent; 1833 size_t size; 1834 1835 /* Round size up to nearest 4K */ 1836 size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096); 1837 1838 txr->desc = dma_alloc_coherent(dev, size, &txr->dma, 1839 GFP_KERNEL | __GFP_NOWARN); 1840 if (!txr->desc) 1841 return -ENOMEM; 1842 1843 /* txq_size should be a power of 2, so mask is just that -1 */ 1844 txr->size_mask = fbn->txq_size - 1; 1845 txr->size = size; 1846 1847 return 0; 1848 } 1849 1850 static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr) 1851 { 1852 size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1); 1853 1854 txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN); 1855 1856 return txr->tx_buf ? 0 : -ENOMEM; 1857 } 1858 1859 static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn, 1860 struct fbnic_ring *txr) 1861 { 1862 struct device *dev = fbn->netdev->dev.parent; 1863 int err; 1864 1865 if (txr->flags & FBNIC_RING_F_DISABLED) 1866 return 0; 1867 1868 err = fbnic_alloc_tx_ring_desc(fbn, txr); 1869 if (err) 1870 return err; 1871 1872 if (!(txr->flags & FBNIC_RING_F_CTX)) 1873 return 0; 1874 1875 err = fbnic_alloc_tx_ring_buffer(txr); 1876 if (err) 1877 goto free_desc; 1878 1879 return 0; 1880 1881 free_desc: 1882 fbnic_free_ring_resources(dev, txr); 1883 return err; 1884 } 1885 1886 static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn, 1887 struct fbnic_ring *rxr) 1888 { 1889 struct device *dev = fbn->netdev->dev.parent; 1890 size_t desc_size = sizeof(*rxr->desc); 1891 u32 rxq_size; 1892 size_t size; 1893 1894 switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) { 1895 case FBNIC_QUEUE_BDQ_HPQ_TAIL: 1896 rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT; 1897 desc_size *= FBNIC_BD_FRAG_COUNT; 1898 break; 1899 case FBNIC_QUEUE_BDQ_PPQ_TAIL: 1900 rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT; 1901 desc_size *= FBNIC_BD_FRAG_COUNT; 1902 break; 1903 case FBNIC_QUEUE_RCQ_HEAD: 1904 rxq_size = fbn->rcq_size; 1905 break; 1906 default: 1907 return -EINVAL; 1908 } 1909 1910 /* Round size up to nearest 4K */ 1911 size = ALIGN(array_size(desc_size, rxq_size), 4096); 1912 1913 rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma, 1914 GFP_KERNEL | __GFP_NOWARN); 1915 if (!rxr->desc) 1916 return -ENOMEM; 1917 1918 /* rxq_size should be a power of 2, so mask is just that -1 */ 1919 rxr->size_mask = rxq_size - 1; 1920 rxr->size = size; 1921 1922 return 0; 1923 } 1924 1925 static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr) 1926 { 1927 size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1); 1928 1929 if (rxr->flags & FBNIC_RING_F_CTX) 1930 size = 
sizeof(*rxr->rx_buf) * (rxr->size_mask + 1); 1931 else 1932 size = sizeof(*rxr->pkt); 1933 1934 rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN); 1935 1936 return rxr->rx_buf ? 0 : -ENOMEM; 1937 } 1938 1939 static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn, 1940 struct fbnic_ring *rxr) 1941 { 1942 struct device *dev = fbn->netdev->dev.parent; 1943 int err; 1944 1945 err = fbnic_alloc_rx_ring_desc(fbn, rxr); 1946 if (err) 1947 return err; 1948 1949 err = fbnic_alloc_rx_ring_buffer(rxr); 1950 if (err) 1951 goto free_desc; 1952 1953 return 0; 1954 1955 free_desc: 1956 fbnic_free_ring_resources(dev, rxr); 1957 return err; 1958 } 1959 1960 static void fbnic_free_qt_resources(struct fbnic_net *fbn, 1961 struct fbnic_q_triad *qt) 1962 { 1963 struct device *dev = fbn->netdev->dev.parent; 1964 1965 fbnic_free_ring_resources(dev, &qt->cmpl); 1966 fbnic_free_ring_resources(dev, &qt->sub1); 1967 fbnic_free_ring_resources(dev, &qt->sub0); 1968 1969 if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) { 1970 xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); 1971 xdp_rxq_info_unreg(&qt->xdp_rxq); 1972 fbnic_free_qt_page_pools(qt); 1973 } 1974 } 1975 1976 static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, 1977 struct fbnic_q_triad *qt) 1978 { 1979 struct device *dev = fbn->netdev->dev.parent; 1980 int err; 1981 1982 err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0); 1983 if (err) 1984 return err; 1985 1986 err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1); 1987 if (err) 1988 goto free_sub0; 1989 1990 err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl); 1991 if (err) 1992 goto free_sub1; 1993 1994 return 0; 1995 1996 free_sub1: 1997 fbnic_free_ring_resources(dev, &qt->sub1); 1998 free_sub0: 1999 fbnic_free_ring_resources(dev, &qt->sub0); 2000 return err; 2001 } 2002 2003 static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn, 2004 struct fbnic_napi_vector *nv, 2005 struct fbnic_q_triad *qt) 2006 { 2007 struct device *dev = fbn->netdev->dev.parent; 2008 int err; 2009 2010 err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx); 2011 if (err) 2012 return err; 2013 2014 err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx, 2015 nv->napi.napi_id); 2016 if (err) 2017 goto free_page_pools; 2018 2019 err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL, 2020 qt->sub0.page_pool); 2021 if (err) 2022 goto unreg_rxq; 2023 2024 err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); 2025 if (err) 2026 goto unreg_mm; 2027 2028 err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1); 2029 if (err) 2030 goto free_sub0; 2031 2032 err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl); 2033 if (err) 2034 goto free_sub1; 2035 2036 return 0; 2037 2038 free_sub1: 2039 fbnic_free_ring_resources(dev, &qt->sub1); 2040 free_sub0: 2041 fbnic_free_ring_resources(dev, &qt->sub0); 2042 unreg_mm: 2043 xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); 2044 unreg_rxq: 2045 xdp_rxq_info_unreg(&qt->xdp_rxq); 2046 free_page_pools: 2047 fbnic_free_qt_page_pools(qt); 2048 return err; 2049 } 2050 2051 static void fbnic_free_nv_resources(struct fbnic_net *fbn, 2052 struct fbnic_napi_vector *nv) 2053 { 2054 int i; 2055 2056 for (i = 0; i < nv->txt_count + nv->rxt_count; i++) 2057 fbnic_free_qt_resources(fbn, &nv->qt[i]); 2058 } 2059 2060 static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, 2061 struct fbnic_napi_vector *nv) 2062 { 2063 int i, j, err; 2064 2065 /* Allocate Tx Resources */ 2066 for (i = 0; i < nv->txt_count; i++) { 2067 err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]); 2068 if (err) 2069 
			goto free_qt_resources;
	}

	/* Allocate Rx Resources */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]);
		if (err)
			goto free_qt_resources;
	}

	return 0;

free_qt_resources:
	while (i--)
		fbnic_free_qt_resources(fbn, &nv->qt[i]);
	return err;
}

void fbnic_free_resources(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_free_nv_resources(fbn, fbn->napi[i]);
}

int fbnic_alloc_resources(struct fbnic_net *fbn)
{
	int i, err = -ENODEV;

	for (i = 0; i < fbn->num_napi; i++) {
		err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]);
		if (err)
			goto free_resources;
	}

	return 0;

free_resources:
	while (i--)
		fbnic_free_nv_resources(fbn, fbn->napi[i]);

	return err;
}

static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv)
{
	int i, j;

	/* Associate Tx queue with NAPI */
	for (i = 0; i < nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
				     NETDEV_QUEUE_TYPE_TX, &nv->napi);
	}

	/* Associate Rx queue with NAPI */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
				     NETDEV_QUEUE_TYPE_RX, &nv->napi);
	}
}

static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv)
{
	int i, j;

	/* Disassociate Tx queue from NAPI */
	for (i = 0; i < nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
				     NETDEV_QUEUE_TYPE_TX, NULL);
	}

	/* Disassociate Rx queue from NAPI */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
				     NETDEV_QUEUE_TYPE_RX, NULL);
	}
}

int fbnic_set_netif_queues(struct fbnic_net *fbn)
{
	int i, err;

	err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues,
					fbn->num_rx_queues);
	if (err)
		return err;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_set_netif_napi(fbn->napi[i]);

	return 0;
}

void fbnic_reset_netif_queues(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_reset_netif_napi(fbn->napi[i]);
}

static void fbnic_disable_twq0(struct fbnic_ring *txr)
{
	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL);

	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;

	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
}

static void fbnic_disable_twq1(struct fbnic_ring *txr)
{
	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL);

	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;

	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl);
}

static void fbnic_disable_tcq(struct fbnic_ring *txr)
{
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK);
}

static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
{
	u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL);

	bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE;

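	/* The HPQ and PPQ share a single BDQ control register that is
	 * accessed through the HPQ CSR space (see fbnic_enable_bdq()),
	 * so only the hpq ring is written here.
	 */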
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
}

static void fbnic_disable_rcq(struct fbnic_ring *rxr)
{
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0);
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK);
}

void fbnic_napi_disable(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++) {
		napi_disable_locked(&fbn->napi[i]->napi);

		fbnic_nv_irq_disable(fbn->napi[i]);
	}
}

static void __fbnic_nv_disable(struct fbnic_napi_vector *nv)
{
	int i, t;

	/* Disable Tx queue triads */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_disable_twq0(&qt->sub0);
		fbnic_disable_twq1(&qt->sub1);
		fbnic_disable_tcq(&qt->cmpl);
	}

	/* Disable Rx queue triads */
	for (i = 0; i < nv->rxt_count; i++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_disable_bdq(&qt->sub0, &qt->sub1);
		fbnic_disable_rcq(&qt->cmpl);
	}
}

static void
fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
{
	__fbnic_nv_disable(nv);
	fbnic_wrfl(fbn->fbd);
}

void fbnic_disable(struct fbnic_net *fbn)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		__fbnic_nv_disable(fbn->napi[i]);

	fbnic_wrfl(fbd);
}

static void fbnic_tx_flush(struct fbnic_dev *fbd)
{
	netdev_warn(fbd->netdev, "triggering Tx flush\n");

	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN,
		    FBNIC_TMI_DROP_CTRL_EN);
}

static void fbnic_tx_flush_off(struct fbnic_dev *fbd)
{
	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0);
}

struct fbnic_idle_regs {
	u32 reg_base;
	u8 reg_cnt;
};

static bool fbnic_all_idle(struct fbnic_dev *fbd,
			   const struct fbnic_idle_regs *regs,
			   unsigned int nregs)
{
	unsigned int i, j;

	for (i = 0; i < nregs; i++) {
		for (j = 0; j < regs[i].reg_cnt; j++) {
			if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U)
				return false;
		}
	}
	return true;
}

static void fbnic_idle_dump(struct fbnic_dev *fbd,
			    const struct fbnic_idle_regs *regs,
			    unsigned int nregs, const char *dir, int err)
{
	unsigned int i, j;

	netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err);
	for (i = 0; i < nregs; i++)
		for (j = 0; j < regs[i].reg_cnt; j++)
			netdev_err(fbd->netdev, "0x%04x: %08x\n",
				   regs[i].reg_base + j,
				   fbnic_rd32(fbd, regs[i].reg_base + j));
}

int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
{
	static const struct fbnic_idle_regs tx[] = {
		{ FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TWQ_IDLE_CNT, },
		{ FBNIC_QM_TQS_IDLE(0), FBNIC_QM_TQS_IDLE_CNT, },
		{ FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TDE_IDLE_CNT, },
		{ FBNIC_QM_TCQ_IDLE(0), FBNIC_QM_TCQ_IDLE_CNT, },
	}, rx[] = {
		{ FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_HPQ_IDLE_CNT, },
		{ FBNIC_QM_PPQ_IDLE(0), FBNIC_QM_PPQ_IDLE_CNT, },
		{ FBNIC_QM_RCQ_IDLE(0), FBNIC_QM_RCQ_IDLE_CNT, },
	};
	bool idle;
	int err;

	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, tx, ARRAY_SIZE(tx));
	if (err == -ETIMEDOUT) {
		fbnic_tx_flush(fbd);
		err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle,
					       2, 500000, false,
					       fbd, tx, ARRAY_SIZE(tx));
		fbnic_tx_flush_off(fbd);
	}
	if (err) {
		fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err);
		if (may_fail)
			return err;
	}

	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, rx, ARRAY_SIZE(rx));
	if (err)
		fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err);
	return err;
}

static int
fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx)
{
	static const unsigned int tx_regs[] = {
		FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0),
		FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0),
	}, rx_regs[] = {
		FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0),
		FBNIC_QM_RCQ_IDLE(0),
	};
	struct fbnic_dev *fbd = fbn->fbd;
	unsigned int val, mask, off;
	const unsigned int *regs;
	unsigned int reg_cnt;
	int i, err;

	regs = rx ? rx_regs : tx_regs;
	reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs);

	off = idx / 32;
	mask = BIT(idx % 32);

	for (i = 0; i < reg_cnt; i++) {
		err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask,
					       2, 500000, false,
					       fbd, regs[i] + off);
		if (err) {
			netdev_err(fbd->netdev,
				   "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n",
				   rx ? "Rx" : "Tx", idx, regs[i] + off, i,
				   val, mask);
			return err;
		}
	}

	return 0;
}

static void fbnic_nv_flush(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Flush any processed Tx Queue Triads and drop the rest */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];
		struct netdev_queue *tx_queue;

		/* Clean the work queues of unprocessed work */
		fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);
		fbnic_clean_twq1(nv, false, &qt->sub1, true,
				 qt->sub1.tail);

		/* Reset completion queue descriptor ring */
		memset(qt->cmpl.desc, 0, qt->cmpl.size);

		/* Nothing else to do if Tx queue is disabled */
		if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
			continue;

		/* Reset BQL associated with Tx queue */
		tx_queue = netdev_get_tx_queue(nv->napi.dev,
					       qt->sub0.q_idx);
		netdev_tx_reset_queue(tx_queue);
	}

	/* Flush any processed Rx Queue Triads and drop the rest */
	for (j = 0; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		/* Clean the work queues of unprocessed work */
		fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0);
		fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0);

		/* Reset completion queue descriptor ring */
		memset(qt->cmpl.desc, 0, qt->cmpl.size);

		fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0);
		memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
	}
}

void fbnic_flush(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_nv_flush(fbn->napi[i]);
}

static void fbnic_nv_fill(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Configure NAPI mapping and populate pages
	 * in the BDQ rings to use for Rx
	 */
	for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		/* Populate the header and payload BDQs */
		fbnic_fill_bdq(&qt->sub0);
		fbnic_fill_bdq(&qt->sub1);
	}
}

void fbnic_fill(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_nv_fill(fbn->napi[i]);
}

static void fbnic_enable_twq0(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}

static void fbnic_enable_twq1(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}

static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *tcq)
{
	u32 log_size = fls(tcq->size_mask);

	if (!tcq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
	tcq->tail = 0;
	tcq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
}

static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
{
	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
	u32 log_size;

	/* Reset head/tail */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
	ppq->tail = 0;
	ppq->head = 0;
	hpq->tail = 0;
	hpq->head = 0;

	log_size = fls(hpq->size_mask);

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);

	if (!ppq->size_mask)
		goto write_ctl;

	log_size = fls(ppq->size_mask);

	/* Add enabling of PPQ to BDQ control */
	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);

write_ctl:
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
}

static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
				       struct fbnic_ring *rcq, bool tx_pause)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	u32 drop_mode, rcq_ctl;

	if (!tx_pause && fbn->num_rx_queues > 1)
		drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;
	else
		drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_NEVER;

	/* Specify packet layout */
	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
		  FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
		  FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
}

void fbnic_config_drop_mode(struct fbnic_net *fbn, bool tx_pause)
{
	int i, t;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		for (t = 0; t < nv->rxt_count; t++) {
			struct fbnic_q_triad *qt = &nv->qt[nv->txt_count + t];

			fbnic_config_drop_mode_rcq(nv, &qt->cmpl, tx_pause);
		}
	}
}

static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc)
{
	u32 threshold;

	/* Set the threshold to half the ring size if rx_frames
	 * is not configured
	 */
	threshold = rx_desc ? : rcq->size_mask / 2;

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv_idx);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, threshold);
}

void fbnic_config_txrx_usecs(struct fbnic_napi_vector *nv, u32 arm)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	struct fbnic_dev *fbd = nv->fbd;
	u32 val = arm;

	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT, fbn->rx_usecs) |
	       FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT_UPD_EN;
	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT, fbn->tx_usecs) |
	       FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT_UPD_EN;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
}

void fbnic_config_rx_frames(struct fbnic_napi_vector *nv)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	int i;

	for (i = nv->txt_count; i < nv->rxt_count + nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		fbnic_config_rim_threshold(&qt->cmpl, nv->v_idx,
					   fbn->rx_max_frames *
					   FBNIC_MIN_RXD_PER_FRAME);
	}
}

static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *rcq)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	u32 log_size = fls(rcq->size_mask);
	u32 hds_thresh = fbn->hds_thresh;
	u32 rcq_ctl = 0;

	fbnic_config_drop_mode_rcq(nv, rcq, fbn->tx_pause);

	/* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should
	 * be split at L4. It would also result in the frames being split at
	 * L2/L3 depending on the frame size.
	 */
	if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) {
		rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT;
		hds_thresh = FBNIC_HDR_BYTES_MIN;
	}

	rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
			      FBNIC_RX_PAYLD_OFFSET) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
			      FBNIC_RX_PAYLD_PG_CL);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);

	/* Reset head/tail */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
	rcq->head = 0;
	rcq->tail = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_config_rim_threshold(rcq, nv->v_idx, fbn->rx_max_frames *
				   FBNIC_MIN_RXD_PER_FRAME);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
}

static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Setup Tx Queue Triads */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_enable_twq0(&qt->sub0);
		fbnic_enable_twq1(&qt->sub1);
		fbnic_enable_tcq(nv, &qt->cmpl);
	}

	/* Setup Rx Queue Triads */
	for (j = 0; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		page_pool_enable_direct_recycling(qt->sub0.page_pool,
						  &nv->napi);
		page_pool_enable_direct_recycling(qt->sub1.page_pool,
						  &nv->napi);

		fbnic_enable_bdq(&qt->sub0, &qt->sub1);
		fbnic_enable_rcq(nv, &qt->cmpl);
	}
}

static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
{
	__fbnic_nv_enable(nv);
	fbnic_wrfl(fbn->fbd);
}

void fbnic_enable(struct fbnic_net *fbn)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		__fbnic_nv_enable(fbn->napi[i]);

	fbnic_wrfl(fbd);
}

static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
{
	fbnic_config_txrx_usecs(nv, FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}

void fbnic_napi_enable(struct fbnic_net *fbn)
{
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		napi_enable_locked(&nv->napi);

		fbnic_nv_irq_enable(nv);

		/* Record bit used for NAPI IRQs so we can
		 * set the mask appropriately
		 */
		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
	}

	/* Force the first interrupt on the device to guarantee
	 * that any packets that may have been enqueued during the
	 * bringup are processed.
	 */
	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	fbnic_wrfl(fbd);
}

void fbnic_napi_depletion_check(struct net_device *netdev)
{
	struct fbnic_net *fbn = netdev_priv(netdev);
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	int i, j, t;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		/* Find RQs which are completely out of pages */
		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
			/* Assume 4 pages is always enough to fit a packet
			 * and therefore generate a completion and an IRQ.
			 */
			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
		}
	}

	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	fbnic_wrfl(fbd);
}

static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	const struct fbnic_q_triad *real;
	struct fbnic_q_triad *qt = qmem;
	struct fbnic_napi_vector *nv;

	if (!netif_running(dev))
		return fbnic_alloc_qt_page_pools(fbn, qt, idx);

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx,
			real->sub0.flags);
	fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx,
			real->sub1.flags);
	fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx,
			real->cmpl.flags);

	return fbnic_alloc_rx_qt_resources(fbn, nv, qt);
}

static void fbnic_queue_mem_free(struct net_device *dev, void *qmem)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_q_triad *qt = qmem;

	if (!netif_running(dev))
		fbnic_free_qt_page_pools(qt);
	else
		fbnic_free_qt_resources(fbn, qt);
}

static void __fbnic_nv_restart(struct fbnic_net *fbn,
			       struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	fbnic_nv_enable(fbn, nv);
	fbnic_nv_fill(nv);

	napi_enable_locked(&nv->napi);
	fbnic_nv_irq_enable(nv);
	fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32));
	fbnic_wrfl(fbd);

	for (i = 0; i < nv->txt_count; i++)
		netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx);
}

static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *real;

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	fbnic_aggregate_ring_bdq_counters(fbn, &real->sub0);
	fbnic_aggregate_ring_bdq_counters(fbn, &real->sub1);
	fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl);

	memcpy(real, qmem, sizeof(*real));

	__fbnic_nv_restart(fbn, nv);

	return 0;
}

static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	const struct fbnic_q_triad *real;
	struct fbnic_napi_vector *nv;
	int i, t;
	int err;

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	napi_disable_locked(&nv->napi);
	fbnic_nv_irq_disable(nv);

	for (i = 0; i < nv->txt_count; i++)
		netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx);
	fbnic_nv_disable(fbn, nv);

	for (t = 0; t < nv->txt_count + nv->rxt_count; t++) {
		err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count,
					    nv->qt[t].sub0.q_idx);
		if (err)
			goto err_restart;
	}

	fbnic_synchronize_irq(fbn->fbd, nv->v_idx);
	fbnic_nv_flush(nv);

	page_pool_disable_direct_recycling(real->sub0.page_pool);
	page_pool_disable_direct_recycling(real->sub1.page_pool);

	memcpy(qmem, real, sizeof(*real));

	return 0;

err_restart:
	__fbnic_nv_restart(fbn, nv);
	return err;
}

const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = {
	.ndo_queue_mem_size = sizeof(struct fbnic_q_triad),
	.ndo_queue_mem_alloc = fbnic_queue_mem_alloc,
	.ndo_queue_mem_free = fbnic_queue_mem_free,
	.ndo_queue_start = fbnic_queue_start,
	.ndo_queue_stop = fbnic_queue_stop,
};