// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Meta Platforms, Inc. and affiliates. */

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
#include <net/xdp.h>

#include "fbnic.h"
#include "fbnic_csr.h"
#include "fbnic_netdev.h"
#include "fbnic_txrx.h"

enum {
	FBNIC_XDP_PASS = 0,
	FBNIC_XDP_CONSUME,
	FBNIC_XDP_TX,
	FBNIC_XDP_LEN_ERR,
};

enum {
	FBNIC_XMIT_CB_TS	= 0x01,
};

struct fbnic_xmit_cb {
	u32 bytecount;
	u16 gso_segs;
	u8 desc_count;
	u8 flags;
	int hw_head;
};

#define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb))

#define FBNIC_XMIT_NOUNMAP	((void *)1)

static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring)
{
	unsigned long csr_base = (unsigned long)ring->doorbell;

	csr_base &= ~(FBNIC_QUEUE_STRIDE * sizeof(u32) - 1);

	return (u32 __iomem *)csr_base;
}

static u32 fbnic_ring_rd32(struct fbnic_ring *ring, unsigned int csr)
{
	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);

	return readl(csr_base + csr);
}

static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
{
	u32 __iomem *csr_base = fbnic_ring_csr_base(ring);

	writel(val, csr_base + csr);
}

/**
 * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
 * @fbn: netdev priv of the FB NIC
 * @ts40: timestamp read from a descriptor
 *
 * Return: u64 value of PHC time in nanoseconds
 *
 * Convert truncated 40 bit device timestamp as read from a descriptor
 * to the full PHC time in nanoseconds.
 */
static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
{
	unsigned int s;
	u64 time_ns;
	s64 offset;
	u8 ts_top;
	u32 high;

	do {
		s = u64_stats_fetch_begin(&fbn->time_seq);
		offset = READ_ONCE(fbn->time_offset);
	} while (u64_stats_fetch_retry(&fbn->time_seq, s));

	high = READ_ONCE(fbn->time_high);

	/* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
	time_ns = (u64)(high >> 8) << 40 | ts40;

	/* Compare bits 32-39 between periodic reads and ts40,
	 * see if HW clock may have wrapped since last read. We are sure
	 * that periodic reads are always at least ~1 minute behind, so
	 * this logic works perfectly fine.
	 */
	ts_top = ts40 >> 32;
	if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
		time_ns += 1ULL << 40;

	return time_ns + offset;
}

static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
{
	return (ring->head - ring->tail - 1) & ring->size_mask;
}

static unsigned int fbnic_desc_used(struct fbnic_ring *ring)
{
	return (ring->tail - ring->head) & ring->size_mask;
}

static struct netdev_queue *txring_txq(const struct net_device *dev,
				       const struct fbnic_ring *ring)
{
	return netdev_get_tx_queue(dev, ring->q_idx);
}

static int fbnic_maybe_stop_tx(const struct net_device *dev,
			       struct fbnic_ring *ring,
			       const unsigned int size)
{
	struct netdev_queue *txq = txring_txq(dev, ring);
	int res;

	res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
				   FBNIC_TX_DESC_WAKEUP);
	if (!res) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.twq.stop++;
		u64_stats_update_end(&ring->stats.syncp);
	}

	return !res;
}

static bool fbnic_tx_sent_queue(struct sk_buff *skb, struct fbnic_ring *ring)
{
	struct netdev_queue *dev_queue = txring_txq(skb->dev, ring);
	unsigned int bytecount = FBNIC_XMIT_CB(skb)->bytecount;
	bool xmit_more = netdev_xmit_more();

	/* TBD: Request completion more often if xmit_more becomes large */

	return __netdev_tx_sent_queue(dev_queue, bytecount, xmit_more);
}

static void fbnic_unmap_single_twd(struct device *dev, __le64 *twd)
{
	u64 raw_twd = le64_to_cpu(*twd);
	unsigned int len;
	dma_addr_t dma;

	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);

	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
}

static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
{
	u64 raw_twd = le64_to_cpu(*twd);
	unsigned int len;
	dma_addr_t dma;

	dma = FIELD_GET(FBNIC_TWD_ADDR_MASK, raw_twd);
	len = FIELD_GET(FBNIC_TWD_LEN_MASK, raw_twd);

	dma_unmap_page(dev, dma, len, DMA_TO_DEVICE);
}

#define FBNIC_TWD_TYPE(_type) \
	cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))

static bool fbnic_tx_tstamp(struct sk_buff *skb)
{
	struct fbnic_net *fbn;

	if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		return false;

	fbn = netdev_priv(skb->dev);
	if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF)
		return false;

	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
	FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS;
	FBNIC_XMIT_CB(skb)->hw_head = -1;

	return true;
}

static bool
fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb,
	     struct skb_shared_info *shinfo, __le64 *meta,
	     unsigned int *l2len, unsigned int *i3len)
{
	unsigned int l3_type, l4_type, l4len, hdrlen;
	unsigned char *l4hdr;
	__be16 payload_len;

	if (unlikely(skb_cow_head(skb, 0)))
		return true;

	if (shinfo->gso_type & SKB_GSO_PARTIAL) {
		l3_type = FBNIC_TWD_L3_TYPE_OTHER;
	} else if (!skb->encapsulation) {
		if (ip_hdr(skb)->version == 4)
			l3_type = FBNIC_TWD_L3_TYPE_IPV4;
		else
			l3_type = FBNIC_TWD_L3_TYPE_IPV6;
	} else {
		unsigned int o3len;

		o3len = skb_inner_network_header(skb) - skb_network_header(skb);
		*i3len -= o3len;
		*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK,
						o3len / 2));
		l3_type = FBNIC_TWD_L3_TYPE_V6V6;
	}

	l4hdr = skb_checksum_start(skb);
	payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data));

	if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
		struct tcphdr *tcph = (struct tcphdr *)l4hdr;

		l4_type = FBNIC_TWD_L4_TYPE_TCP;
		l4len = __tcp_hdrlen((struct tcphdr *)l4hdr);
		csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len);
	} else {
		struct udphdr *udph = (struct udphdr *)l4hdr;

		l4_type = FBNIC_TWD_L4_TYPE_UDP;
		l4len = sizeof(struct udphdr);
		csum_replace_by_diff(&udph->check, (__force __wsum)payload_len);
	}

	hdrlen = (l4hdr - skb->data) + l4len;
	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) |
			     FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) |
			     FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) |
			     FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) |
			     FBNIC_TWD_FLAG_REQ_LSO);

	FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen;
	FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs;

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.twq.lso += shinfo->gso_segs;
	u64_stats_update_end(&ring->stats.syncp);

	return false;
}

static bool
fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int l2len, i3len;

	if (fbnic_tx_tstamp(skb))
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
		return false;

	l2len = skb_mac_header_len(skb);
	i3len = skb_checksum_start(skb) - skb_network_header(skb);

	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK,
					skb->csum_offset / 2));

	if (shinfo->gso_size) {
		if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len))
			return true;
	} else {
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.twq.csum_partial++;
		u64_stats_update_end(&ring->stats.syncp);
	}

	*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
			     FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
	return false;
}

static void
fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq,
	      u64 *csum_cmpl, u64 *csum_none)
{
	skb_checksum_none_assert(skb);

	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
		(*csum_none)++;
		return;
	}

	if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		u16 csum = FIELD_GET(FBNIC_RCD_META_L2_CSUM_MASK, rcd);

		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = (__force __wsum)csum;
		(*csum_cmpl)++;
	}
}

static bool
fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
	struct device *dev = skb->dev->dev.parent;
	unsigned int tail = ring->tail, first;
	unsigned int size, data_len;
	skb_frag_t *frag;
	bool is_net_iov;
	dma_addr_t dma;
	__le64 *twd;

	ring->tx_buf[tail] = skb;

	tail++;
	tail &= ring->size_mask;
	first = tail;

	size = skb_headlen(skb);
	data_len = skb->data_len;

	if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
		goto dma_error;

	is_net_iov = false;
	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		twd = &ring->desc[tail];

		if (dma_mapping_error(dev, dma))
			goto dma_error;

		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
					      FBNIC_TWD_TYPE_AL));
		if (is_net_iov)
			ring->tx_buf[tail] = FBNIC_XMIT_NOUNMAP;

		tail++;
		tail &= ring->size_mask;

		if (!data_len)
			break;

		size = skb_frag_size(frag);
		data_len -= size;

		if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK))
			goto dma_error;

		is_net_iov = skb_frag_is_net_iov(frag);
		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
	}

	*twd |= FBNIC_TWD_TYPE(LAST_AL);

	FBNIC_XMIT_CB(skb)->desc_count = ((twd - meta) + 1) & ring->size_mask;

	ring->tail = tail;

	/* Record SW timestamp */
	skb_tx_timestamp(skb);

	/* Verify there is room for another packet */
	fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);

	if (fbnic_tx_sent_queue(skb, ring)) {
		*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_COMPLETION);

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(tail, ring->doorbell);
	}

	return false;
dma_error:
	if (net_ratelimit())
		netdev_err(skb->dev, "TX DMA map failed\n");

	while (tail != first) {
		tail--;
		tail &= ring->size_mask;
		twd = &ring->desc[tail];
		if (tail == first)
			fbnic_unmap_single_twd(dev, twd);
		else if (ring->tx_buf[tail] == FBNIC_XMIT_NOUNMAP)
			ring->tx_buf[tail] = NULL;
		else
			fbnic_unmap_page_twd(dev, twd);
	}

	return true;
}

#define FBNIC_MIN_FRAME_LEN	60

static netdev_tx_t
fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring)
{
	__le64 *meta = &ring->desc[ring->tail];
	u16 desc_needed;

	if (skb_put_padto(skb, FBNIC_MIN_FRAME_LEN))
		goto err_count;

	/* Need: 1 descriptor per page,
	 *       + 1 desc for skb_head,
	 *       + 2 desc for metadata and timestamp metadata
	 *       + 7 desc gap to keep tail from touching head
	 * otherwise try next time
	 */
	desc_needed = skb_shinfo(skb)->nr_frags + 10;
	if (fbnic_maybe_stop_tx(skb->dev, ring, desc_needed))
		return NETDEV_TX_BUSY;

	*meta = cpu_to_le64(FBNIC_TWD_FLAG_DEST_MAC);

	/* Write all members within DWORD to condense this into 2 4B writes */
	FBNIC_XMIT_CB(skb)->bytecount = skb->len;
	FBNIC_XMIT_CB(skb)->gso_segs = 1;
	FBNIC_XMIT_CB(skb)->desc_count = 0;
	FBNIC_XMIT_CB(skb)->flags = 0;

	if (fbnic_tx_offloads(ring, skb, meta))
		goto err_free;

	if (fbnic_tx_map(ring, skb, meta))
		goto err_free;

	return NETDEV_TX_OK;

err_free:
	dev_kfree_skb_any(skb);
err_count:
	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.dropped++;
	u64_stats_update_end(&ring->stats.syncp);
	return NETDEV_TX_OK;
}

netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	unsigned int q_map = skb->queue_mapping;

	return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]);
}

static netdev_features_t
fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev,
			       netdev_features_t features, unsigned int l3len)
{
	netdev_features_t skb_gso_features;
	struct ipv6hdr *ip6_hdr;
	unsigned char l4_hdr;
	unsigned int start;
	__be16 frag_off;

	/* Require MANGLEID for GSO_PARTIAL of IPv4.
	 * In theory we could support TSO with single, innermost v4 header
	 * by pretending everything before it is L2, but that needs to be
	 * parsed case by case, so leaving it for when the need arises.
	 */
	if (!(features & NETIF_F_TSO_MANGLEID))
		features &= ~NETIF_F_TSO;

	skb_gso_features = skb_shinfo(skb)->gso_type;
	skb_gso_features <<= NETIF_F_GSO_SHIFT;

	/* We'd only clear the native GSO features, so don't bother validating
	 * if the match can only be on those supported thru GSO_PARTIAL.
	 */
	if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES))
		return features;

	/* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice
	 * to fall back to partial for this, or any failure below.
	 * This is just an optimization, UDPv4 will be caught later on.
	 */
	if (skb_gso_features & NETIF_F_TSO)
		return features & ~FBNIC_TUN_GSO_FEATURES;

	/* Inner headers multiple of 2 */
	if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2)
		return features & ~FBNIC_TUN_GSO_FEATURES;

	/* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */
	ip6_hdr = ipv6_hdr(skb);
	if (ip6_hdr->version != 6)
		return features & ~FBNIC_TUN_GSO_FEATURES;

	l4_hdr = ip6_hdr->nexthdr;
	start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr);
	start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off);
	if (frag_off || l4_hdr != IPPROTO_IPV6 ||
	    skb->data + start != skb_inner_network_header(skb))
		return features & ~FBNIC_TUN_GSO_FEATURES;

	return features;
}

netdev_features_t
fbnic_features_check(struct sk_buff *skb, struct net_device *dev,
		     netdev_features_t features)
{
	unsigned int l2len, l3len;

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
		return features;

	l2len = skb_mac_header_len(skb);
	l3len = skb_checksum_start(skb) - skb_network_header(skb);

	/* Check header lengths are multiple of 2.
	 * In case of 6in6 we support longer headers (IHLEN + OHLEN)
	 * but keep things simple for now, 512B is plenty.
	 */
	if ((l2len | l3len | skb->csum_offset) % 2 ||
	    !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) ||
	    !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) ||
	    !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2))
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	if (likely(!skb->encapsulation) || !skb_is_gso(skb))
		return features;

	return fbnic_features_check_encap_gso(skb, dev, features, l3len);
}

static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
			     struct fbnic_ring *ring, bool discard,
			     unsigned int hw_head)
{
	u64 total_bytes = 0, total_packets = 0, ts_lost = 0;
	unsigned int head = ring->head;
	struct netdev_queue *txq;
	unsigned int clean_desc;

	clean_desc = (hw_head - head) & ring->size_mask;

	while (clean_desc) {
		struct sk_buff *skb = ring->tx_buf[head];
		unsigned int desc_cnt;

		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
		if (desc_cnt > clean_desc)
			break;

		if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) {
			FBNIC_XMIT_CB(skb)->hw_head = hw_head;
			if (likely(!discard))
				break;
			ts_lost++;
		}

		ring->tx_buf[head] = NULL;

		clean_desc -= desc_cnt;

		while (!(ring->desc[head] & FBNIC_TWD_TYPE(AL))) {
			head++;
			head &= ring->size_mask;
			desc_cnt--;
		}

		fbnic_unmap_single_twd(nv->dev, &ring->desc[head]);
		head++;
		head &= ring->size_mask;
		desc_cnt--;

		while (desc_cnt--) {
			if (ring->tx_buf[head] != FBNIC_XMIT_NOUNMAP)
				fbnic_unmap_page_twd(nv->dev,
						     &ring->desc[head]);
			else
				ring->tx_buf[head] = NULL;
			head++;
			head &= ring->size_mask;
		}

		total_bytes += FBNIC_XMIT_CB(skb)->bytecount;
		total_packets += FBNIC_XMIT_CB(skb)->gso_segs;

		napi_consume_skb(skb, napi_budget);
	}

	if (!total_bytes)
		return;

	ring->head = head;

	txq = txring_txq(nv->napi.dev, ring);

	if (unlikely(discard)) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped += total_packets;
		ring->stats.twq.ts_lost += ts_lost;
		u64_stats_update_end(&ring->stats.syncp);

		netdev_tx_completed_queue(txq, total_packets, total_bytes);
		return;
	}

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.bytes += total_bytes;
	ring->stats.packets += total_packets;
	u64_stats_update_end(&ring->stats.syncp);

	if (!netif_txq_completed_wake(txq, total_packets, total_bytes,
				      fbnic_desc_unused(ring),
				      FBNIC_TX_DESC_WAKEUP)) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.twq.wake++;
		u64_stats_update_end(&ring->stats.syncp);
	}
}

static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct,
			     struct fbnic_ring *ring, bool discard,
			     unsigned int hw_head)
{
	u64 total_bytes = 0, total_packets = 0;
	unsigned int head = ring->head;

	while (hw_head != head) {
		struct page *page;
		u64 twd;

		if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL))))
			goto next_desc;

		twd = le64_to_cpu(ring->desc[head]);
		page = ring->tx_buf[head];

		/* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives
		 * us one increment per packet, with no branches.
		 */
		total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) -
				 FBNIC_TWD_TYPE_AL;
		total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd);

		page_pool_put_page(page->pp, page, -1, pp_allow_direct);
next_desc:
		head++;
		head &= ring->size_mask;
	}

	if (!total_bytes)
		return;

	ring->head = head;

	if (discard) {
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped += total_packets;
		u64_stats_update_end(&ring->stats.syncp);
		return;
	}

	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.bytes += total_bytes;
	ring->stats.packets += total_packets;
	u64_stats_update_end(&ring->stats.syncp);
}

static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
			    struct fbnic_ring *ring,
			    u64 tcd, int *ts_head, int *head0)
{
	struct skb_shared_hwtstamps hwtstamp;
	struct fbnic_net *fbn;
	struct sk_buff *skb;
	int head;
	u64 ns;

	head = (*ts_head < 0) ? ring->head : *ts_head;

	do {
		unsigned int desc_cnt;

		if (head == ring->tail) {
			if (unlikely(net_ratelimit()))
				netdev_err(nv->napi.dev,
					   "Tx timestamp without matching packet\n");
			return;
		}

		skb = ring->tx_buf[head];
		desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;

		head += desc_cnt;
		head &= ring->size_mask;
	} while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS));

	fbn = netdev_priv(nv->napi.dev);
	ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd));

	memset(&hwtstamp, 0, sizeof(hwtstamp));
	hwtstamp.hwtstamp = ns_to_ktime(ns);

	*ts_head = head;

	FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS;
	if (*head0 < 0) {
		head = FBNIC_XMIT_CB(skb)->hw_head;
		if (head >= 0)
			*head0 = head;
	}

	skb_tstamp_tx(skb, &hwtstamp);
	u64_stats_update_begin(&ring->stats.syncp);
	ring->stats.twq.ts_packets++;
	u64_stats_update_end(&ring->stats.syncp);
}

static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
				 netmem_ref netmem)
{
	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];

	page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX);
	rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX;
	rx_buf->netmem = netmem;
}

static struct page *
fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx)
{
	struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx];

	rx_buf->pagecnt_bias--;

	/* sub0 is always fed system pages, from the NAPI-level page_pool */
	return netmem_to_page(rx_buf->netmem);
}

static netmem_ref
fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx)
{
	struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx];

	rx_buf->pagecnt_bias--;

	return rx_buf->netmem;
}

static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
				  int budget)
{
	struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx];
	netmem_ref netmem = rx_buf->netmem;

	if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias))
		page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1,
					     !!budget);

	rx_buf->netmem = 0;
}

static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
			    struct fbnic_q_triad *qt, s32 ts_head, s32 head0,
			    s32 head1)
{
	if (head0 >= 0)
		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
	else if (ts_head >= 0)
		fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
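
	/* sub1 (TWQ1) carries XDP_TX frames; when polled without a budget,
	 * defer the clean to a later NAPI poll via cmpl.deferred_head.
	 */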
	if (head1 >= 0) {
		qt->cmpl.deferred_head = -1;
		if (napi_budget)
			fbnic_clean_twq1(nv, true, &qt->sub1, false, head1);
		else
			qt->cmpl.deferred_head = head1;
	}
}

static void
fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
		int napi_budget)
{
	struct fbnic_ring *cmpl = &qt->cmpl;
	s32 head1 = cmpl->deferred_head;
	s32 head0 = -1, ts_head = -1;
	__le64 *raw_tcd, done;
	u32 head = cmpl->head;

	done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
	raw_tcd = &cmpl->desc[head & cmpl->size_mask];

	/* Walk the completion queue collecting the heads reported by NIC */
	while ((*raw_tcd & cpu_to_le64(FBNIC_TCD_DONE)) == done) {
		u64 tcd;

		dma_rmb();

		tcd = le64_to_cpu(*raw_tcd);

		switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) {
		case FBNIC_TCD_TYPE_0:
			if (tcd & FBNIC_TCD_TWQ1)
				head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK,
						  tcd);
			else
				head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK,
						  tcd);
			/* Currently all err status bits are related to
			 * timestamps and as those have yet to be added
			 * they are skipped for now.
			 */
			break;
		case FBNIC_TCD_TYPE_1:
			if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
				break;

			fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
			break;
		default:
			break;
		}

		raw_tcd++;
		head++;
		if (!(head & cmpl->size_mask)) {
			done ^= cpu_to_le64(FBNIC_TCD_DONE);
			raw_tcd = &cmpl->desc[0];
		}
	}

	/* Record the current head/tail of the queue */
	if (cmpl->head != head) {
		cmpl->head = head;
		writel(head & cmpl->size_mask, cmpl->doorbell);
	}

	/* Unmap and free processed buffers */
	fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1);
}

static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head,
			    int napi_budget)
{
	unsigned int head = ring->head;

	if (head == hw_head)
		return;

	do {
		fbnic_page_pool_drain(ring, head, napi_budget);

		head++;
		head &= ring->size_mask;
	} while (head != hw_head);

	ring->head = head;
}

static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem)
{
	__le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT];
	dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
	u64 bd, i = FBNIC_BD_FRAG_COUNT;

	bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) |
	     FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id);

	/* In the case that a page size is larger than 4K we will map a
	 * single page to multiple fragments. The fragments will be
	 * FBNIC_BD_FRAG_COUNT in size and the lower n bits will be used
	 * to indicate the individual fragment IDs.
	 */
	do {
		*bdq_desc = cpu_to_le64(bd);
		bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) |
		      FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1);
	} while (--i);
}

static void fbnic_fill_bdq(struct fbnic_ring *bdq)
{
	unsigned int count = fbnic_desc_unused(bdq);
	unsigned int i = bdq->tail;

	if (!count)
		return;

	do {
		netmem_ref netmem;

		netmem = page_pool_dev_alloc_netmems(bdq->page_pool);
		if (!netmem) {
			u64_stats_update_begin(&bdq->stats.syncp);
			bdq->stats.rx.alloc_failed++;
			u64_stats_update_end(&bdq->stats.syncp);

			break;
		}

		fbnic_page_pool_init(bdq, i, netmem);
		fbnic_bd_prep(bdq, i, netmem);

		i++;
		i &= bdq->size_mask;

		count--;
	} while (count);

	if (bdq->tail != i) {
		bdq->tail = i;

		/* Force DMA writes to flush before writing to tail */
		dma_wmb();

		writel(i, bdq->doorbell);
	}
}

static unsigned int fbnic_hdr_pg_start(unsigned int pg_off)
{
	/* The headroom of the first header may be larger than FBNIC_RX_HROOM
	 * due to alignment. So account for that by just making the page
	 * offset 0 if we are starting at the first header.
	 */
	if (ALIGN(FBNIC_RX_HROOM, 128) > FBNIC_RX_HROOM &&
	    pg_off == ALIGN(FBNIC_RX_HROOM, 128))
		return 0;

	return pg_off - FBNIC_RX_HROOM;
}

static unsigned int fbnic_hdr_pg_end(unsigned int pg_off, unsigned int len)
{
	/* Determine the end of the buffer by finding the start of the next
	 * and then subtracting the headroom from that frame.
	 */
	pg_off += len + FBNIC_RX_TROOM + FBNIC_RX_HROOM;

	return ALIGN(pg_off, 128) - FBNIC_RX_HROOM;
}

static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom;
	unsigned char *hdr_start;

	/* data_hard_start should always be NULL when this is called */
	WARN_ON_ONCE(pkt->buff.data_hard_start);

	/* Short-cut the end calculation if we know page is fully consumed */
	hdr_pg_end = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
		     FBNIC_BD_FRAG_SIZE : fbnic_hdr_pg_end(hdr_pg_off, len);
	hdr_pg_start = fbnic_hdr_pg_start(hdr_pg_off);

	headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
	frame_sz = hdr_pg_end - hdr_pg_start;
	xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
	hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
			FBNIC_BD_FRAG_SIZE;

	/* Sync DMA buffer */
	dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page),
				      hdr_pg_start, frame_sz,
				      DMA_BIDIRECTIONAL);

	/* Build frame around buffer */
	hdr_start = page_address(page) + hdr_pg_start;
	net_prefetch(pkt->buff.data);
	xdp_prepare_buff(&pkt->buff, hdr_start, headroom,
			 len - FBNIC_RX_PAD, true);

	pkt->hwtstamp = 0;
	pkt->add_frag_failed = false;
}

static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd,
			      struct fbnic_pkt_buff *pkt,
			      struct fbnic_q_triad *qt)
{
	unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
	unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd);
	unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd);
	netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx);
	unsigned int truesize;
	bool added;

	truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ?
		   FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128);

	pg_off += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
		  FBNIC_BD_FRAG_SIZE;

	/* Sync DMA buffer */
	page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem,
					  pg_off, truesize);

	added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize);
	if (unlikely(!added)) {
		pkt->add_frag_failed = true;
		netdev_err_once(nv->napi.dev,
				"Failed to add fragment to xdp_buff\n");
	}
}

static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt,
			       struct fbnic_pkt_buff *pkt, int budget)
{
	struct page *page;

	if (!pkt->buff.data_hard_start)
		return;

	if (xdp_buff_has_frags(&pkt->buff)) {
		struct skb_shared_info *shinfo;
		netmem_ref netmem;
		int nr_frags;

		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
		nr_frags = shinfo->nr_frags;

		while (nr_frags--) {
			netmem = skb_frag_netmem(&shinfo->frags[nr_frags]);
			page_pool_put_full_netmem(qt->sub1.page_pool, netmem,
						  !!budget);
		}
	}

	page = virt_to_page(pkt->buff.data_hard_start);
	page_pool_put_full_page(qt->sub0.page_pool, page, !!budget);
}

static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
				       struct fbnic_pkt_buff *pkt)
{
	struct sk_buff *skb;

	skb = xdp_build_skb_from_buff(&pkt->buff);
	if (!skb)
		return NULL;

	/* Add timestamp if present */
	if (pkt->hwtstamp)
		skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;

	return skb;
}

static long fbnic_pkt_tx(struct fbnic_napi_vector *nv,
			 struct fbnic_pkt_buff *pkt)
{
	struct fbnic_ring *ring = &nv->qt[0].sub1;
	int size, offset, nsegs = 1, data_len = 0;
	unsigned int tail = ring->tail;
	struct skb_shared_info *shinfo;
	skb_frag_t *frag = NULL;
	struct page *page;
	dma_addr_t dma;
	__le64 *twd;

	if (unlikely(xdp_buff_has_frags(&pkt->buff))) {
		shinfo = xdp_get_shared_info_from_buff(&pkt->buff);
		nsegs += shinfo->nr_frags;
		data_len = shinfo->xdp_frags_size;
		frag = &shinfo->frags[0];
	}

	if (fbnic_desc_unused(ring) < nsegs) {
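		/* Not enough free descriptors for all segments; count the
		 * drop and let the frame be consumed.
		 */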
		u64_stats_update_begin(&ring->stats.syncp);
		ring->stats.dropped++;
		u64_stats_update_end(&ring->stats.syncp);
		return -FBNIC_XDP_CONSUME;
	}

	page = virt_to_page(pkt->buff.data_hard_start);
	offset = offset_in_page(pkt->buff.data);
	dma = page_pool_get_dma_addr(page);

	size = pkt->buff.data_end - pkt->buff.data;

	while (nsegs--) {
		dma_sync_single_range_for_device(nv->dev, dma, offset, size,
						 DMA_BIDIRECTIONAL);
		dma += offset;

		ring->tx_buf[tail] = page;

		twd = &ring->desc[tail];
		*twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) |
				   FIELD_PREP(FBNIC_TWD_LEN_MASK, size) |
				   FIELD_PREP(FBNIC_TWD_TYPE_MASK,
					      FBNIC_TWD_TYPE_AL));

		tail++;
		tail &= ring->size_mask;

		if (!data_len)
			break;

		offset = skb_frag_off(frag);
		page = skb_frag_page(frag);
		dma = page_pool_get_dma_addr(page);

		size = skb_frag_size(frag);
		data_len -= size;
		frag++;
	}

	*twd |= FBNIC_TWD_TYPE(LAST_AL);

	ring->tail = tail;

	return -FBNIC_XDP_TX;
}

static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv,
				  unsigned int pkt_tail)
{
	struct fbnic_ring *ring = &nv->qt[0].sub1;

	/* Force DMA writes to flush before writing to tail */
	dma_wmb();

	writel(pkt_tail, ring->doorbell);
}

static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
				     struct fbnic_pkt_buff *pkt)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	struct bpf_prog *xdp_prog;
	int act;

	xdp_prog = READ_ONCE(fbn->xdp_prog);
	if (!xdp_prog)
		goto xdp_pass;

	/* Should never happen, config paths enforce HDS threshold > MTU */
	if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
		return ERR_PTR(-FBNIC_XDP_LEN_ERR);

	act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
	switch (act) {
	case XDP_PASS:
xdp_pass:
		return fbnic_build_skb(nv, pkt);
	case XDP_TX:
		return ERR_PTR(fbnic_pkt_tx(nv, pkt));
	default:
		bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(nv->napi.dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		break;
	}

	return ERR_PTR(-FBNIC_XDP_CONSUME);
}

static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
{
	return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
	       (FBNIC_RCD_META_L3_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L3 :
						     PKT_HASH_TYPE_L2;
}

static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
			    struct fbnic_pkt_buff *pkt)
{
	struct fbnic_net *fbn;
	u64 ns, ts;

	if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd))
		return;

	fbn = netdev_priv(nv->napi.dev);
	ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd);
	ns = fbnic_ts40_to_ns(fbn, ts);

	/* Add timestamp to shared info */
	pkt->hwtstamp = ns_to_ktime(ns);
}

static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
				      u64 rcd, struct sk_buff *skb,
				      struct fbnic_q_triad *qt,
				      u64 *csum_cmpl, u64 *csum_none)
{
	struct net_device *netdev = nv->napi.dev;
	struct fbnic_ring *rcq = &qt->cmpl;

	fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none);

	if (netdev->features & NETIF_F_RXHASH)
		skb_set_hash(skb,
			     FIELD_GET(FBNIC_RCD_META_RSS_HASH_MASK, rcd),
			     fbnic_skb_hash_type(rcd));

	skb_record_rx_queue(skb, rcq->q_idx);
}

static bool fbnic_rcd_metadata_err(u64 rcd)
{
	return !!(FBNIC_RCD_META_UNCORRECTABLE_ERR_MASK & rcd);
}

static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
			   struct fbnic_q_triad *qt, int budget)
{
	unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
	u64 csum_complete = 0, csum_none = 0, length_errors = 0;
	s32 head0 = -1, head1 = -1, pkt_tail = -1;
	struct fbnic_ring *rcq = &qt->cmpl;
	struct fbnic_pkt_buff *pkt;
	__le64 *raw_rcd, done;
	u32 head = rcq->head;

	done = (head & (rcq->size_mask + 1)) ? cpu_to_le64(FBNIC_RCD_DONE) : 0;
	raw_rcd = &rcq->desc[head & rcq->size_mask];
	pkt = rcq->pkt;

	/* Walk the completion queue collecting the heads reported by NIC */
	while (likely(packets < budget)) {
		struct sk_buff *skb = ERR_PTR(-EINVAL);
		u64 rcd;

		if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done)
			break;

		dma_rmb();

		rcd = le64_to_cpu(*raw_rcd);

		switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) {
		case FBNIC_RCD_TYPE_HDR_AL:
			head0 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_pkt_prepare(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_PAY_AL:
			head1 = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd);
			fbnic_add_rx_frag(nv, rcd, pkt, qt);

			break;
		case FBNIC_RCD_TYPE_OPT_META:
			/* Only type 0 is currently supported */
			if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
				break;

			fbnic_rx_tstamp(nv, rcd, pkt);

			/* We currently ignore the action table index */
			break;
		case FBNIC_RCD_TYPE_META:
			if (unlikely(pkt->add_frag_failed))
				skb = NULL;
			else if (likely(!fbnic_rcd_metadata_err(rcd)))
				skb = fbnic_run_xdp(nv, pkt);

			/* Populate skb and invalidate XDP */
			if (!IS_ERR_OR_NULL(skb)) {
				fbnic_populate_skb_fields(nv, rcd, skb, qt,
							  &csum_complete,
							  &csum_none);

				packets++;
				bytes += skb->len;

				napi_gro_receive(&nv->napi, skb);
			} else if (skb == ERR_PTR(-FBNIC_XDP_TX)) {
				pkt_tail = nv->qt[0].sub1.tail;
				bytes += xdp_get_buff_len(&pkt->buff);
			} else {
				if (!skb) {
					alloc_failed++;
					dropped++;
				} else if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) {
					length_errors++;
				} else {
					dropped++;
				}

				fbnic_put_pkt_buff(qt, pkt, 1);
			}

			pkt->buff.data_hard_start = NULL;

			break;
		}
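
		/* Advance to the next completion descriptor; the DONE bit
		 * polarity we test against flips each time head wraps.
		 */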
		raw_rcd++;
		head++;
		if (!(head & rcq->size_mask)) {
			done ^= cpu_to_le64(FBNIC_RCD_DONE);
			raw_rcd = &rcq->desc[0];
		}
	}

	u64_stats_update_begin(&rcq->stats.syncp);
	rcq->stats.packets += packets;
	rcq->stats.bytes += bytes;
	/* Re-add ethernet header length (removed in fbnic_build_skb) */
	rcq->stats.bytes += ETH_HLEN * packets;
	rcq->stats.dropped += dropped;
	rcq->stats.rx.alloc_failed += alloc_failed;
	rcq->stats.rx.csum_complete += csum_complete;
	rcq->stats.rx.csum_none += csum_none;
	rcq->stats.rx.length_errors += length_errors;
	u64_stats_update_end(&rcq->stats.syncp);

	if (pkt_tail >= 0)
		fbnic_pkt_commit_tail(nv, pkt_tail);

	/* Unmap and free processed buffers */
	if (head0 >= 0)
		fbnic_clean_bdq(&qt->sub0, head0, budget);
	fbnic_fill_bdq(&qt->sub0);

	if (head1 >= 0)
		fbnic_clean_bdq(&qt->sub1, head1, budget);
	fbnic_fill_bdq(&qt->sub1);

	/* Record the current head/tail of the queue */
	if (rcq->head != head) {
		rcq->head = head;
		writel(head & rcq->size_mask, rcq->doorbell);
	}

	return packets;
}

static void fbnic_nv_irq_disable(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(v_idx / 32), 1 << (v_idx % 32));
}

static void fbnic_nv_irq_rearm(struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	u32 v_idx = nv->v_idx;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(v_idx),
		   FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}

static int fbnic_poll(struct napi_struct *napi, int budget)
{
	struct fbnic_napi_vector *nv = container_of(napi,
						    struct fbnic_napi_vector,
						    napi);
	int i, j, work_done = 0;

	for (i = 0; i < nv->txt_count; i++)
		fbnic_clean_tcq(nv, &nv->qt[i], budget);

	for (j = 0; j < nv->rxt_count; j++, i++)
		work_done += fbnic_clean_rcq(nv, &nv->qt[i], budget);

	if (work_done >= budget)
		return budget;

	if (likely(napi_complete_done(napi, work_done)))
		fbnic_nv_irq_rearm(nv);

	return work_done;
}

irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
{
	struct fbnic_napi_vector *nv = *(void **)data;

	napi_schedule_irqoff(&nv->napi);

	return IRQ_HANDLED;
}

void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
				      struct fbnic_ring *rxr)
{
	struct fbnic_queue_stats *stats = &rxr->stats;

	/* Capture stats from queues before disassociating them */
	fbn->rx_stats.bytes += stats->bytes;
	fbn->rx_stats.packets += stats->packets;
	fbn->rx_stats.dropped += stats->dropped;
	fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
	fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
	fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
	fbn->rx_stats.rx.length_errors += stats->rx.length_errors;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4);
}

void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
				      struct fbnic_ring *txr)
{
	struct fbnic_queue_stats *stats = &txr->stats;

	/* Capture stats from queues before disassociating them */
	fbn->tx_stats.bytes += stats->bytes;
	fbn->tx_stats.packets += stats->packets;
	fbn->tx_stats.dropped += stats->dropped;
	fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
	fbn->tx_stats.twq.lso += stats->twq.lso;
	fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
	fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
	fbn->tx_stats.twq.stop += stats->twq.stop;
	fbn->tx_stats.twq.wake += stats->twq.wake;
	/* Remember to add new stats here */
	BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6);
}

static void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn,
					      struct fbnic_ring *xdpr)
{
	struct fbnic_queue_stats *stats = &xdpr->stats;

	if (!(xdpr->flags & FBNIC_RING_F_STATS))
		return;

	/* Capture stats from queues before disassociating them */
	fbn->rx_stats.bytes += stats->bytes;
	fbn->rx_stats.packets += stats->packets;
	fbn->rx_stats.dropped += stats->dropped;
	fbn->tx_stats.bytes += stats->bytes;
	fbn->tx_stats.packets += stats->packets;
}

static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
				 struct fbnic_ring *txr)
{
	if (!(txr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_tx_counters(fbn, txr);

	/* Remove pointer to the Tx ring */
	WARN_ON(fbn->tx[txr->q_idx] && fbn->tx[txr->q_idx] != txr);
	fbn->tx[txr->q_idx] = NULL;
}

static void fbnic_remove_xdp_ring(struct fbnic_net *fbn,
				  struct fbnic_ring *xdpr)
{
	if (!(xdpr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_xdp_counters(fbn, xdpr);

	/* Remove pointer to the Tx ring */
	WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr);
	fbn->tx[xdpr->q_idx] = NULL;
}

static void fbnic_remove_rx_ring(struct fbnic_net *fbn,
				 struct fbnic_ring *rxr)
{
	if (!(rxr->flags & FBNIC_RING_F_STATS))
		return;

	fbnic_aggregate_ring_rx_counters(fbn, rxr);

	/* Remove pointer to the Rx ring */
	WARN_ON(fbn->rx[rxr->q_idx] && fbn->rx[rxr->q_idx] != rxr);
	fbn->rx[rxr->q_idx] = NULL;
}

static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt)
{
	page_pool_destroy(qt->sub0.page_pool);
	page_pool_destroy(qt->sub1.page_pool);
}

static void fbnic_free_napi_vector(struct fbnic_net *fbn,
				   struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = nv->fbd;
	int i, j;

	for (i = 0; i < nv->txt_count; i++) {
		fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl);
	}

	for (j = 0; j < nv->rxt_count; j++, i++) {
		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
		fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
		fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
	}

	fbnic_napi_free_irq(fbd, nv);
	netif_napi_del_locked(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
}

void fbnic_free_napi_vectors(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		if (fbn->napi[i])
			fbnic_free_napi_vector(fbn, fbn->napi[i]);
}

static int
fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt,
			  unsigned int rxq_idx)
{
	struct page_pool_params pp_params = {
		.order = 0,
		.flags = PP_FLAG_DMA_MAP |
			 PP_FLAG_DMA_SYNC_DEV,
		.pool_size = fbn->hpq_size + fbn->ppq_size,
		.nid = NUMA_NO_NODE,
		.dev = fbn->netdev->dev.parent,
		.dma_dir = DMA_BIDIRECTIONAL,
		.offset = 0,
		.max_len = PAGE_SIZE,
		.netdev = fbn->netdev,
		.queue_idx = rxq_idx,
	};
	struct page_pool *pp;

	/* Page pool cannot exceed a size of 32768. This doesn't limit the
	 * pages on the ring but the number we can have cached waiting on
	 * the next use.
	 *
	 * TBD: Can this be reduced further? Would a multiple of
	 * NAPI_POLL_WEIGHT possibly make more sense? The question is how
	 * many pages do we need to hold in reserve to get the best return
	 * without hogging too much system memory.
	 */
	if (pp_params.pool_size > 32768)
		pp_params.pool_size = 32768;

	pp = page_pool_create(&pp_params);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	qt->sub0.page_pool = pp;
	if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) {
		pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM;
		pp_params.dma_dir = DMA_FROM_DEVICE;

		pp = page_pool_create(&pp_params);
		if (IS_ERR(pp))
			goto err_destroy_sub0;
	} else {
		page_pool_get(pp);
	}
	qt->sub1.page_pool = pp;

	return 0;

err_destroy_sub0:
	page_pool_destroy(qt->sub0.page_pool);
	return PTR_ERR(pp);
}

static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell,
			    int q_idx, u8 flags)
{
	u64_stats_init(&ring->stats.syncp);
	ring->doorbell = doorbell;
	ring->q_idx = q_idx;
	ring->flags = flags;
	ring->deferred_head = -1;
}

static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
				   unsigned int v_count, unsigned int v_idx,
				   unsigned int txq_count, unsigned int txq_idx,
				   unsigned int rxq_count, unsigned int rxq_idx)
{
	int txt_count = txq_count, rxt_count = rxq_count;
	u32 __iomem *uc_addr = fbd->uc_addr0;
	int xdp_count = 0, qt_count, err;
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *qt;
	u32 __iomem *db;

	/* We need to reserve at least one Tx Queue Triad for an XDP ring */
	if (rxq_count) {
		xdp_count = 1;
		if (!txt_count)
			txt_count = 1;
	}

	qt_count = txt_count + rxq_count;
	if (!qt_count)
		return -EINVAL;

	/* If MMIO has already failed there are no rings to initialize */
	if (!uc_addr)
		return -EIO;

	/* Allocate NAPI vector and queue triads */
	nv = kzalloc(struct_size(nv, qt, qt_count), GFP_KERNEL);
	if (!nv)
		return -ENOMEM;

	/* Record queue triad counts */
	nv->txt_count = txt_count;
	nv->rxt_count = rxt_count;

	/* Provide pointer back to fbnic and MSI-X vectors */
	nv->fbd = fbd;
	nv->v_idx = v_idx;

	/* Tie napi to netdev */
	fbn->napi[fbnic_napi_idx(nv)] = nv;
	netif_napi_add_config_locked(fbn->netdev, &nv->napi, fbnic_poll,
				     fbnic_napi_idx(nv));

	/* Record IRQ to NAPI struct */
	netif_napi_set_irq_locked(&nv->napi,
				  pci_irq_vector(to_pci_dev(fbd->dev),
						 nv->v_idx));

	/* Tie nv back to PCIe dev */
	nv->dev = fbd->dev;

	/* Request the IRQ for napi vector */
	err = fbnic_napi_request_irq(fbd, nv);
	if (err)
		goto napi_del;

	/* Initialize queue triads */
	qt = nv->qt;

	while (txt_count) {
		u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS;

		/* Configure Tx queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL];

		/* Assign Tx queue to netdev if applicable */
		if (txq_count > 0) {
			fbnic_ring_init(&qt->sub0, db, txq_idx, flags);
			fbn->tx[txq_idx] = &qt->sub0;
			txq_count--;
		} else {
			fbnic_ring_init(&qt->sub0, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure XDP queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL];

		/* Assign XDP queue to netdev if applicable
		 *
		 * The setup for this is in itself a bit different.
		 * 1. We only need one XDP Tx queue per NAPI vector.
		 * 2. We associate it to the first Rx queue index.
		 * 3. The hardware side is associated based on the Tx Queue.
		 * 4. The netdev queue is offset by FBNIC_MAX_TXQs.
		 */
		if (xdp_count > 0) {
			unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx;

			fbnic_ring_init(&qt->sub1, db, xdp_idx, flags);
			fbn->tx[xdp_idx] = &qt->sub1;
			xdp_count--;
		} else {
			fbnic_ring_init(&qt->sub1, db, 0,
					FBNIC_RING_F_DISABLED);
		}

		/* Configure Tx completion queue */
		db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, 0, 0);

		/* Update Tx queue index */
		txt_count--;
		txq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	while (rxt_count) {
		/* Configure header queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL];
		fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX);

		/* Configure payload queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL];
		fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX);

		/* Configure Rx completion queue */
		db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD];
		fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
		fbn->rx[rxq_idx] = &qt->cmpl;

		/* Update Rx queue index */
		rxt_count--;
		rxq_idx += v_count;

		/* Move to next queue triad */
		qt++;
	}

	return 0;

napi_del:
	netif_napi_del_locked(&nv->napi);
	fbn->napi[fbnic_napi_idx(nv)] = NULL;
	kfree(nv);
	return err;
}

int fbnic_alloc_napi_vectors(struct fbnic_net *fbn)
{
	unsigned int txq_idx = 0, rxq_idx = 0, v_idx = FBNIC_NON_NAPI_VECTORS;
	unsigned int num_tx = fbn->num_tx_queues;
	unsigned int num_rx = fbn->num_rx_queues;
	unsigned int num_napi = fbn->num_napi;
	struct fbnic_dev *fbd = fbn->fbd;
	int err;

	/* Allocate 1 Tx queue per napi vector */
	if (num_napi < FBNIC_MAX_TXQS && num_napi == num_tx + num_rx) {
		while (num_tx) {
			err = fbnic_alloc_napi_vector(fbd, fbn,
						      num_napi, v_idx,
						      1, txq_idx, 0, 0);
			if (err)
				goto free_vectors;

			/* Update counts and index */
			num_tx--;
			txq_idx++;

			v_idx++;
		}
	}

	/* Allocate Tx/Rx queue pairs per vector, or allocate remaining Rx */
	while (num_rx | num_tx) {
		int tqpv = DIV_ROUND_UP(num_tx, num_napi - txq_idx);
		int rqpv = DIV_ROUND_UP(num_rx, num_napi - rxq_idx);

		err = fbnic_alloc_napi_vector(fbd, fbn, num_napi, v_idx,
					      tqpv, txq_idx, rqpv, rxq_idx);
		if (err)
			goto free_vectors;

		/* Update counts and index */
		num_tx -= tqpv;
		txq_idx++;

		num_rx -= rqpv;
		rxq_idx++;

		v_idx++;
	}

	return 0;

free_vectors:
	fbnic_free_napi_vectors(fbn);

	return -ENOMEM;
}

static void fbnic_free_ring_resources(struct device *dev,
				      struct fbnic_ring *ring)
{
	kvfree(ring->buffer);
	ring->buffer = NULL;

	/* If size is not set there are no descriptors present */
	if (!ring->size)
		return;

	dma_free_coherent(dev, ring->size, ring->desc, ring->dma);
	ring->size_mask = 0;
	ring->size = 0;
}

static int fbnic_alloc_tx_ring_desc(struct fbnic_net *fbn,
				    struct fbnic_ring *txr)
{
	struct device *dev = fbn->netdev->dev.parent;
	size_t size;

	/* Round size up to nearest 4K */
	size = ALIGN(array_size(sizeof(*txr->desc), fbn->txq_size), 4096);

	txr->desc = dma_alloc_coherent(dev, size, &txr->dma,
				       GFP_KERNEL | __GFP_NOWARN);
	if (!txr->desc)
		return -ENOMEM;

	/* txq_size should be a power of 2, so mask is just that -1 */
	txr->size_mask = fbn->txq_size - 1;
	txr->size = size;

	return 0;
}

static int fbnic_alloc_tx_ring_buffer(struct fbnic_ring *txr)
{
	size_t size = array_size(sizeof(*txr->tx_buf), txr->size_mask + 1);

	txr->tx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	return txr->tx_buf ? 0 : -ENOMEM;
}

static int fbnic_alloc_tx_ring_resources(struct fbnic_net *fbn,
					 struct fbnic_ring *txr)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	if (txr->flags & FBNIC_RING_F_DISABLED)
		return 0;

	err = fbnic_alloc_tx_ring_desc(fbn, txr);
	if (err)
		return err;

	if (!(txr->flags & FBNIC_RING_F_CTX))
		return 0;

	err = fbnic_alloc_tx_ring_buffer(txr);
	if (err)
		goto free_desc;

	return 0;

free_desc:
	fbnic_free_ring_resources(dev, txr);
	return err;
}

static int fbnic_alloc_rx_ring_desc(struct fbnic_net *fbn,
				    struct fbnic_ring *rxr)
{
	struct device *dev = fbn->netdev->dev.parent;
	size_t desc_size = sizeof(*rxr->desc);
	u32 rxq_size;
	size_t size;

	switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) {
	case FBNIC_QUEUE_BDQ_HPQ_TAIL:
		rxq_size = fbn->hpq_size / FBNIC_BD_FRAG_COUNT;
		desc_size *= FBNIC_BD_FRAG_COUNT;
		break;
	case FBNIC_QUEUE_BDQ_PPQ_TAIL:
		rxq_size = fbn->ppq_size / FBNIC_BD_FRAG_COUNT;
		desc_size *= FBNIC_BD_FRAG_COUNT;
		break;
	case FBNIC_QUEUE_RCQ_HEAD:
		rxq_size = fbn->rcq_size;
		break;
	default:
		return -EINVAL;
	}

	/* Round size up to nearest 4K */
	size = ALIGN(array_size(desc_size, rxq_size), 4096);

	rxr->desc = dma_alloc_coherent(dev, size, &rxr->dma,
				       GFP_KERNEL | __GFP_NOWARN);
	if (!rxr->desc)
		return -ENOMEM;

	/* rxq_size should be a power of 2, so mask is just that -1 */
	rxr->size_mask = rxq_size - 1;
	rxr->size = size;

	return 0;
}

static int fbnic_alloc_rx_ring_buffer(struct fbnic_ring *rxr)
{
	size_t size = array_size(sizeof(*rxr->rx_buf), rxr->size_mask + 1);

	if (rxr->flags & FBNIC_RING_F_CTX)
		size = sizeof(*rxr->rx_buf) * (rxr->size_mask + 1);
	else
		size = sizeof(*rxr->pkt);

	rxr->rx_buf = kvzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	return rxr->rx_buf ? 0 : -ENOMEM;
}

static int fbnic_alloc_rx_ring_resources(struct fbnic_net *fbn,
					 struct fbnic_ring *rxr)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_rx_ring_desc(fbn, rxr);
	if (err)
		return err;

	err = fbnic_alloc_rx_ring_buffer(rxr);
	if (err)
		goto free_desc;

	return 0;

free_desc:
	fbnic_free_ring_resources(dev, rxr);
	return err;
}

static void fbnic_free_qt_resources(struct fbnic_net *fbn,
				    struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;

	fbnic_free_ring_resources(dev, &qt->cmpl);
	fbnic_free_ring_resources(dev, &qt->sub1);
	fbnic_free_ring_resources(dev, &qt->sub0);

	if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) {
		xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
		xdp_rxq_info_unreg(&qt->xdp_rxq);
		fbnic_free_qt_page_pools(qt);
	}
}

static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn,
				       struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub0);
	if (err)
		return err;

	err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1);
	if (err)
		goto free_sub0;

	err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl);
	if (err)
		goto free_sub1;

	return 0;

free_sub1:
	fbnic_free_ring_resources(dev, &qt->sub1);
free_sub0:
	fbnic_free_ring_resources(dev, &qt->sub0);
	return err;
}

static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn,
				       struct fbnic_napi_vector *nv,
				       struct fbnic_q_triad *qt)
{
	struct device *dev = fbn->netdev->dev.parent;
	int err;

	err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx);
	if (err)
		return err;

	err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx,
			       nv->napi.napi_id);
	if (err)
		goto free_page_pools;

	err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL,
					 qt->sub0.page_pool);
	if (err)
		goto unreg_rxq;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0);
	if (err)
		goto unreg_mm;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1);
	if (err)
		goto free_sub0;

	err = fbnic_alloc_rx_ring_resources(fbn, &qt->cmpl);
	if (err)
		goto free_sub1;

	return 0;

free_sub1:
	fbnic_free_ring_resources(dev, &qt->sub1);
free_sub0:
	fbnic_free_ring_resources(dev, &qt->sub0);
unreg_mm:
	xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq);
unreg_rxq:
	xdp_rxq_info_unreg(&qt->xdp_rxq);
free_page_pools:
	fbnic_free_qt_page_pools(qt);
	return err;
}

static void fbnic_free_nv_resources(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	int i;

	for (i = 0; i < nv->txt_count + nv->rxt_count; i++)
		fbnic_free_qt_resources(fbn, &nv->qt[i]);
}

static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	int i, j, err;

	/* Allocate Tx Resources */
	for (i = 0; i < nv->txt_count; i++) {
		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
		if (err)
			goto free_qt_resources;
	}

	/* Allocate Rx Resources */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]);
		if (err)
static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
				    struct fbnic_napi_vector *nv)
{
	int i, j, err;

	/* Allocate Tx Resources */
	for (i = 0; i < nv->txt_count; i++) {
		err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
		if (err)
			goto free_qt_resources;
	}

	/* Allocate Rx Resources */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]);
		if (err)
			goto free_qt_resources;
	}

	return 0;

free_qt_resources:
	while (i--)
		fbnic_free_qt_resources(fbn, &nv->qt[i]);
	return err;
}

void fbnic_free_resources(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_free_nv_resources(fbn, fbn->napi[i]);
}

int fbnic_alloc_resources(struct fbnic_net *fbn)
{
	int i, err = -ENODEV;

	for (i = 0; i < fbn->num_napi; i++) {
		err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]);
		if (err)
			goto free_resources;
	}

	return 0;

free_resources:
	while (i--)
		fbnic_free_nv_resources(fbn, fbn->napi[i]);

	return err;
}

static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv)
{
	int i, j;

	/* Associate Tx queue with NAPI */
	for (i = 0; i < nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
				     NETDEV_QUEUE_TYPE_TX, &nv->napi);
	}

	/* Associate Rx queue with NAPI */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
				     NETDEV_QUEUE_TYPE_RX, &nv->napi);
	}
}

static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv)
{
	int i, j;

	/* Disassociate Tx queue from NAPI */
	for (i = 0; i < nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx,
				     NETDEV_QUEUE_TYPE_TX, NULL);
	}

	/* Disassociate Rx queue from NAPI */
	for (j = 0; j < nv->rxt_count; j++, i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx,
				     NETDEV_QUEUE_TYPE_RX, NULL);
	}
}

int fbnic_set_netif_queues(struct fbnic_net *fbn)
{
	int i, err;

	err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues,
					fbn->num_rx_queues);
	if (err)
		return err;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_set_netif_napi(fbn->napi[i]);

	return 0;
}

void fbnic_reset_netif_queues(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_reset_netif_napi(fbn->napi[i]);
}

static void fbnic_disable_twq0(struct fbnic_ring *txr)
{
	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL);

	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;

	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl);
}

static void fbnic_disable_twq1(struct fbnic_ring *txr)
{
	u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL);

	twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE;

	fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl);
}

static void fbnic_disable_tcq(struct fbnic_ring *txr)
{
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0);
	fbnic_ring_wr32(txr, FBNIC_QUEUE_TIM_MASK, FBNIC_QUEUE_TIM_MASK_MASK);
}

static void fbnic_disable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
{
	u32 bdq_ctl = fbnic_ring_rd32(hpq, FBNIC_QUEUE_BDQ_CTL);

	bdq_ctl &= ~FBNIC_QUEUE_BDQ_CTL_ENABLE;

	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
}

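/* The per-ring disable helpers only clear the enable bits and mask the
 * completion interrupts (TIM_MASK/RIM_MASK); the MMIO writes are flushed
 * by the callers via fbnic_wrfl(), as fbnic_nv_disable() and
 * fbnic_disable() below do.
 */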
static void fbnic_disable_rcq(struct fbnic_ring *rxr)
{
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RCQ_CTL, 0);
	fbnic_ring_wr32(rxr, FBNIC_QUEUE_RIM_MASK, FBNIC_QUEUE_RIM_MASK_MASK);
}

void fbnic_napi_disable(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++) {
		napi_disable_locked(&fbn->napi[i]->napi);

		fbnic_nv_irq_disable(fbn->napi[i]);
	}
}

static void __fbnic_nv_disable(struct fbnic_napi_vector *nv)
{
	int i, t;

	/* Disable Tx queue triads */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_disable_twq0(&qt->sub0);
		fbnic_disable_twq1(&qt->sub1);
		fbnic_disable_tcq(&qt->cmpl);
	}

	/* Disable Rx queue triads */
	for (i = 0; i < nv->rxt_count; i++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_disable_bdq(&qt->sub0, &qt->sub1);
		fbnic_disable_rcq(&qt->cmpl);
	}
}

static void
fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
{
	__fbnic_nv_disable(nv);
	fbnic_wrfl(fbn->fbd);
}

void fbnic_disable(struct fbnic_net *fbn)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		__fbnic_nv_disable(fbn->napi[i]);

	fbnic_wrfl(fbd);
}

static void fbnic_tx_flush(struct fbnic_dev *fbd)
{
	netdev_warn(fbd->netdev, "triggering Tx flush\n");

	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN,
		    FBNIC_TMI_DROP_CTRL_EN);
}

static void fbnic_tx_flush_off(struct fbnic_dev *fbd)
{
	fbnic_rmw32(fbd, FBNIC_TMI_DROP_CTRL, FBNIC_TMI_DROP_CTRL_EN, 0);
}

struct fbnic_idle_regs {
	u32 reg_base;
	u8 reg_cnt;
};

static bool fbnic_all_idle(struct fbnic_dev *fbd,
			   const struct fbnic_idle_regs *regs,
			   unsigned int nregs)
{
	unsigned int i, j;

	for (i = 0; i < nregs; i++) {
		for (j = 0; j < regs[i].reg_cnt; j++) {
			if (fbnic_rd32(fbd, regs[i].reg_base + j) != ~0U)
				return false;
		}
	}
	return true;
}

static void fbnic_idle_dump(struct fbnic_dev *fbd,
			    const struct fbnic_idle_regs *regs,
			    unsigned int nregs, const char *dir, int err)
{
	unsigned int i, j;

	netdev_err(fbd->netdev, "error waiting for %s idle %d\n", dir, err);
	for (i = 0; i < nregs; i++)
		for (j = 0; j < regs[i].reg_cnt; j++)
			netdev_err(fbd->netdev, "0x%04x: %08x\n",
				   regs[i].reg_base + j,
				   fbnic_rd32(fbd, regs[i].reg_base + j));
}

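/* Poll the per-queue idle bitmaps until every bit in every register reads
 * back as set (~0U). If the Tx side does not go idle within the timeout,
 * Tx drops are enabled via FBNIC_TMI_DROP_CTRL (fbnic_tx_flush()) and the
 * poll is retried once before giving up and dumping the registers.
 */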
int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail)
{
	static const struct fbnic_idle_regs tx[] = {
		{ FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TWQ_IDLE_CNT, },
		{ FBNIC_QM_TQS_IDLE(0), FBNIC_QM_TQS_IDLE_CNT, },
		{ FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TDE_IDLE_CNT, },
		{ FBNIC_QM_TCQ_IDLE(0), FBNIC_QM_TCQ_IDLE_CNT, },
	}, rx[] = {
		{ FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_HPQ_IDLE_CNT, },
		{ FBNIC_QM_PPQ_IDLE(0), FBNIC_QM_PPQ_IDLE_CNT, },
		{ FBNIC_QM_RCQ_IDLE(0), FBNIC_QM_RCQ_IDLE_CNT, },
	};
	bool idle;
	int err;

	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, tx, ARRAY_SIZE(tx));
	if (err == -ETIMEDOUT) {
		fbnic_tx_flush(fbd);
		err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle,
					       2, 500000, false,
					       fbd, tx, ARRAY_SIZE(tx));
		fbnic_tx_flush_off(fbd);
	}
	if (err) {
		fbnic_idle_dump(fbd, tx, ARRAY_SIZE(tx), "Tx", err);
		if (may_fail)
			return err;
	}

	err = read_poll_timeout_atomic(fbnic_all_idle, idle, idle, 2, 500000,
				       false, fbd, rx, ARRAY_SIZE(rx));
	if (err)
		fbnic_idle_dump(fbd, rx, ARRAY_SIZE(rx), "Rx", err);
	return err;
}

static int
fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx)
{
	static const unsigned int tx_regs[] = {
		FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0),
		FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0),
	}, rx_regs[] = {
		FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0),
		FBNIC_QM_RCQ_IDLE(0),
	};
	struct fbnic_dev *fbd = fbn->fbd;
	unsigned int val, mask, off;
	const unsigned int *regs;
	unsigned int reg_cnt;
	int i, err;

	regs = rx ? rx_regs : tx_regs;
	reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs);

	off = idx / 32;
	mask = BIT(idx % 32);

	for (i = 0; i < reg_cnt; i++) {
		err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask,
					       2, 500000, false,
					       fbd, regs[i] + off);
		if (err) {
			netdev_err(fbd->netdev,
				   "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n",
				   rx ? "Rx" : "Tx", idx, regs[i] + off, i,
				   val, mask);
			return err;
		}
	}

	return 0;
}

static void fbnic_nv_flush(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Flush any processed Tx Queue Triads and drop the rest */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];
		struct netdev_queue *tx_queue;

		/* Clean the work queues of unprocessed work */
		fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail);
		fbnic_clean_twq1(nv, false, &qt->sub1, true,
				 qt->sub1.tail);

		/* Reset completion queue descriptor ring */
		memset(qt->cmpl.desc, 0, qt->cmpl.size);

		/* Nothing else to do if Tx queue is disabled */
		if (qt->sub0.flags & FBNIC_RING_F_DISABLED)
			continue;

		/* Reset BQL associated with Tx queue */
		tx_queue = netdev_get_tx_queue(nv->napi.dev,
					       qt->sub0.q_idx);
		netdev_tx_reset_queue(tx_queue);
	}

	/* Flush any processed Rx Queue Triads and drop the rest */
	for (j = 0; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		/* Clean the work queues of unprocessed work */
		fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0);
		fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0);

		/* Reset completion queue descriptor ring */
		memset(qt->cmpl.desc, 0, qt->cmpl.size);

		fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0);
		memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
	}
}

void fbnic_flush(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_nv_flush(fbn->napi[i]);
}

static void fbnic_nv_fill(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Configure NAPI mapping and populate pages
	 * in the BDQ rings to use for Rx
	 */
	for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		/* Populate the header and payload BDQs */
		fbnic_fill_bdq(&qt->sub0);
		fbnic_fill_bdq(&qt->sub1);
	}
}

void fbnic_fill(struct fbnic_net *fbn)
{
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		fbnic_nv_fill(fbn->napi[i]);
}

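/* The queue SIZE registers take log2 of the ring entry count, with only
 * the low 4 bits written so a 64K-entry ring encodes as 0. For example,
 * a 1024-entry work queue has size_mask 0x3ff, fls() returns 10, and 0xa
 * is written to the SIZE register.
 */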
static void fbnic_enable_twq0(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}

static void fbnic_enable_twq1(struct fbnic_ring *twq)
{
	u32 log_size = fls(twq->size_mask);

	if (!twq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET);
	twq->tail = 0;
	twq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma));
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf);

	fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE);
}

static void fbnic_enable_tcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *tcq)
{
	u32 log_size = fls(tcq->size_mask);

	if (!tcq->size_mask)
		return;

	/* Reset head/tail */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET);
	tcq->tail = 0;
	tcq->head = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAL, lower_32_bits(tcq->dma));
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_BAH, upper_32_bits(tcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_CTL, nv->v_idx);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_THRESHOLD, tcq->size_mask / 2);
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(tcq, FBNIC_QUEUE_TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_ENABLE);
}

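/* The header (HPQ) and payload (PPQ) buffer descriptor queues share a
 * single BDQ control register accessed through the HPQ ring's CSR base.
 * The PPQ is optional: it is only programmed, and its enable bit only
 * set, when the ring has a non-zero size_mask.
 */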
static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
{
	u32 bdq_ctl = FBNIC_QUEUE_BDQ_CTL_ENABLE;
	u32 log_size;

	/* Reset head/tail */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET);
	ppq->tail = 0;
	ppq->head = 0;
	hpq->tail = 0;
	hpq->head = 0;

	log_size = fls(hpq->size_mask);

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma));
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAH, upper_32_bits(hpq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_SIZE, log_size & 0xf);

	if (!ppq->size_mask)
		goto write_ctl;

	log_size = fls(ppq->size_mask);

	/* Add enabling of PPQ to BDQ control */
	bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAL, lower_32_bits(ppq->dma));
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_BAH, upper_32_bits(ppq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(ppq, FBNIC_QUEUE_BDQ_PPQ_SIZE, log_size & 0xf);

write_ctl:
	fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_CTL, bdq_ctl);
}

static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
				       struct fbnic_ring *rcq)
{
	u32 drop_mode, rcq_ctl;

	drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;

	/* Specify packet layout */
	rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
		  FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
		  FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
}

static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc)
{
	u32 threshold;

	/* Set the threshold to half the ring size if rx_frames
	 * is not configured
	 */
	threshold = rx_desc ? : rcq->size_mask / 2;

	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv_idx);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, threshold);
}

void fbnic_config_txrx_usecs(struct fbnic_napi_vector *nv, u32 arm)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	struct fbnic_dev *fbd = nv->fbd;
	u32 val = arm;

	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT, fbn->rx_usecs) |
	       FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT_UPD_EN;
	val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT, fbn->tx_usecs) |
	       FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT_UPD_EN;

	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
}

void fbnic_config_rx_frames(struct fbnic_napi_vector *nv)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	int i;

	for (i = nv->txt_count; i < nv->rxt_count + nv->txt_count; i++) {
		struct fbnic_q_triad *qt = &nv->qt[i];

		fbnic_config_rim_threshold(&qt->cmpl, nv->v_idx,
					   fbn->rx_max_frames *
					   FBNIC_MIN_RXD_PER_FRAME);
	}
}

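/* fbnic_enable_rcq() below programs the receive descriptor engine for
 * header/data split: RDE_CTL1 carries the Rx pad length, the header split
 * threshold (raised to at least FBNIC_HDR_BYTES_MIN), the payload offset
 * and the payload page cluster size, while the RIM registers configure
 * interrupt moderation based on rx_max_frames.
 */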
static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
			     struct fbnic_ring *rcq)
{
	struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
	u32 log_size = fls(rcq->size_mask);
	u32 hds_thresh = fbn->hds_thresh;
	u32 rcq_ctl = 0;

	fbnic_config_drop_mode_rcq(nv, rcq);

	/* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should
	 * be split at L4. It would also result in the frames being split at
	 * L2/L3 depending on the frame size.
	 */
	if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) {
		rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT;
		hds_thresh = FBNIC_HDR_BYTES_MIN;
	}

	rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
			      FBNIC_RX_PAYLD_OFFSET) |
		   FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK,
			      FBNIC_RX_PAYLD_PG_CL);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL1, rcq_ctl);

	/* Reset head/tail */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET);
	rcq->head = 0;
	rcq->tail = 0;

	/* Store descriptor ring address and size */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAL, lower_32_bits(rcq->dma));
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_BAH, upper_32_bits(rcq->dma));

	/* Write lower 4 bits of log size as 64K ring size is 0 */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf);

	/* Store interrupt information for the completion queue */
	fbnic_config_rim_threshold(rcq, nv->v_idx, fbn->rx_max_frames *
						   FBNIC_MIN_RXD_PER_FRAME);
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0);

	/* Enable queue */
	fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE);
}

static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
{
	int j, t;

	/* Setup Tx Queue Triads */
	for (t = 0; t < nv->txt_count; t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		fbnic_enable_twq0(&qt->sub0);
		fbnic_enable_twq1(&qt->sub1);
		fbnic_enable_tcq(nv, &qt->cmpl);
	}

	/* Setup Rx Queue Triads */
	for (j = 0; j < nv->rxt_count; j++, t++) {
		struct fbnic_q_triad *qt = &nv->qt[t];

		page_pool_enable_direct_recycling(qt->sub0.page_pool,
						  &nv->napi);
		page_pool_enable_direct_recycling(qt->sub1.page_pool,
						  &nv->napi);

		fbnic_enable_bdq(&qt->sub0, &qt->sub1);
		fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
		fbnic_enable_rcq(nv, &qt->cmpl);
	}
}

static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
{
	__fbnic_nv_enable(nv);
	fbnic_wrfl(fbn->fbd);
}

void fbnic_enable(struct fbnic_net *fbn)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++)
		__fbnic_nv_enable(fbn->napi[i]);

	fbnic_wrfl(fbd);
}

static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
{
	fbnic_config_txrx_usecs(nv, FBNIC_INTR_CQ_REARM_INTR_UNMASK);
}

void fbnic_napi_enable(struct fbnic_net *fbn)
{
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		napi_enable_locked(&nv->napi);

		fbnic_nv_irq_enable(nv);

		/* Record bit used for NAPI IRQs so we can
		 * set the mask appropriately
		 */
		irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
	}

	/* Force the first interrupt on the device to guarantee
	 * that any packets that may have been enqueued during the
	 * bringup are processed.
	 */
	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	fbnic_wrfl(fbd);
}

void fbnic_napi_depletion_check(struct net_device *netdev)
{
	struct fbnic_net *fbn = netdev_priv(netdev);
	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
	struct fbnic_dev *fbd = fbn->fbd;
	int i, j, t;

	for (i = 0; i < fbn->num_napi; i++) {
		struct fbnic_napi_vector *nv = fbn->napi[i];

		/* Find RQs which are completely out of pages */
		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
			/* Assume 4 pages is always enough to fit a packet
			 * and therefore generate a completion and an IRQ.
			 */
			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
		}
	}

	for (i = 0; i < ARRAY_SIZE(irqs); i++) {
		if (!irqs[i])
			continue;
		fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i), irqs[i]);
		fbnic_wr32(fbd, FBNIC_INTR_SET(i), irqs[i]);
	}

	fbnic_wrfl(fbd);
}

static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	const struct fbnic_q_triad *real;
	struct fbnic_q_triad *qt = qmem;
	struct fbnic_napi_vector *nv;

	if (!netif_running(dev))
		return fbnic_alloc_qt_page_pools(fbn, qt, idx);

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx,
			real->sub0.flags);
	fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx,
			real->sub1.flags);
	fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx,
			real->cmpl.flags);

	return fbnic_alloc_rx_qt_resources(fbn, nv, qt);
}

static void fbnic_queue_mem_free(struct net_device *dev, void *qmem)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_q_triad *qt = qmem;

	if (!netif_running(dev))
		fbnic_free_qt_page_pools(qt);
	else
		fbnic_free_qt_resources(fbn, qt);
}

static void __fbnic_nv_restart(struct fbnic_net *fbn,
			       struct fbnic_napi_vector *nv)
{
	struct fbnic_dev *fbd = fbn->fbd;
	int i;

	fbnic_nv_enable(fbn, nv);
	fbnic_nv_fill(nv);

	napi_enable_locked(&nv->napi);
	fbnic_nv_irq_enable(nv);
	fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32));
	fbnic_wrfl(fbd);

	for (i = 0; i < nv->txt_count; i++)
		netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx);
}

static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_napi_vector *nv;
	struct fbnic_q_triad *real;

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	fbnic_aggregate_ring_rx_counters(fbn, &real->sub0);
	fbnic_aggregate_ring_rx_counters(fbn, &real->sub1);
	fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl);

	memcpy(real, qmem, sizeof(*real));

	__fbnic_nv_restart(fbn, nv);

	return 0;
}

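/* Per-queue stop path for the queue management ops: quiesce NAPI and the
 * vector interrupt, stop the Tx subqueues, disable the rings, wait for
 * the hardware to report each queue idle, then flush software state and
 * snapshot the rings into the caller-provided qmem. If the idle wait
 * fails the vector is restarted so the device is left running.
 */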
static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx)
{
	struct fbnic_net *fbn = netdev_priv(dev);
	const struct fbnic_q_triad *real;
	struct fbnic_napi_vector *nv;
	int i, t;
	int err;

	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
	nv = fbn->napi[idx % fbn->num_napi];

	napi_disable_locked(&nv->napi);
	fbnic_nv_irq_disable(nv);

	for (i = 0; i < nv->txt_count; i++)
		netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx);
	fbnic_nv_disable(fbn, nv);

	for (t = 0; t < nv->txt_count + nv->rxt_count; t++) {
		err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count,
					    nv->qt[t].sub0.q_idx);
		if (err)
			goto err_restart;
	}

	fbnic_synchronize_irq(fbn->fbd, nv->v_idx);
	fbnic_nv_flush(nv);

	page_pool_disable_direct_recycling(real->sub0.page_pool);
	page_pool_disable_direct_recycling(real->sub1.page_pool);

	memcpy(qmem, real, sizeof(*real));

	return 0;

err_restart:
	__fbnic_nv_restart(fbn, nv);
	return err;
}

const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct fbnic_q_triad),
	.ndo_queue_mem_alloc	= fbnic_queue_mem_alloc,
	.ndo_queue_mem_free	= fbnic_queue_mem_free,
	.ndo_queue_start	= fbnic_queue_start,
	.ndo_queue_stop		= fbnic_queue_stop,
};