// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>
#include <net/xfrm.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"

static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2);
}

static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT);
}

static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
				  struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}

static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset, l4_hdrlen;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (!skb->encapsulation) {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		l4_hdrlen = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
			    sizeof(struct udphdr) : tcp_hdrlen(skb);
	} else {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		l4_hdrlen = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
			    sizeof(struct udphdr) : inner_tcp_hdrlen(skb);
	}

	hdrlen = l4_offset + l4_hdrlen;
	segs = skb_shinfo(skb)->gso_segs;
	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;

	txd.l3_offset = l3_offset;
	txd.l4_offset = l4_offset;
	txd.lso_meta_res = 0;
	txd.mss = cpu_to_le16(mss);
	txd.lso_hdrlen = hdrlen;
	txd.lso_totsegs = segs;

	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}
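
/**
 * nfp_nfdk_tx_csum() - Set TX CSUM offload flags in TX descriptor metadata
 * @dp:      NFP Net data path struct
 * @r_vec:   per-ring structure
 * @pkt_cnt: number of packets for statistics (more than one for LSO)
 * @skb:     Pointer to SKB
 * @flags:   TX descriptor metadata flags collected so far
 *
 * Return: @flags with the L3/L4 checksum offload and encapsulation bits set
 *	   as needed, or unchanged when checksum offload is not requested.
 */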
static u64
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return flags;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (!skb->encapsulation) {
		r_vec->hw_csum_tx += pkt_cnt;
	} else {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}

static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int nr_frags;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));
	nr_frags = skb_shinfo(skb)->nr_frags;
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Under count by 1 (don't count meta) for the round down to work out */
	n_descs += !!skb_is_gso(skb);

	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

static int
nfp_nfdk_prep_tx_meta(struct nfp_net_dp *dp, struct nfp_app *app,
		      struct sk_buff *skb, bool *ipsec)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	struct nfp_ipsec_offload offload_info;
	unsigned char *data;
	bool vlan_insert;
	u32 meta_id = 0;
	int md_bytes;

#ifdef CONFIG_NFP_NET_IPSEC
	if (xfrm_offload(skb))
		*ipsec = nfp_net_ipsec_tx_prep(dp, skb, &offload_info);
#endif

	if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX))
		md_dst = NULL;

	vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2);

	if (!(md_dst || vlan_insert || *ipsec))
		return 0;

	md_bytes = sizeof(meta_id) +
		   (!!md_dst ? NFP_NET_META_PORTID_SIZE : 0) +
		   (vlan_insert ? NFP_NET_META_VLAN_SIZE : 0) +
		   (*ipsec ? NFP_NET_META_IPSEC_FIELD_SIZE : 0);
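
	/* The metadata prepend is laid out back to front: a 32-bit word
	 * holding NFDK_META_LEN and the NFDK_META_FIELDS type codes comes
	 * first, followed by the per-field data.  Fields are written below
	 * starting from the end of the area, and each additional field
	 * shifts the accumulated type codes left by one nibble, so the
	 * field closest to the meta_id word ends up in the lowest bits.
	 */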
	if (unlikely(skb_cow_head(skb, md_bytes)))
		return -ENOMEM;

	data = skb_push(skb, md_bytes) + md_bytes;
	if (md_dst) {
		data -= NFP_NET_META_PORTID_SIZE;
		put_unaligned_be32(md_dst->u.port_info.port_id, data);
		meta_id = NFP_NET_META_PORTID;
	}
	if (vlan_insert) {
		data -= NFP_NET_META_VLAN_SIZE;
		/* data type of skb->vlan_proto is __be16
		 * so it fills metadata without calling put_unaligned_be16
		 */
		memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto));
		put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto));
		meta_id <<= NFP_NET_META_FIELD_SIZE;
		meta_id |= NFP_NET_META_VLAN;
	}

	if (*ipsec) {
		data -= NFP_NET_META_IPSEC_SIZE;
		put_unaligned_be32(offload_info.seq_hi, data);
		data -= NFP_NET_META_IPSEC_SIZE;
		put_unaligned_be32(offload_info.seq_low, data);
		data -= NFP_NET_META_IPSEC_SIZE;
		put_unaligned_be32(offload_info.handle - 1, data);
		meta_id <<= NFP_NET_META_IPSEC_FIELD_SIZE;
		meta_id |= NFP_NET_META_IPSEC << 8 | NFP_NET_META_IPSEC << 4 | NFP_NET_META_IPSEC;
	}

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

	data -= sizeof(meta_id);
	put_unaligned_be32(meta_id, data);

	return NFDK_DESC_TX_CHAIN_META;
}
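
/* NFDK TX descriptors are laid out in blocks of NFDK_TX_DESC_BLOCK_CNT
 * entries and a packet must not span two blocks.  A packet consumes a
 * head descriptor for (part of) the linear area, one gather descriptor
 * per NFDK_TX_MAX_DATA_PER_DESC chunk of remaining data, a metadata
 * descriptor and, for TSO, an LSO descriptor.  When the set would cross
 * a block boundary, or more than NFDK_TX_MAX_DATA_PER_BLOCK bytes would
 * accumulate in one block, nfp_nfdk_tx_maybe_close_block() first pads the
 * rest of the current block with zeroed no-op descriptors.
 */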

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	bool ipsec = false;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	metadata = nfp_nfdk_prep_tx_meta(dp, nn->app, skb, &ipsec);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_flush;

	/* nr_frags will change after skb_linearize so we get nr_frags after
	 * nfp_nfdk_tx_maybe_close_block function
	 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;

	/* We will do our best to pass as much data as we can in descriptor
	 * and we need to make sure the first descriptor includes whole head
	 * since there is limitation in firmware side. Sometimes the value of
	 * 'dma_len & NFDK_DESC_TX_DMA_LEN_HEAD' will be less than headlen.
	 */
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger DMA descriptors
	 * and is handled with the fragment loop.
	 */
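	/* The first two txbuf entries already hold the skb pointer and the
	 * DMA address of the linear area; the entries filled in the loop
	 * below record only the DMA address of each fragment.  The
	 * completion and error-unwind paths walk these entries to unmap.
	 */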
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;

	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_desc_set_dma_addr_48b(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (ipsec)
		metadata = nfp_nfdk_ipsec_tx(metadata, skb);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		if (!ipsec)
			metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		if (!ipsec)
			metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf pointed to the next-to-use */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 * @budget:  NAPI budget (only used as bool to determine if in NAPI context)
 */
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 done_pkts = 0, done_bytes = 0;
	struct nfp_nfdk_tx_buf *ktxbufs;
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;
	u32 rd_p, qcp_rd_p;
	int todo;

	rd_p = tx_ring->rd_p;
	if (tx_ring->wr_p == rd_p)
		return;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
	ktxbufs = tx_ring->ktxbufs;

	while (todo > 0) {
		const skb_frag_t *frag, *fend;
		unsigned int size, n_descs = 1;
		struct nfp_nfdk_tx_buf *txbuf;
		struct sk_buff *skb;

		txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
		skb = txbuf->skb;
		txbuf++;

		/* Closed block */
		if (!skb) {
			n_descs = D_BLOCK_CPL(rd_p);
			goto next;
		}

		/* Unmap head */
		size = skb_headlen(skb);
		n_descs += nfp_nfdk_headlen_to_segs(size);
		dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
		txbuf++;

		/* Unmap frags */
		frag = skb_shinfo(skb)->frags;
		fend = frag + skb_shinfo(skb)->nr_frags;
		for (; frag < fend; frag++) {
			size = skb_frag_size(frag);
			n_descs += DIV_ROUND_UP(size,
						NFDK_TX_MAX_DATA_PER_DESC);
			dma_unmap_page(dev, txbuf->dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
			txbuf++;
		}

		if (!skb_is_gso(skb)) {
			done_bytes += skb->len;
			done_pkts++;
		} else {
			done_bytes += txbuf->real_len;
			done_pkts += txbuf->pkt_cnt;
			n_descs++;
		}

		napi_consume_skb(skb, budget);
next:
		rd_p += n_descs;
		todo -= n_descs;
	}

	tx_ring->rd_p = rd_p;
	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	if (!dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

/* Receive processing */
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag;

	if (!dp->xdp_prog) {
		frag = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!frag))
			return NULL;
	} else {
		struct page *page;

		page = dev_alloc_page();
		if (unlikely(!page))
			return NULL;
		frag = page_address(page);
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}

/**
 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp:       NFP Net data path struct
 * @rx_ring:  RX ring structure
 * @frag:     page fragment buffer
 * @dma_addr: DMA address of skb mapping
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
				  dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}

/**
 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp:      NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
				    struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp:    NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd:   Pointer to RX descriptor
 * @meta:  Parsed metadata prepend
 * @skb:   Pointer to SKB
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}

static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		meta->hash_type = PKT_HASH_TYPE_L3;
		break;
	default:
		meta->hash_type = PKT_HASH_TYPE_L4;
		break;
	}

	meta->hash = get_unaligned_be32(hash);
}

static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info, vlan_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_VLAN:
			vlan_info = get_unaligned_be32(data);
			if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) {
				meta->vlan.stripped = true;
				meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK,
							    vlan_info);
				meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK,
							   vlan_info);
			}
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
#ifdef CONFIG_NFP_NET_IPSEC
		case NFP_NET_META_IPSEC:
			/* Note: an IPsec packet can have a zero saidx, so add
			 * 1 to indicate within the driver that the packet is
			 * an IPsec packet.
			 */
			meta->ipsec_saidx = get_unaligned_be32(data) + 1;
			data += 4;
			break;
#endif
		default:
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data != pkt;
}

static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		 struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* The skb is built around the frag, so freeing the skb would free the
	 * frag; take an extra reference to be able to reuse it.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}
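
/* Completion for the XDP TX ring: buffers transmitted by the XDP_TX path are
 * full pages that came from the RX ring, so instead of being freed they are
 * handed straight back to the RX freelist via nfp_nfdk_rx_give_one().
 */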
static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 qcp_rd_p, done = 0;
	bool done_all;
	int todo;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (todo > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + done);
		struct nfp_nfdk_tx_buf *txbuf;
		unsigned int step = 1;

		txbuf = &tx_ring->ktxbufs[idx];
		if (!txbuf->raw)
			goto next;

		if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) {
			WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			goto next;
		}

		/* Two successive txbufs are used to stash virtual and dma
		 * address respectively, recycle and clean them here.
		 */
		nfp_nfdk_rx_give_one(dp, rx_ring,
				     (void *)NFDK_TX_BUF_PTR(txbuf[0].val),
				     txbuf[1].dma_addr);
		txbuf[0].raw = 0;
		txbuf[1].raw = 0;
		step = 2;

		u64_stats_update_begin(&r_vec->tx_sync);
		/* Note: tx_bytes not accumulated. */
		r_vec->tx_pkts++;
		u64_stats_update_end(&r_vec->tx_sync);
next:
		todo -= step;
		done += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done);
	tx_ring->rd_p += done;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}
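
/* Transmit an RX buffer back out on the XDP TX ring (XDP_TX).  The page is
 * not copied: its virtual address (tagged NFDK_TX_BUF_INFO_SOP) and its DMA
 * address are stashed in two consecutive txbuf entries so that
 * nfp_nfdk_xdp_complete() can recycle the page to the RX freelist later.
 */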
static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grew the packet beyond the DMA area */
	if (pkt_len + dma_off > dma_map_sz)
		return false;

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if we are crossing a block boundary */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->ktxbufs[wr_idx];

	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}

/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget:  NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf = &rx_ring->rxbufs[idx];
		/*	     < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |            packet            | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (meta_len) {
			if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
							 rxbuf->frag + meta_off,
							 rxbuf->frag + pkt_off,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}
		}

		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
								  tx_ring,
								  rxbuf,
								  dma_off,
								  pkt_len,
								  &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
				continue;
			}
		}

		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = napi_build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);

		if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, NULL, skb);
			continue;
		}

#ifdef CONFIG_NFP_NET_IPSEC
		if (meta.ipsec_saidx != 0 && unlikely(nfp_net_ipsec_rx(&meta, skb))) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, NULL, skb);
			continue;
		}
#endif

		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_nfdk_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}

/**
 * nfp_nfdk_poll() - napi poll function
 * @napi:   NAPI structure
 * @budget: NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}

/* Control device data path
 */

bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int dma_len, type;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	u64 metadata = 0;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return NETDEV_TX_BUSY;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		metadata = NFDK_DESC_TX_CHAIN_META;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
				   FIELD_PREP(NFDK_META_FIELDS,
					      NFP_NET_META_PORTID),
				   skb_push(skb, 4));
	}

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_free;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_desc_set_dma_addr_48b(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_desc_set_dma_addr_48b(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = cpu_to_le64(metadata);
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), 0, cnt);
	txbuf--;
	dma_unmap_single(dp->dev, txbuf->dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	txbuf->raw = 0;
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
}
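
/* When the app uses control-message metadata, every control packet from the
 * firmware is expected to carry exactly 8 bytes of metadata: a
 * NFP_NET_META_PORTID field tagged with NFP_META_PORT_ID_CTRL.  Anything
 * else is rejected.
 */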
static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

	if (meta_len != 8)
		return false;

	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);

	return (meta_type == NFP_NET_META_PORTID &&
		meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	struct nfp_net_rx_buf *rxbuf;
	struct nfp_net_rx_desc *rxd;
	dma_addr_t new_dma_addr;
	struct sk_buff *skb;
	void *new_frag;
	int idx;

	idx = D_IDX(rx_ring, rx_ring->rd_p);

	rxd = &rx_ring->rxds[idx];
	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;

	rxbuf = &rx_ring->rxbufs[idx];
	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		pkt_off += meta_len;
	else
		pkt_off += dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}
	new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
		continue;

	return budget;
}

void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
	}
"control message budget exceeded!\n"); 1582 } 1583 } 1584