// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include <net/libeth/rx.h>
#include <net/libeth/tx.h>

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 1 if checksum offload was set up, 0 if checksum offload is not
 * needed or was handled in software, or a negative error if it cannot be
 * executed.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
                                struct idpf_tx_offload_params *off)
{
        u32 l4_len, l3_len, l2_len;
        union {
                struct iphdr *v4;
                struct ipv6hdr *v6;
                unsigned char *hdr;
        } ip;
        union {
                struct tcphdr *tcp;
                unsigned char *hdr;
        } l4;
        u32 offset, cmd = 0;
        u8 l4_proto = 0;
        __be16 frag_off;
        bool is_tso;

        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return 0;

        ip.hdr = skb_network_header(skb);
        l4.hdr = skb_transport_header(skb);

        /* compute outer L2 header size */
        l2_len = ip.hdr - skb->data;
        offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
        is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
        if (skb->encapsulation) {
                u32 tunnel = 0;

                /* define outer network header type */
                if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
                        /* The stack computes the IP header already, the only
                         * time we need the hardware to recompute it is in the
                         * case of TSO.
                         */
                        tunnel |= is_tso ?
                                  IDPF_TX_CTX_EXT_IP_IPV4 :
                                  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

                        l4_proto = ip.v4->protocol;
                } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
                        tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

                        l4_proto = ip.v6->nexthdr;
                        if (ipv6_ext_hdr(l4_proto))
                                ipv6_skip_exthdr(skb, skb_network_offset(skb) +
                                                 sizeof(*ip.v6),
                                                 &l4_proto, &frag_off);
                }

                /* define outer transport */
                switch (l4_proto) {
                case IPPROTO_UDP:
                        tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
                        break;
                case IPPROTO_GRE:
                        tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
                        break;
                case IPPROTO_IPIP:
                case IPPROTO_IPV6:
                        l4.hdr = skb_inner_network_header(skb);
                        break;
                default:
                        if (is_tso)
                                return -1;

                        skb_checksum_help(skb);

                        return 0;
                }
                off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

                /* compute outer L3 header size */
                tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
                                     (l4.hdr - ip.hdr) / 4);

                /* switch IP header pointer from outer to inner header */
                ip.hdr = skb_inner_network_header(skb);

                /* compute tunnel header size */
                tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
                                     (ip.hdr - l4.hdr) / 2);

                /* indicate if we need to offload outer UDP header */
                if (is_tso &&
                    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
                    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
                        tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

                /* record tunnel offload values */
                off->cd_tunneling |= tunnel;

                /* switch L4 header pointer from outer to inner */
                l4.hdr = skb_inner_transport_header(skb);
                l4_proto = 0;

                /* reset type as we transition from outer to inner headers */
                off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
                if (ip.v4->version == 4)
                        off->tx_flags |= IDPF_TX_FLAGS_IPV4;
                if (ip.v6->version == 6)
                        off->tx_flags |= IDPF_TX_FLAGS_IPV6;
        }

        /* Enable IP checksum offloads */
        if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
                l4_proto = ip.v4->protocol;
                /* See comment above regarding need for HW to recompute IP
                 * header checksum in the case of TSO.
                 */
                if (is_tso)
                        cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
                else
                        cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

        } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
                cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
                l4_proto = ip.v6->nexthdr;
                if (ipv6_ext_hdr(l4_proto))
                        ipv6_skip_exthdr(skb, skb_network_offset(skb) +
                                         sizeof(*ip.v6), &l4_proto,
                                         &frag_off);
        } else {
                return -1;
        }

        /* compute inner L3 header size */
        l3_len = l4.hdr - ip.hdr;
        offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

        /* Enable L4 checksum offloads */
        switch (l4_proto) {
        case IPPROTO_TCP:
                /* enable checksum offloads */
                cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
                l4_len = l4.tcp->doff;
                break;
        case IPPROTO_UDP:
                /* enable UDP checksum offload */
                cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
                l4_len = sizeof(struct udphdr) >> 2;
                break;
        case IPPROTO_SCTP:
                /* enable SCTP checksum offload */
                cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
                l4_len = sizeof(struct sctphdr) >> 2;
                break;
        default:
                if (is_tso)
                        return -1;

                skb_checksum_help(skb);

                return 0;
        }

        offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
        off->td_cmd |= cmd;
        off->hdr_offsets |= offset;

        return 1;
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info buffer to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first, gets a
 * physical address for each memory location, and programs it and the
 * length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
                                struct idpf_tx_buf *first,
                                struct idpf_tx_offload_params *offloads)
{
        u32 offsets = offloads->hdr_offsets;
        struct idpf_tx_buf *tx_buf = first;
        struct idpf_base_tx_desc *tx_desc;
        struct sk_buff *skb = first->skb;
        u64 td_cmd = offloads->td_cmd;
        unsigned int data_len, size;
        u16 i = tx_q->next_to_use;
        struct netdev_queue *nq;
        skb_frag_t *frag;
        dma_addr_t dma;
        u64 td_tag = 0;

        data_len = skb->data_len;
        size = skb_headlen(skb);

        tx_desc = &tx_q->base_tx[i];

        dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

        /* write each descriptor with CRC bit */
        if (idpf_queue_has(CRC_EN, tx_q))
                td_cmd |= IDPF_TX_DESC_CMD_ICRC;

        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
                unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

                if (dma_mapping_error(tx_q->dev, dma))
                        return idpf_tx_dma_map_error(tx_q, skb, first, i);

                /* record length, and DMA address */
                dma_unmap_len_set(tx_buf, len, size);
                dma_unmap_addr_set(tx_buf, dma, dma);
                tx_buf->type = LIBETH_SQE_FRAG;

                /* align size to end of page */
                max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
                tx_desc->buf_addr = cpu_to_le64(dma);

                /* account for data chunks larger than the hardware
                 * can handle
                 */
                while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
                        tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
                                                                  offsets,
                                                                  max_data,
                                                                  td_tag);
                        if (unlikely(++i == tx_q->desc_count)) {
                                tx_buf = &tx_q->tx_buf[0];
                                tx_desc = &tx_q->base_tx[0];
                                i = 0;
                        } else {
                                tx_buf++;
                                tx_desc++;
                        }

                        tx_buf->type = LIBETH_SQE_EMPTY;

                        dma += max_data;
                        size -= max_data;

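                        /* Only the first chunk needs the read-request
                         * alignment adjustment computed above; later chunks
                         * use the plain aligned maximum.
                         */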
                        max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
                        tx_desc->buf_addr = cpu_to_le64(dma);
                }

                if (!data_len)
                        break;

                tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
                                                          size, td_tag);

                if (unlikely(++i == tx_q->desc_count)) {
                        tx_buf = &tx_q->tx_buf[0];
                        tx_desc = &tx_q->base_tx[0];
                        i = 0;
                } else {
                        tx_buf++;
                        tx_desc++;
                }

                size = skb_frag_size(frag);
                data_len -= size;

                dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
                                       DMA_TO_DEVICE);
        }

        skb_tx_timestamp(first->skb);

        /* write last descriptor with RS and EOP bits */
        td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

        tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
                                                  size, td_tag);

        first->type = LIBETH_SQE_SKB;
        first->rs_idx = i;

        IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

        nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
        netdev_tx_sent_queue(nq, first->bytes);

        idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the Tx buffer ring mimics the descriptor ring, update the tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
{
        struct idpf_base_tx_ctx_desc *ctx_desc;
        int ntu = txq->next_to_use;

        txq->tx_buf[ntu].type = LIBETH_SQE_CTX;

        ctx_desc = &txq->base_ctx[ntu];

        IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
        txq->next_to_use = ntu;

        return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
                                           struct idpf_tx_offload_params *offload)
{
        struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
        u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

        if (offload->tso_segs) {
                qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
                qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
                                  offload->tso_len);
                qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);

                u64_stats_update_begin(&txq->stats_sync);
                u64_stats_inc(&txq->q_stats.lso_pkts);
                u64_stats_update_end(&txq->stats_sync);
        }

        desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

        desc->qw0.l2tag2 = 0;
        desc->qw0.rsvd1 = 0;
        desc->qw1 = cpu_to_le64(qw1);
}

/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
                                  struct idpf_tx_queue *tx_q)
{
        struct idpf_tx_offload_params offload = { };
        struct idpf_tx_buf *first;
        unsigned int count;
        __be16 protocol;
        int csum, tso;

        count = idpf_tx_desc_count_required(tx_q, skb);
        if (unlikely(!count))
                return idpf_tx_drop_skb(tx_q, skb);

        if (idpf_tx_maybe_stop_common(tx_q,
                                      count + IDPF_TX_DESCS_PER_CACHE_LINE +
                                      IDPF_TX_DESCS_FOR_CTX)) {
                idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

                u64_stats_update_begin(&tx_q->stats_sync);
                u64_stats_inc(&tx_q->q_stats.q_busy);
                u64_stats_update_end(&tx_q->stats_sync);

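                /* NETDEV_TX_BUSY tells the stack to requeue the skb and
                 * retry transmission once descriptors free up.
                 */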
                return NETDEV_TX_BUSY;
        }

        protocol = vlan_get_protocol(skb);
        if (protocol == htons(ETH_P_IP))
                offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
        else if (protocol == htons(ETH_P_IPV6))
                offload.tx_flags |= IDPF_TX_FLAGS_IPV6;

        tso = idpf_tso(skb, &offload);
        if (tso < 0)
                goto out_drop;

        csum = idpf_tx_singleq_csum(skb, &offload);
        if (csum < 0)
                goto out_drop;

        if (tso || offload.cd_tunneling)
                idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

        /* record the location of the first descriptor for this packet */
        first = &tx_q->tx_buf[tx_q->next_to_use];
        first->skb = skb;

        if (tso) {
                first->packets = offload.tso_segs;
                first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len);
        } else {
                first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
                first->packets = 1;
        }
        idpf_tx_singleq_map(tx_q, first, &offload);

        return NETDEV_TX_OK;

out_drop:
        return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Return: true if the clean completed within the budget, false otherwise.
 */
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
                                  int *cleaned)
{
        struct libeth_sq_napi_stats ss = { };
        struct idpf_base_tx_desc *tx_desc;
        u32 budget = tx_q->clean_budget;
        s16 ntc = tx_q->next_to_clean;
        struct libeth_cq_pp cp = {
                .dev = tx_q->dev,
                .ss = &ss,
                .napi = napi_budget,
        };
        struct idpf_netdev_priv *np;
        struct idpf_tx_buf *tx_buf;
        struct netdev_queue *nq;
        bool dont_wake;

        tx_desc = &tx_q->base_tx[ntc];
        tx_buf = &tx_q->tx_buf[ntc];
        ntc -= tx_q->desc_count;

        do {
                struct idpf_base_tx_desc *eop_desc;

                /* If this entry in the ring was used as a context descriptor,
                 * its corresponding entry in the buffer ring will indicate as
                 * such. We can skip this descriptor since there is no buffer
                 * to clean.
                 */
                if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) {
                        tx_buf->type = LIBETH_SQE_EMPTY;
                        goto fetch_next_txq_desc;
                }

                if (unlikely(tx_buf->type != LIBETH_SQE_SKB))
                        break;

                /* prevent any other reads prior to type */
                smp_rmb();

                eop_desc = &tx_q->base_tx[tx_buf->rs_idx];

                /* if the descriptor isn't done, no work yet to do */
                if (!(eop_desc->qw1 &
                      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
                        break;

                /* update the statistics for this packet */
                libeth_tx_complete(tx_buf, &cp);

                /* unmap remaining buffers */
                while (tx_desc != eop_desc) {
                        tx_buf++;
                        tx_desc++;
                        ntc++;
                        if (unlikely(!ntc)) {
                                ntc -= tx_q->desc_count;
                                tx_buf = tx_q->tx_buf;
                                tx_desc = &tx_q->base_tx[0];
                        }

                        /* unmap any remaining paged data */
                        libeth_tx_complete(tx_buf, &cp);
                }

                /* update budget only if we did something */
                budget--;

fetch_next_txq_desc:
                tx_buf++;
                tx_desc++;
                ntc++;
                if (unlikely(!ntc)) {
                        ntc -= tx_q->desc_count;
                        tx_buf = tx_q->tx_buf;
                        tx_desc = &tx_q->base_tx[0];
                }
        } while (likely(budget));

        ntc += tx_q->desc_count;
        tx_q->next_to_clean = ntc;

        *cleaned += ss.packets;

        u64_stats_update_begin(&tx_q->stats_sync);
        u64_stats_add(&tx_q->q_stats.packets, ss.packets);
        u64_stats_add(&tx_q->q_stats.bytes, ss.bytes);
        u64_stats_update_end(&tx_q->stats_sync);

        np = netdev_priv(tx_q->netdev);
        nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);

        dont_wake = np->state != __IDPF_VPORT_UP ||
                    !netif_carrier_ok(tx_q->netdev);
        __netif_txq_completed_wake(nq, ss.packets, ss.bytes,
                                   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
                                   dont_wake);

        return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if the clean is not complete, true otherwise.
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
                                      int *cleaned)
{
        u16 num_txq = q_vec->num_txq;
        bool clean_complete = true;
        int i, budget_per_q;

        budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
        for (i = 0; i < num_txq; i++) {
                struct idpf_tx_queue *q;

                q = q_vec->tx[i];
                clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
                                                        cleaned);
        }

        return clean_complete;
}

/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
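 *
 * Return: true if any of the requested status/error bits are set in the
 * descriptor, false otherwise.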
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
                                         const u64 stat_err_bits)
{
        return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
                  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rx_desc: Rx descriptor for current buffer
 *
 * Return: true if the buffer is not the last one of the packet.
 */
static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
{
        /* if we are the last buffer then there is nothing else to do */
        if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
                return false;

        return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @decoded: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
                                 struct sk_buff *skb,
                                 struct idpf_rx_csum_decoded csum_bits,
                                 struct libeth_rx_pt decoded)
{
        bool ipv4, ipv6;

        /* check if Rx checksum is enabled */
        if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
                return;

        /* check if HW has decoded the packet and checksum */
        if (unlikely(!csum_bits.l3l4p))
                return;

        ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
        ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;

        /* Check if there were any checksum errors */
        if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
                goto checksum_fail;

        /* Device could not do any checksum offload for certain extension
         * headers as indicated by setting IPV6EXADD bit
         */
        if (unlikely(ipv6 && csum_bits.ipv6exadd))
                return;

        /* check for L4 errors and handle packets that were not able to be
         * checksummed due to arrival speed
         */
        if (unlikely(csum_bits.l4e))
                goto checksum_fail;

        if (unlikely(csum_bits.nat && csum_bits.eudpe))
                goto checksum_fail;

        /* Handle packets that were not able to be checksummed due to arrival
         * speed, in this case the stack can compute the csum.
         */
        if (unlikely(csum_bits.pprs))
                return;

        /* If there is an outer header present that might contain a checksum
         * we need to bump the checksum level by 1 to reflect the fact that
         * we are indicating we validated the inner checksum.
         */
        if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
                skb->csum_level = 1;

        skb->ip_summed = CHECKSUM_UNNECESSARY;
        return;

checksum_fail:
        u64_stats_update_begin(&rxq->stats_sync);
        u64_stats_inc(&rxq->q_stats.hw_csum_err);
        u64_stats_update_end(&rxq->stats_sync);
}

/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct idpf_rx_csum_decoded
idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
{
        struct idpf_rx_csum_decoded csum_bits = { };
        u32 rx_error, rx_status;
        u64 qword;

        qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

        rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
        rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

        csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
        csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
                                   rx_error);
        csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
        csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
                                   rx_error);
        csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
                                    rx_status);
        csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
                                        rx_status);

        return csum_bits;
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct idpf_rx_csum_decoded
idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
{
        struct idpf_rx_csum_decoded csum_bits = { };
        u16 rx_status0, rx_status1;

        rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
        rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

        csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
                                  rx_status0);
        csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
                                   rx_status0);
        csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
                                  rx_status0);
        csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
                                    rx_status0);
        csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
                                    rx_status0);
        csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
                                        rx_status0);
        csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
                                  rx_status1);

        return csum_bits;
}

/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
                                      struct sk_buff *skb,
                                      const union virtchnl2_rx_desc *rx_desc,
                                      struct libeth_rx_pt decoded)
{
        u64 mask, qw1;

        if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
                return;

        mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
        qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

        if (FIELD_GET(mask, qw1) == mask) {
                u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

                libeth_rx_pt_set_hash(skb, hash, decoded);
        }
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
                                      struct sk_buff *skb,
                                      const union virtchnl2_rx_desc *rx_desc,
                                      struct libeth_rx_pt decoded)
{
        if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
                return;

        if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
                      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
                u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);

                libeth_rx_pt_set_hash(skb, hash, decoded);
        }
}

/**
 * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void
idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
                                   struct sk_buff *skb,
                                   const union virtchnl2_rx_desc *rx_desc,
                                   u16 ptype)
{
        struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
        struct idpf_rx_csum_decoded csum_bits;

        /* modifies the skb - consumes the enet header */
        skb->protocol = eth_type_trans(skb, rx_q->netdev);

        /* Check if we're using base mode descriptor IDs */
        if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
                idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
                csum_bits = idpf_rx_singleq_base_csum(rx_desc);
        } else {
                idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
                csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
        }

        idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
        skb_record_rx_queue(skb, rx_q->idx);
}

/**
 * idpf_rx_buf_hw_update - Store the new tail and head values
 * @rxq: queue to bump
 * @val: new head index
 */
static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
{
        rxq->next_to_use = val;

        if (unlikely(!rxq->tail))
                return;

        /* writel has an implicit memory barrier */
        writel(val, rxq->tail);
}

/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
                                      u16 cleaned_count)
{
        struct virtchnl2_singleq_rx_buf_desc *desc;
        const struct libeth_fq_fp fq = {
                .pp = rx_q->pp,
                .fqes = rx_q->rx_buf,
                .truesize = rx_q->truesize,
                .count = rx_q->desc_count,
        };
        u16 nta = rx_q->next_to_alloc;

        if (!cleaned_count)
                return false;

        desc = &rx_q->single_buf[nta];

        do {
                dma_addr_t addr;

                addr = libeth_rx_alloc(&fq, nta);
                if (addr == DMA_MAPPING_ERROR)
                        break;

                /* Refresh the desc even if buffer_addrs didn't change
                 * because each write-back erases this info.
                 */
                desc->pkt_addr = cpu_to_le64(addr);
                desc->hdr_addr = 0;
                desc++;

                nta++;
                if (unlikely(nta == rx_q->desc_count)) {
                        desc = &rx_q->single_buf[0];
                        nta = 0;
                }

                cleaned_count--;
        } while (cleaned_count);

        if (rx_q->next_to_alloc != nta) {
                idpf_rx_buf_hw_update(rx_q, nta);
                rx_q->next_to_alloc = nta;
        }

        return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
                                    struct idpf_rx_extracted *fields)
{
        u64 qword;

        qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

        fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
        fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
                                    struct idpf_rx_extracted *fields)
{
        fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
                                 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
        fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
                                     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Dispatch to the base or flex extraction routine based on the descriptor
 * ID set in use on this queue.
 */
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
                               const union virtchnl2_rx_desc *rx_desc,
                               struct idpf_rx_extracted *fields)
{
        if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
                idpf_rx_singleq_extract_base_fields(rx_desc, fields);
        else
                idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}

/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Return: number of packets cleaned, or the full budget if a buffer
 * allocation failed (to guarantee another pass through this routine).
 */
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
        unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
        struct sk_buff *skb = rx_q->skb;
        u16 ntc = rx_q->next_to_clean;
        u16 cleaned_count = 0;
        bool failure = false;

        /* Process Rx packets bounded by budget */
        while (likely(total_rx_pkts < (unsigned int)budget)) {
                struct idpf_rx_extracted fields = { };
                union virtchnl2_rx_desc *rx_desc;
                struct idpf_rx_buf *rx_buf;

                /* get the Rx desc from Rx queue based on 'next_to_clean' */
                rx_desc = &rx_q->rx[ntc];

                /* status_error_ptype_len will always be zero for unused
                 * descriptors because it's cleared in cleanup, and overlaps
                 * with hdr_addr which is always zero because packet split
                 * isn't used. If the hardware wrote DD then the length will
                 * be non-zero.
                 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
                if (!idpf_rx_singleq_test_staterr(rx_desc,
                                                  IDPF_RXD_DD))
                        break;

                /* This memory barrier is needed to keep us from reading
                 * any other fields out of the rx_desc
                 */
                dma_rmb();

                idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

                rx_buf = &rx_q->rx_buf[ntc];
                if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
                        goto skip_data;

                if (skb)
                        idpf_rx_add_frag(rx_buf, skb, fields.size);
                else
                        skb = idpf_rx_build_skb(rx_buf, fields.size);

                /* exit if we failed to retrieve a buffer */
                if (!skb)
                        break;

skip_data:
                rx_buf->page = NULL;

                IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
                cleaned_count++;

                /* skip if it is non EOP desc */
                if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
                        continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
                                  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
                if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
                                                          IDPF_RXD_ERR_S))) {
                        dev_kfree_skb_any(skb);
                        skb = NULL;
                        continue;
                }

                /* pad skb if needed (to make valid ethernet frame) */
                if (eth_skb_pad(skb)) {
                        skb = NULL;
                        continue;
                }

                /* probably a little skewed due to removing CRC */
                total_rx_bytes += skb->len;

                /* protocol */
                idpf_rx_singleq_process_skb_fields(rx_q, skb,
                                                   rx_desc, fields.rx_ptype);

                /* send completed skb up the stack */
                napi_gro_receive(rx_q->pp->p.napi, skb);
                skb = NULL;

                /* update budget accounting */
                total_rx_pkts++;
        }

        rx_q->skb = skb;

        rx_q->next_to_clean = ntc;

        page_pool_nid_changed(rx_q->pp, numa_mem_id());
        if (cleaned_count)
                failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

        u64_stats_update_begin(&rx_q->stats_sync);
        u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
        u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
        u64_stats_update_end(&rx_q->stats_sync);

        /* guarantee a trip back through this routine if there was a failure */
        return failure ? budget : (int)total_rx_pkts;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if the clean is not complete, true otherwise.
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
                                      int *cleaned)
{
        u16 num_rxq = q_vec->num_rxq;
        bool clean_complete = true;
        int budget_per_q, i;

        /* We attempt to distribute budget to each Rx queue fairly, but don't
         * allow the budget to go below 1 because that would exit polling
         * early.
         */
        budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
        for (i = 0; i < num_rxq; i++) {
                struct idpf_rx_queue *rxq = q_vec->rx[i];
                int pkts_cleaned_per_q;

                pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

                /* if we clean as many as budgeted, we must not be done */
                if (pkts_cleaned_per_q >= budget_per_q)
                        clean_complete = false;
                *cleaned += pkts_cleaned_per_q;
        }

        return clean_complete;
}

/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
        struct idpf_q_vector *q_vector =
                                container_of(napi, struct idpf_q_vector, napi);
        bool clean_complete;
        int work_done = 0;

        /* Handle case where we are called by netpoll with a budget of 0 */
        if (budget <= 0) {
                idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

                return budget;
        }

        clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
                                                   &work_done);
        clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
                                                    &work_done);

        /* If work not completed, return budget and polling will return */
        if (!clean_complete) {
                idpf_vport_intr_set_wb_on_itr(q_vector);
                return budget;
        }

        work_done = min_t(int, work_done, budget - 1);

        /* Exit the polling mode, but don't re-enable interrupts if stack might
         * poll us due to busy-polling
         */
        if (likely(napi_complete_done(napi, work_done)))
                idpf_vport_intr_update_itr_ena_irq(q_vector);
        else
                idpf_vport_intr_set_wb_on_itr(q_vector);

        return work_done;
}