// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include <net/libeth/rx.h>

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 1 if HW checksum offload was set up, 0 if no offload is needed (or
 * the checksum was handled in software), or a negative value if the offload
 * cannot be performed.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
				struct idpf_tx_offload_params *off)
{
	u32 l4_len, l3_len, l2_len;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;
	__be16 frag_off;
	bool is_tso;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
	if (skb->encapsulation) {
		u32 tunnel = 0;

		/* define outer network header type */
		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
			/* The stack computes the IP header already, the only
			 * time we need the hardware to recompute it is in the
			 * case of TSO.
			 */
			tunnel |= is_tso ?
				  IDPF_TX_CTX_EXT_IP_IPV4 :
				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

			l4_proto = ip.v6->nexthdr;
			if (ipv6_ext_hdr(l4_proto))
				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
						 sizeof(*ip.v6),
						 &l4_proto, &frag_off);
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
			break;
		case IPPROTO_GRE:
			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			if (is_tso)
				return -1;

			skb_checksum_help(skb);

			return 0;
		}
		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

		/* compute outer L3 header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
				     (l4.hdr - ip.hdr) / 4);

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
				     (ip.hdr - l4.hdr) / 2);

		/* indicate if we need to offload outer UDP header */
		if (is_tso &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* See comment above regarding need for HW to recompute IP
		 * header checksum in the case of TSO.
		 */
		if (is_tso)
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
		l4_proto = ip.v6->nexthdr;
		if (ipv6_ext_hdr(l4_proto))
			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
					 sizeof(*ip.v6), &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = sizeof(struct udphdr) >> 2;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		break;
	default:
		if (is_tso)
			return -1;

		skb_checksum_help(skb);

		return 0;
	}

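	/* Pack the L4 length into the offsets word. As a purely illustrative
	 * example: a plain TCP/IPv4 frame with no options works out to
	 * MACLEN = 14 / 2 = 7, IPLEN = 20 / 4 = 5 and L4LEN = doff = 5,
	 * i.e. the 2-byte and 4-byte units the descriptor fields expect.
	 */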
	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
	off->td_cmd |= cmd;
	off->hdr_offsets |= offset;

	return 1;
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first Tx buffer info to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = &tx_q->base_tx[i];

	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (idpf_queue_has(CRC_EN, tx_q))
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (dma_mapping_error(tx_q->dev, dma))
			return idpf_tx_dma_map_error(tx_q, skb, first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
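		/* max_data now extends to the next read-request-size
		 * boundary: "-dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1)" is the
		 * distance to that boundary, so oversized buffers are split
		 * below into chunks that end on aligned addresses.
		 */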
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			tx_desc++;
			i++;

			if (i == tx_q->desc_count) {
				tx_desc = &tx_q->base_tx[0];
				i = 0;
			}

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);
		tx_desc++;
		i++;

		if (i == tx_q->desc_count) {
			tx_desc = &tx_q->base_tx[0];
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buf = &tx_q->tx_buf[i];
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytecount);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the Tx buffer ring mimics the descriptor ring, update the Tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
{
	struct idpf_base_tx_ctx_desc *ctx_desc;
	int ntu = txq->next_to_use;

	memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
	txq->tx_buf[ntu].ctx_entry = true;

	ctx_desc = &txq->base_ctx[ntu];

	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
	txq->next_to_use = ntu;

	return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
					   struct idpf_tx_offload_params *offload)
{
	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

	if (offload->tso_segs) {
		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
				  offload->tso_len);
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);

		u64_stats_update_begin(&txq->stats_sync);
		u64_stats_inc(&txq->q_stats.lso_pkts);
		u64_stats_update_end(&txq->stats_sync);
	}

	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

	desc->qw0.l2tag2 = 0;
	desc->qw0.rsvd1 = 0;
	desc->qw1 = cpu_to_le64(qw1);
}

/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
				  struct idpf_tx_queue *tx_q)
{
	struct idpf_tx_offload_params offload = { };
	struct idpf_tx_buf *first;
	unsigned int count;
	__be16 protocol;
	int csum, tso;

	count = idpf_tx_desc_count_required(tx_q, skb);
	if (unlikely(!count))
		return idpf_tx_drop_skb(tx_q, skb);

	if (idpf_tx_maybe_stop_common(tx_q,
				      count + IDPF_TX_DESCS_PER_CACHE_LINE +
				      IDPF_TX_DESCS_FOR_CTX)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		return NETDEV_TX_BUSY;
	}

	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;

	tso = idpf_tso(skb, &offload);
	if (tso < 0)
		goto out_drop;

	csum = idpf_tx_singleq_csum(skb, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunneling)
		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

	/* record the location of the first descriptor for this packet */
	first = &tx_q->tx_buf[tx_q->next_to_use];
	first->skb = skb;

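	/* For TSO, each segment after the first carries its own copy of the
	 * protocol headers, so the byte count reported to the stack below is
	 * skb->len plus (gso_segs - 1) * tso_hdr_len.
	 */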
	if (tso) {
		first->gso_segs = offload.tso_segs;
		first->bytecount = skb->len +
				   ((first->gso_segs - 1) * offload.tso_hdr_len);
	} else {
		first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
		first->gso_segs = 1;
	}
	idpf_tx_singleq_map(tx_q, first, &offload);

	return NETDEV_TX_OK;

out_drop:
	return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns true if the clean completed within the clean budget, false if
 * there may be more work to do.
 */
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	unsigned int total_bytes = 0, total_pkts = 0;
	struct idpf_base_tx_desc *tx_desc;
	u32 budget = tx_q->clean_budget;
	s16 ntc = tx_q->next_to_clean;
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = &tx_q->base_tx[ntc];
	tx_buf = &tx_q->tx_buf[ntc];
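	/* Bias ntc by -desc_count so that the wrap checks in the loop below
	 * reduce to a simple test for zero instead of a compare against
	 * desc_count.
	 */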
	ntc -= tx_q->desc_count;

	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * its corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (tx_buf->ctx_entry) {
			/* Clear this flag here to avoid stale flag values when
			 * this buffer is used for actual data in the future.
			 * There are cases where the tx_buf struct / the flags
			 * field will not be cleared before being reused.
			 */
			tx_buf->ctx_entry = false;
			goto fetch_next_txq_desc;
		}

		/* if next_to_watch is not set then no work pending */
		eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch;
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_pkts += tx_buf->gso_segs;

		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_q->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buf data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = &tx_q->base_tx[0];
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_q->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = &tx_q->base_tx[0];
		}
	} while (likely(budget));

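	/* remove the -desc_count bias before writing next_to_clean back */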
	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += total_pkts;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.packets, total_pkts);
	u64_stats_add(&tx_q->q_stats.bytes, total_bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	np = netdev_priv(tx_q->netdev);
	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);

	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(tx_q->netdev);
	__netif_txq_completed_wake(nq, total_pkts, total_bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_txq = q_vec->num_txq;
	bool clean_complete = true;
	int i, budget_per_q;

	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
	for (i = 0; i < num_txq; i++) {
		struct idpf_tx_queue *q;

		q = q_vec->tx[i];
		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
							cleaned);
	}

	return clean_complete;
}

/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
					 const u64 stat_err_bits)
{
	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
		  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rx_desc: Rx descriptor for current buffer
 *
 * Returns true if the descriptor is not the end of the packet (more buffers
 * follow), false otherwise.
 */
static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
{
	/* if we are the last buffer then there is nothing else to do */
	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
		return false;

	return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @decoded: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
				 struct sk_buff *skb,
				 struct idpf_rx_csum_decoded csum_bits,
				 struct libeth_rx_pt decoded)
{
	bool ipv4, ipv6;

	/* check if Rx checksum is enabled */
	if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
		return;

	/* check if HW has decoded the packet and checksum */
	if (unlikely(!csum_bits.l3l4p))
		return;

	ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
	ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;

	/* Check if there were any checksum errors */
	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
		goto checksum_fail;

	/* Device could not do any checksum offload for certain extension
	 * headers as indicated by setting IPV6EXADD bit
	 */
	if (unlikely(ipv6 && csum_bits.ipv6exadd))
		return;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (unlikely(csum_bits.l4e))
		goto checksum_fail;

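	/* a tunneled packet (NAT bit set) with an outer UDP checksum error
	 * cannot be reported as verified either
	 */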
	if (unlikely(csum_bits.nat && csum_bits.eudpe))
		goto checksum_fail;

	/* Handle packets that were not able to be checksummed due to arrival
	 * speed, in this case the stack can compute the csum.
	 */
	if (unlikely(csum_bits.pprs))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
	 */
	if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	return;

checksum_fail:
	u64_stats_update_begin(&rxq->stats_sync);
	u64_stats_inc(&rxq->q_stats.hw_csum_err);
	u64_stats_update_end(&rxq->stats_sync);
}

/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct idpf_rx_csum_decoded
idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct idpf_rx_csum_decoded csum_bits = { };
	u32 rx_error, rx_status;
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
				   rx_error);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
				   rx_error);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
				    rx_status);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
					rx_status);

	return csum_bits;
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct idpf_rx_csum_decoded
idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct idpf_rx_csum_decoded csum_bits = { };
	u16 rx_status0, rx_status1;

	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
				  rx_status0);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
				   rx_status0);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
				  rx_status0);
	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
				    rx_status0);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
				    rx_status0);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
					rx_status0);
	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
				  rx_status1);

	return csum_bits;
}

/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	u64 mask, qw1;

	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
		return;

	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	if (FIELD_GET(mask, qw1) == mask) {
		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
		return;

	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
		u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void
idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
				   struct sk_buff *skb,
				   const union virtchnl2_rx_desc *rx_desc,
				   u16 ptype)
{
	struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
	struct idpf_rx_csum_decoded csum_bits;

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_q->netdev);

	/* Check if we're using base mode descriptor IDs */
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_base_csum(rx_desc);
	} else {
		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
	}

	idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
	skb_record_rx_queue(skb, rx_q->idx);
}

/**
 * idpf_rx_buf_hw_update - Store the new tail and head values
 * @rxq: queue to bump
 * @val: new head index
 */
static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
{
	rxq->next_to_use = val;

	if (unlikely(!rxq->tail))
		return;

	/* writel has an implicit memory barrier */
	writel(val, rxq->tail);
}

/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
				      u16 cleaned_count)
{
	struct virtchnl2_singleq_rx_buf_desc *desc;
	const struct libeth_fq_fp fq = {
		.pp = rx_q->pp,
		.fqes = rx_q->rx_buf,
		.truesize = rx_q->truesize,
		.count = rx_q->desc_count,
	};
	u16 nta = rx_q->next_to_alloc;

	if (!cleaned_count)
		return false;

	desc = &rx_q->single_buf[nta];

	do {
		dma_addr_t addr;

		addr = libeth_rx_alloc(&fq, nta);
		if (addr == DMA_MAPPING_ERROR)
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->pkt_addr = cpu_to_le64(addr);
		desc->hdr_addr = 0;
		desc++;

		nta++;
		if (unlikely(nta == rx_q->desc_count)) {
			desc = &rx_q->single_buf[0];
			nta = 0;
		}

		cleaned_count--;
	} while (cleaned_count);

	if (rx_q->next_to_alloc != nta) {
		idpf_rx_buf_hw_update(rx_q, nta);
		rx_q->next_to_alloc = nta;
	}

	return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct idpf_rx_extracted *fields)
{
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct idpf_rx_extracted *fields)
{
	fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
				 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
				     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 */
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
			       const union virtchnl2_rx_desc *rx_desc,
			       struct idpf_rx_extracted *fields)
{
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
		idpf_rx_singleq_extract_base_fields(rx_desc, fields);
	else
		idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}

/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Returns the number of packets cleaned; the full budget is returned if a
 * buffer allocation failed, so that the queue is polled again.
 */
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	struct sk_buff *skb = rx_q->skb;
	u16 ntc = rx_q->next_to_clean;
	u16 cleaned_count = 0;
	bool failure = false;

	/* Process Rx packets bounded by budget */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		struct idpf_rx_extracted fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = &rx_q->rx[ntc];

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used, if the hardware wrote DD then the length will be
		 * non-zero
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		rx_buf = &rx_q->rx_buf[ntc];
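		/* When libeth reports there is no data to sync for this
		 * buffer, skip it instead of attaching it to the skb.
		 */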
		if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
			goto skip_data;

		if (skb)
			idpf_rx_add_frag(rx_buf, skb, fields.size);
		else
			skb = idpf_rx_build_skb(rx_buf, fields.size);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

skip_data:
		rx_buf->page = NULL;

		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			dev_kfree_skb_any(skb);
			skb = NULL;
			continue;
		}

		/* pad skb if needed (to make valid ethernet frame) */
		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		/* protocol */
		idpf_rx_singleq_process_skb_fields(rx_q, skb,
						   rx_desc, fields.rx_ptype);

		/* send completed skb up the stack */
		napi_gro_receive(rx_q->pp->p.napi, skb);
		skb = NULL;

		/* update budget accounting */
		total_rx_pkts++;
	}

	rx_q->skb = skb;

	rx_q->next_to_clean = ntc;

	page_pool_nid_changed(rx_q->pp, numa_mem_id());
	if (cleaned_count)
		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
	u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_rxq = q_vec->num_rxq;
	bool clean_complete = true;
	int budget_per_q, i;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
	for (i = 0; i < num_rxq; i++) {
		struct idpf_rx_queue *rxq = q_vec->rx[i];
		int pkts_cleaned_per_q;

		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

		/* if we clean as many as budgeted, we must not be done */
		if (pkts_cleaned_per_q >= budget_per_q)
			clean_complete = false;
		*cleaned += pkts_cleaned_per_q;
	}

	return clean_complete;
}

/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
	struct idpf_q_vector *q_vector =
				container_of(napi, struct idpf_q_vector, napi);
	bool clean_complete;
	int work_done = 0;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0) {
		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

		return budget;
	}

	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
						   &work_done);
	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
						    &work_done);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete)
		return budget;

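	/* Never report the full budget back when completing NAPI; the core
	 * treats a poll return value equal to budget as "more work pending".
	 */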
	work_done = min_t(int, work_done, budget - 1);

	/* Exit the polling mode, but don't re-enable interrupts if stack might
	 * poll us due to busy-polling
	 */
	if (likely(napi_complete_done(napi, work_done)))
		idpf_vport_intr_update_itr_ena_irq(q_vector);

	return work_done;
}