// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include <net/libeth/rx.h>
#include <net/libeth/tx.h>

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if checksum offload cannot be executed, 1
 * otherwise.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
				struct idpf_tx_offload_params *off)
{
	u32 l4_len, l3_len, l2_len;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;
	__be16 frag_off;
	bool is_tso;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
	if (skb->encapsulation) {
		u32 tunnel = 0;

		/* define outer network header type */
		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
			/* The stack computes the IP header already, the only
			 * time we need the hardware to recompute it is in the
			 * case of TSO.
			 */
			tunnel |= is_tso ?
				  IDPF_TX_CTX_EXT_IP_IPV4 :
				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

			l4_proto = ip.v6->nexthdr;
			if (ipv6_ext_hdr(l4_proto))
				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
						 sizeof(*ip.v6),
						 &l4_proto, &frag_off);
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
			break;
		case IPPROTO_GRE:
			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			if (is_tso)
				return -1;

			skb_checksum_help(skb);

			return 0;
		}
		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

		/* compute outer L3 header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
				     (l4.hdr - ip.hdr) / 4);

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
				     (ip.hdr - l4.hdr) / 2);

		/* indicate if we need to offload outer UDP header */
		if (is_tso &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* See comment above regarding need for HW to recompute IP
		 * header checksum in the case of TSO.
		 */
		if (is_tso)
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
		l4_proto = ip.v6->nexthdr;
		if (ipv6_ext_hdr(l4_proto))
			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
					 sizeof(*ip.v6), &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = sizeof(struct udphdr) >> 2;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		break;
	default:
		if (is_tso)
			return -1;

		skb_checksum_help(skb);

		return 0;
	}

	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
	off->td_cmd |= cmd;
	off->hdr_offsets |= offset;

	return 1;
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info buffer to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = &tx_q->base_tx[i];

	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (idpf_queue_has(CRC_EN, tx_q))
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (dma_mapping_error(tx_q->dev, dma))
			return idpf_tx_dma_map_error(tx_q, skb, first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);
		tx_buf->type = LIBETH_SQE_FRAG;

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			if (unlikely(++i == tx_q->desc_count)) {
				tx_buf = &tx_q->tx_buf[0];
				tx_desc = &tx_q->base_tx[0];
				i = 0;
			} else {
				tx_buf++;
				tx_desc++;
			}

			tx_buf->type = LIBETH_SQE_EMPTY;

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);

		if (unlikely(++i == tx_q->desc_count)) {
			tx_buf = &tx_q->tx_buf[0];
			tx_desc = &tx_q->base_tx[0];
			i = 0;
		} else {
			tx_buf++;
			tx_desc++;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	first->type = LIBETH_SQE_SKB;
	first->rs_idx = i;

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytes);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the TX buffer ring mimics the descriptor ring, update the tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
{
	struct idpf_base_tx_ctx_desc *ctx_desc;
	int ntu = txq->next_to_use;

	txq->tx_buf[ntu].type = LIBETH_SQE_CTX;

	ctx_desc = &txq->base_ctx[ntu];

	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
	txq->next_to_use = ntu;

	return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
					   struct idpf_tx_offload_params *offload)
{
	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

	if (offload->tso_segs) {
		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
				  offload->tso_len);
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);

		u64_stats_update_begin(&txq->stats_sync);
		u64_stats_inc(&txq->q_stats.lso_pkts);
		u64_stats_update_end(&txq->stats_sync);
	}

	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

	desc->qw0.l2tag2 = 0;
	desc->qw0.rsvd1 = 0;
	desc->qw1 = cpu_to_le64(qw1);
}

/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
				  struct idpf_tx_queue *tx_q)
{
	struct idpf_tx_offload_params offload = { };
	struct idpf_tx_buf *first;
	int csum, tso, needed;
	unsigned int count;
	__be16 protocol;

	count = idpf_tx_desc_count_required(tx_q, skb);
	if (unlikely(!count))
		return idpf_tx_drop_skb(tx_q, skb);

	needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
	if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
				       IDPF_DESC_UNUSED(tx_q),
				       needed, needed)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		u64_stats_update_begin(&tx_q->stats_sync);
		u64_stats_inc(&tx_q->q_stats.q_busy);
		u64_stats_update_end(&tx_q->stats_sync);

		return NETDEV_TX_BUSY;
	}

	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;

	tso = idpf_tso(skb, &offload);
	if (tso < 0)
		goto out_drop;

	csum = idpf_tx_singleq_csum(skb, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunneling)
		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

	/* record the location of the first descriptor for this packet */
	first = &tx_q->tx_buf[tx_q->next_to_use];
	first->skb = skb;

	if (tso) {
		first->packets = offload.tso_segs;
		first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len);
	} else {
		first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
		first->packets = 1;
	}
	idpf_tx_singleq_map(tx_q, first, &offload);

	return NETDEV_TX_OK;

out_drop:
	return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 */
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	struct libeth_sq_napi_stats ss = { };
	struct idpf_base_tx_desc *tx_desc;
	u32 budget = tx_q->clean_budget;
	s16 ntc = tx_q->next_to_clean;
	struct libeth_cq_pp cp = {
		.dev = tx_q->dev,
		.ss = &ss,
		.napi = napi_budget,
	};
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = &tx_q->base_tx[ntc];
	tx_buf = &tx_q->tx_buf[ntc];
	ntc -= tx_q->desc_count;

	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * its corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) {
			tx_buf->type = LIBETH_SQE_EMPTY;
			goto fetch_next_txq_desc;
		}

		if (unlikely(tx_buf->type != LIBETH_SQE_SKB))
			break;

		/* prevent any other reads prior to type */
		smp_rmb();

		eop_desc = &tx_q->base_tx[tx_buf->rs_idx];

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* update the statistics for this packet */
		libeth_tx_complete(tx_buf, &cp);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = &tx_q->base_tx[0];
			}

			/* unmap any remaining paged data */
			libeth_tx_complete(tx_buf, &cp);
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = &tx_q->base_tx[0];
		}
	} while (likely(budget));

	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += ss.packets;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.packets, ss.packets);
	u64_stats_add(&tx_q->q_stats.bytes, ss.bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	np = netdev_priv(tx_q->netdev);
	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);

	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(tx_q->netdev);
	__netif_txq_completed_wake(nq, ss.packets, ss.bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_txq = q_vec->num_txq;
	bool clean_complete = true;
	int i, budget_per_q;

	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
	for (i = 0; i < num_txq; i++) {
		struct idpf_tx_queue *q;

		q = q_vec->tx[i];
		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
							cleaned);
	}

	return clean_complete;
}

/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
					 const u64 stat_err_bits)
{
	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
		  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rx_desc: Rx descriptor for current buffer
 */
static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
{
	/* if we are the last buffer then there is nothing else to do */
	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
		return false;

	return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @decoded: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
				 struct sk_buff *skb,
				 struct libeth_rx_csum csum_bits,
				 struct libeth_rx_pt decoded)
{
	bool ipv4, ipv6;

	/* check if Rx checksum is enabled */
	if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
		return;

	/* check if HW has decoded the packet and checksum */
	if (unlikely(!csum_bits.l3l4p))
		return;

	ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
	ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;

	/* Check if there were any checksum errors */
	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
		goto checksum_fail;

	/* Device could not do any checksum offload for certain extension
	 * headers as indicated by setting IPV6EXADD bit
	 */
	if (unlikely(ipv6 && csum_bits.ipv6exadd))
		return;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (unlikely(csum_bits.l4e))
		goto checksum_fail;

	if (unlikely(csum_bits.nat && csum_bits.eudpe))
		goto checksum_fail;

	/* Handle packets that were not able to be checksummed due to arrival
	 * speed; in this case the stack can compute the csum.
	 */
	if (unlikely(csum_bits.pprs))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
	 */
	if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	return;

checksum_fail:
	u64_stats_update_begin(&rxq->stats_sync);
	u64_stats_inc(&rxq->q_stats.hw_csum_err);
	u64_stats_update_end(&rxq->stats_sync);
}

/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct libeth_rx_csum
idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct libeth_rx_csum csum_bits = { };
	u32 rx_error, rx_status;
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
				   rx_error);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
				   rx_error);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
				    rx_status);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
					rx_status);

	return csum_bits;
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct libeth_rx_csum
idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct libeth_rx_csum csum_bits = { };
	u16 rx_status0, rx_status1;

	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
				  rx_status0);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
				   rx_status0);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
				  rx_status0);
	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
				    rx_status0);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
				    rx_status0);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
					rx_status0);
	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
				  rx_status1);

	return csum_bits;
}

/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	u64 mask, qw1;

	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
		return;

	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	if (FIELD_GET(mask, qw1) == mask) {
		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
		return;

	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
		u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void
idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
				   struct sk_buff *skb,
				   const union virtchnl2_rx_desc *rx_desc,
				   u16 ptype)
{
	struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
	struct libeth_rx_csum csum_bits;

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_q->netdev);

	/* Check if we're using base mode descriptor IDs */
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_base_csum(rx_desc);
	} else {
		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
	}

	idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
	skb_record_rx_queue(skb, rx_q->idx);
}

/**
 * idpf_rx_buf_hw_update - Store the new tail and head values
 * @rxq: queue to bump
 * @val: new head index
 */
static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
{
	rxq->next_to_use = val;

	if (unlikely(!rxq->tail))
		return;

	/* writel has an implicit memory barrier */
	writel(val, rxq->tail);
}

/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
				      u16 cleaned_count)
{
	struct virtchnl2_singleq_rx_buf_desc *desc;
	const struct libeth_fq_fp fq = {
		.pp = rx_q->pp,
		.fqes = rx_q->rx_buf,
		.truesize = rx_q->truesize,
		.count = rx_q->desc_count,
	};
	u16 nta = rx_q->next_to_alloc;

	if (!cleaned_count)
		return false;

	desc = &rx_q->single_buf[nta];

	do {
		dma_addr_t addr;

		addr = libeth_rx_alloc(&fq, nta);
		if (addr == DMA_MAPPING_ERROR)
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->pkt_addr = cpu_to_le64(addr);
		desc->hdr_addr = 0;
		desc++;

		nta++;
		if (unlikely(nta == rx_q->desc_count)) {
			desc = &rx_q->single_buf[0];
			nta = 0;
		}

		cleaned_count--;
	} while (cleaned_count);

	if (rx_q->next_to_alloc != nta) {
		idpf_rx_buf_hw_update(rx_q, nta);
		rx_q->next_to_alloc = nta;
	}

	return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct libeth_rqe_info *fields)
{
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
	fields->ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct libeth_rqe_info *fields)
{
	fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
				le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
	fields->ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
				  le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 */
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
			       const union virtchnl2_rx_desc *rx_desc,
			       struct libeth_rqe_info *fields)
{
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
		idpf_rx_singleq_extract_base_fields(rx_desc, fields);
	else
		idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}

/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Returns the number of packets cleaned, or the full budget if a buffer
 * allocation failure occurred, to guarantee another pass through this routine
 */
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	struct sk_buff *skb = rx_q->skb;
	u16 ntc = rx_q->next_to_clean;
	u16 cleaned_count = 0;
	bool failure = false;

	/* Process Rx packets bounded by budget */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		struct libeth_rqe_info fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = &rx_q->rx[ntc];

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used. If the hardware wrote DD then the length will
		 * be non-zero.
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		rx_buf = &rx_q->rx_buf[ntc];
		if (!libeth_rx_sync_for_cpu(rx_buf, fields.len))
			goto skip_data;

		if (skb)
			idpf_rx_add_frag(rx_buf, skb, fields.len);
		else
			skb = idpf_rx_build_skb(rx_buf, fields.len);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

skip_data:
		rx_buf->netmem = 0;

		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			dev_kfree_skb_any(skb);
			skb = NULL;
			continue;
		}

		/* pad skb if needed (to make valid ethernet frame) */
		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		/* protocol */
		idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc,
						   fields.ptype);

		/* send completed skb up the stack */
		napi_gro_receive(rx_q->pp->p.napi, skb);
		skb = NULL;

		/* update budget accounting */
		total_rx_pkts++;
	}

	rx_q->skb = skb;

	rx_q->next_to_clean = ntc;

	page_pool_nid_changed(rx_q->pp, numa_mem_id());
	if (cleaned_count)
		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
	u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_rxq = q_vec->num_rxq;
	bool clean_complete = true;
	int budget_per_q, i;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
	for (i = 0; i < num_rxq; i++) {
		struct idpf_rx_queue *rxq = q_vec->rx[i];
		int pkts_cleaned_per_q;

		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

		/* if we clean as many as budgeted, we must not be done */
		if (pkts_cleaned_per_q >= budget_per_q)
			clean_complete = false;
		*cleaned += pkts_cleaned_per_q;
	}

	return clean_complete;
}

/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
	struct idpf_q_vector *q_vector =
				container_of(napi, struct idpf_q_vector, napi);
	bool clean_complete;
	int work_done = 0;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0) {
		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

		return budget;
	}

	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
						   &work_done);
	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
						    &work_done);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete) {
		idpf_vport_intr_set_wb_on_itr(q_vector);
		return budget;
	}

	work_done = min_t(int, work_done, budget - 1);

	/* Exit the polling mode, but don't re-enable interrupts if stack might
	 * poll us due to busy-polling
	 */
	if (likely(napi_complete_done(napi, work_done)))
		idpf_vport_intr_update_itr_ena_irq(q_vector);
	else
		idpf_vport_intr_set_wb_on_itr(q_vector);

	return work_done;
}