// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include <net/libeth/rx.h>
#include <net/libeth/tx.h>

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if checksum offload cannot be executed, 1
 * otherwise.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
				struct idpf_tx_offload_params *off)
{
	u32 l4_len, l3_len, l2_len;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;
	__be16 frag_off;
	bool is_tso;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
	if (skb->encapsulation) {
		u32 tunnel = 0;

		/* define outer network header type */
		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
			/* The stack computes the IP header already, the only
			 * time we need the hardware to recompute it is in the
			 * case of TSO.
			 */
			tunnel |= is_tso ?
				  IDPF_TX_CTX_EXT_IP_IPV4 :
				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

			l4_proto = ip.v6->nexthdr;
			if (ipv6_ext_hdr(l4_proto))
				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
						 sizeof(*ip.v6),
						 &l4_proto, &frag_off);
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
			break;
		case IPPROTO_GRE:
			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			if (is_tso)
				return -1;

			skb_checksum_help(skb);

			return 0;
		}
		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

		/* compute outer L3 header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
				     (l4.hdr - ip.hdr) / 4);

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
				     (ip.hdr - l4.hdr) / 2);

		/* indicate if we need to offload outer UDP header */
		if (is_tso &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* See comment above regarding need for HW to recompute IP
		 * header checksum in the case of TSO.
		 */
		if (is_tso)
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
		l4_proto = ip.v6->nexthdr;
		if (ipv6_ext_hdr(l4_proto))
			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
					 sizeof(*ip.v6), &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = sizeof(struct udphdr) >> 2;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		break;
	default:
		if (is_tso)
			return -1;

		skb_checksum_help(skb);

		return 0;
	}

	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
	off->td_cmd |= cmd;
	off->hdr_offsets |= offset;

	return 1;
}

/**
 * idpf_tx_singleq_dma_map_error - handle TX DMA map errors
 * @txq: queue to send buffer on
 * @skb: send buffer
 * @first: original first buffer info buffer for packet
 * @idx: starting point on ring to unwind
 */
static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq,
					  struct sk_buff *skb,
					  struct idpf_tx_buf *first, u16 idx)
{
	struct libeth_sq_napi_stats ss = { };
	struct libeth_cq_pp cp = {
		.dev = txq->dev,
		.ss = &ss,
	};

	u64_stats_update_begin(&txq->stats_sync);
	u64_stats_inc(&txq->q_stats.dma_map_errs);
	u64_stats_update_end(&txq->stats_sync);

	/* clear dma mappings for failed tx_buf map */
	for (;;) {
		struct idpf_tx_buf *tx_buf;

		tx_buf = &txq->tx_buf[idx];
		libeth_tx_complete(tx_buf, &cp);
		if (tx_buf == first)
			break;
		if (idx == 0)
			idx = txq->desc_count;
		idx--;
	}

	if (skb_is_gso(skb)) {
		union idpf_tx_flex_desc *tx_desc;

		/* If we failed a DMA mapping for a TSO packet, we will have
		 * used one additional descriptor for a context
		 * descriptor. Reset that here.
		 */
		tx_desc = &txq->flex_tx[idx];
		memset(tx_desc, 0, sizeof(*tx_desc));
		if (idx == 0)
			idx = txq->desc_count;
		idx--;
	}

	/* Update tail in case netdev_xmit_more was previously true */
	idpf_tx_buf_hw_update(txq, idx, false);
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info buffer to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = &tx_q->base_tx[i];

	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (idpf_queue_has(CRC_EN, tx_q))
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (unlikely(dma_mapping_error(tx_q->dev, dma)))
			return idpf_tx_singleq_dma_map_error(tx_q, skb,
							     first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);
		tx_buf->type = LIBETH_SQE_FRAG;

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			if (unlikely(++i == tx_q->desc_count)) {
				tx_buf = &tx_q->tx_buf[0];
				tx_desc = &tx_q->base_tx[0];
				i = 0;
			} else {
				tx_buf++;
				tx_desc++;
			}

			tx_buf->type = LIBETH_SQE_EMPTY;

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);

		if (unlikely(++i == tx_q->desc_count)) {
			tx_buf = &tx_q->tx_buf[0];
			tx_desc = &tx_q->base_tx[0];
			i = 0;
		} else {
			tx_buf++;
			tx_desc++;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	first->type = LIBETH_SQE_SKB;
	first->rs_idx = i;

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytes);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the TX buffer ring mimics the descriptor ring, update the tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
{
	struct idpf_base_tx_ctx_desc *ctx_desc;
	int ntu = txq->next_to_use;

	txq->tx_buf[ntu].type = LIBETH_SQE_CTX;

	ctx_desc = &txq->base_ctx[ntu];

	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
	txq->next_to_use = ntu;

	return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
					   struct idpf_tx_offload_params *offload)
{
	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

	if (offload->tso_segs) {
		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
				  offload->tso_len);
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);

		u64_stats_update_begin(&txq->stats_sync);
		u64_stats_inc(&txq->q_stats.lso_pkts);
		u64_stats_update_end(&txq->stats_sync);
	}

	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

	desc->qw0.l2tag2 = 0;
	desc->qw0.rsvd1 = 0;
	desc->qw1 = cpu_to_le64(qw1);
}

/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
				  struct idpf_tx_queue *tx_q)
{
	struct idpf_tx_offload_params offload = { };
	struct idpf_tx_buf *first;
	u32 count, buf_count = 1;
	int csum, tso, needed;
	__be16 protocol;

	count = idpf_tx_res_count_required(tx_q, skb, &buf_count);
	if (unlikely(!count))
		return idpf_tx_drop_skb(tx_q, skb);

	needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
	if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
				       IDPF_DESC_UNUSED(tx_q),
				       needed, needed)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		u64_stats_update_begin(&tx_q->stats_sync);
		u64_stats_inc(&tx_q->q_stats.q_busy);
		u64_stats_update_end(&tx_q->stats_sync);

		return NETDEV_TX_BUSY;
	}

	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;

	tso = idpf_tso(skb, &offload);
	if (tso < 0)
		goto out_drop;

	csum = idpf_tx_singleq_csum(skb, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunneling)
		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

	/* record the location of the first descriptor for this packet */
	first = &tx_q->tx_buf[tx_q->next_to_use];
	first->skb = skb;

	if (tso) {
		first->packets = offload.tso_segs;
		first->bytes = skb->len +
			       ((first->packets - 1) * offload.tso_hdr_len);
	} else {
		first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
		first->packets = 1;
	}
	idpf_tx_singleq_map(tx_q, first, &offload);

	return NETDEV_TX_OK;

out_drop:
	return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Return: true if the clean completed within the budget, false otherwise
 */
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	struct libeth_sq_napi_stats ss = { };
	struct idpf_base_tx_desc *tx_desc;
	u32 budget = tx_q->clean_budget;
	s16 ntc = tx_q->next_to_clean;
	struct libeth_cq_pp cp = {
		.dev = tx_q->dev,
		.ss = &ss,
		.napi = napi_budget,
	};
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = &tx_q->base_tx[ntc];
	tx_buf = &tx_q->tx_buf[ntc];
	ntc -= tx_q->desc_count;

	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * its corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) {
			tx_buf->type = LIBETH_SQE_EMPTY;
			goto fetch_next_txq_desc;
		}

		if (unlikely(tx_buf->type != LIBETH_SQE_SKB))
			break;

		/* prevent any other reads prior to type */
		smp_rmb();

		eop_desc = &tx_q->base_tx[tx_buf->rs_idx];

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* update the statistics for this packet */
		libeth_tx_complete(tx_buf, &cp);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = &tx_q->base_tx[0];
			}

			/* unmap any remaining paged data */
			libeth_tx_complete(tx_buf, &cp);
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = &tx_q->base_tx[0];
		}
	} while (likely(budget));

	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += ss.packets;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.packets, ss.packets);
	u64_stats_add(&tx_q->q_stats.bytes, ss.bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	np = netdev_priv(tx_q->netdev);
	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);

	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(tx_q->netdev);
	__netif_txq_completed_wake(nq, ss.packets, ss.bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_txq = q_vec->num_txq;
	bool clean_complete = true;
	int i, budget_per_q;

	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
	for (i = 0; i < num_txq; i++) {
		struct idpf_tx_queue *q;

		q = q_vec->tx[i];
		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
							cleaned);
	}

	return clean_complete;
}

/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
					 const u64 stat_err_bits)
{
	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
		  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rx_desc: Rx descriptor for current buffer
 *
 * Return: true if the buffer is not the last one of the packet, false
 * otherwise
 */
static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
{
	/* if we are the last buffer then there is nothing else to do */
	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
		return false;

	return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @decoded: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
				 struct sk_buff *skb,
				 struct libeth_rx_csum csum_bits,
				 struct libeth_rx_pt decoded)
{
	bool ipv4, ipv6;

	/* check if Rx checksum is enabled */
	if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
		return;

	/* check if HW has decoded the packet and checksum */
	if (unlikely(!csum_bits.l3l4p))
		return;

	ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
	ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;

	/* Check if there were any checksum errors */
	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
		goto checksum_fail;

	/* Device could not do any checksum offload for certain extension
	 * headers as indicated by setting IPV6EXADD bit
	 */
	if (unlikely(ipv6 && csum_bits.ipv6exadd))
		return;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (unlikely(csum_bits.l4e))
		goto checksum_fail;

	if (unlikely(csum_bits.nat && csum_bits.eudpe))
		goto checksum_fail;

	/* Handle packets that were not able to be checksummed due to arrival
	 * speed, in this case the stack can compute the csum.
	 */
	if (unlikely(csum_bits.pprs))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
	 */
	if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	return;

checksum_fail:
	u64_stats_update_begin(&rxq->stats_sync);
	u64_stats_inc(&rxq->q_stats.hw_csum_err);
	u64_stats_update_end(&rxq->stats_sync);
}

/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct libeth_rx_csum
idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct libeth_rx_csum csum_bits = { };
	u32 rx_error, rx_status;
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
				   rx_error);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
				   rx_error);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
				    rx_status);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
					rx_status);

	return csum_bits;
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct libeth_rx_csum
idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct libeth_rx_csum csum_bits = { };
	u16 rx_status0, rx_status1;

	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
				  rx_status0);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
				   rx_status0);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
				  rx_status0);
	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
				    rx_status0);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
				    rx_status0);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
					rx_status0);
	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
				  rx_status1);

	return csum_bits;
}

/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	u64 mask, qw1;

	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
		return;

	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	if (FIELD_GET(mask, qw1) == mask) {
		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
		return;

	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
		u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void
idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
				   struct sk_buff *skb,
				   const union virtchnl2_rx_desc *rx_desc,
				   u16 ptype)
{
	struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
	struct libeth_rx_csum csum_bits;

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_q->netdev);

	/* Check if we're using base mode descriptor IDs */
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_base_csum(rx_desc);
	} else {
		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
	}

	idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
	skb_record_rx_queue(skb, rx_q->idx);
}

/**
 * idpf_rx_buf_hw_update - Store the new tail and head values
 * @rxq: queue to bump
 * @val: new head index
 */
static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
{
	rxq->next_to_use = val;

	if (unlikely(!rxq->tail))
		return;

	/* writel has an implicit memory barrier */
	writel(val, rxq->tail);
}

/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
				      u16 cleaned_count)
{
	struct virtchnl2_singleq_rx_buf_desc *desc;
	const struct libeth_fq_fp fq = {
		.pp = rx_q->pp,
		.fqes = rx_q->rx_buf,
		.truesize = rx_q->truesize,
		.count = rx_q->desc_count,
	};
	u16 nta = rx_q->next_to_alloc;

	if (!cleaned_count)
		return false;

	desc = &rx_q->single_buf[nta];

	do {
		dma_addr_t addr;

		addr = libeth_rx_alloc(&fq, nta);
		if (addr == DMA_MAPPING_ERROR)
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->pkt_addr = cpu_to_le64(addr);
		desc->hdr_addr = 0;
		desc++;

		nta++;
		if (unlikely(nta == rx_q->desc_count)) {
			desc = &rx_q->single_buf[0];
			nta = 0;
		}

		cleaned_count--;
	} while (cleaned_count);

	if (rx_q->next_to_alloc != nta) {
		idpf_rx_buf_hw_update(rx_q, nta);
		rx_q->next_to_alloc = nta;
	}

	return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct libeth_rqe_info *fields)
{
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
	fields->ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct libeth_rqe_info *fields)
{
	fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
				le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
	fields->ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
				  le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 */
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
			       const union virtchnl2_rx_desc *rx_desc,
			       struct libeth_rqe_info *fields)
{
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
		idpf_rx_singleq_extract_base_fields(rx_desc, fields);
	else
		idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}

/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Return: number of packets cleaned, or the full @budget if buffer
 * replenishment failed, to guarantee another pass through this routine
 */
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	struct sk_buff *skb = rx_q->skb;
	u16 ntc = rx_q->next_to_clean;
	u16 cleaned_count = 0;
	bool failure = false;

	/* Process Rx packets bounded by budget */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		struct libeth_rqe_info fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = &rx_q->rx[ntc];

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used, if the hardware wrote DD then the length will be
		 * non-zero
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		rx_buf = &rx_q->rx_buf[ntc];
		if (!libeth_rx_sync_for_cpu(rx_buf, fields.len))
			goto skip_data;

		if (skb)
			idpf_rx_add_frag(rx_buf, skb, fields.len);
		else
			skb = idpf_rx_build_skb(rx_buf, fields.len);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

skip_data:
		rx_buf->netmem = 0;

		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			dev_kfree_skb_any(skb);
			skb = NULL;
			continue;
		}

		/* pad skb if needed (to make valid ethernet frame) */
		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		/* protocol */
		idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc,
						   fields.ptype);

		/* send completed skb up the stack */
		napi_gro_receive(rx_q->pp->p.napi, skb);
		skb = NULL;

		/* update budget accounting */
		total_rx_pkts++;
	}

	rx_q->skb = skb;

	rx_q->next_to_clean = ntc;

	page_pool_nid_changed(rx_q->pp, numa_mem_id());
	if (cleaned_count)
		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
	u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_rxq = q_vec->num_rxq;
	bool clean_complete = true;
	int budget_per_q, i;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling
	 * early.
	 */
	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
	for (i = 0; i < num_rxq; i++) {
		struct idpf_rx_queue *rxq = q_vec->rx[i];
		int pkts_cleaned_per_q;

		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

		/* if we clean as many as budgeted, we must not be done */
		if (pkts_cleaned_per_q >= budget_per_q)
			clean_complete = false;
		*cleaned += pkts_cleaned_per_q;
	}

	return clean_complete;
}

/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
	struct idpf_q_vector *q_vector =
				container_of(napi, struct idpf_q_vector, napi);
	bool clean_complete;
	int work_done = 0;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0) {
		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

		return budget;
	}

	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
						   &work_done);
	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
						    &work_done);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete) {
		idpf_vport_intr_set_wb_on_itr(q_vector);
		return budget;
	}

	work_done = min_t(int, work_done, budget - 1);

	/* Exit the polling mode, but don't re-enable interrupts if stack might
	 * poll us due to busy-polling
	 */
	if (likely(napi_complete_done(napi, work_done)))
		idpf_vport_intr_update_itr_ena_irq(q_vector);
	else
		idpf_vport_intr_set_wb_on_itr(q_vector);

	return work_done;
}