// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include <net/libeth/xdp.h>

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if checksum offload cannot be executed, 1
 * otherwise.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
				struct idpf_tx_offload_params *off)
{
	u32 l4_len, l3_len, l2_len;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;
	__be16 frag_off;
	bool is_tso;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
	if (skb->encapsulation) {
		u32 tunnel = 0;

		/* define outer network header type */
		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
			/* The stack computes the IP header already, the only
			 * time we need the hardware to recompute it is in the
			 * case of TSO.
			 */
			tunnel |= is_tso ?
				  IDPF_TX_CTX_EXT_IP_IPV4 :
				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

			l4_proto = ip.v6->nexthdr;
			if (ipv6_ext_hdr(l4_proto))
				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
						 sizeof(*ip.v6),
						 &l4_proto, &frag_off);
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
			break;
		case IPPROTO_GRE:
			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			if (is_tso)
				return -1;

			skb_checksum_help(skb);

			return 0;
		}
		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

		/* compute outer L3 header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
				     (l4.hdr - ip.hdr) / 4);

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
				     (ip.hdr - l4.hdr) / 2);

		/* indicate if we need to offload outer UDP header */
		if (is_tso &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* See comment above regarding need for HW to recompute IP
		 * header checksum in the case of TSO.
		 */
		if (is_tso)
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
		l4_proto = ip.v6->nexthdr;
		if (ipv6_ext_hdr(l4_proto))
			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
					 sizeof(*ip.v6), &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = sizeof(struct udphdr) >> 2;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		break;
	default:
		if (is_tso)
			return -1;

		skb_checksum_help(skb);

		return 0;
	}

	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
	off->td_cmd |= cmd;
	off->hdr_offsets |= offset;

	return 1;
}

/**
 * idpf_tx_singleq_dma_map_error - handle TX DMA map errors
 * @txq: queue to send buffer on
 * @skb: send buffer
 * @first: original first buffer info buffer for packet
 * @idx: starting point on ring to unwind
 */
static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq,
					  struct sk_buff *skb,
					  struct idpf_tx_buf *first, u16 idx)
{
	struct libeth_sq_napi_stats ss = { };
	struct libeth_cq_pp cp = {
		.dev	= txq->dev,
		.ss	= &ss,
	};

	u64_stats_update_begin(&txq->stats_sync);
	u64_stats_inc(&txq->q_stats.dma_map_errs);
	u64_stats_update_end(&txq->stats_sync);

	/* clear dma mappings for failed tx_buf map */
	for (;;) {
		struct idpf_tx_buf *tx_buf;

		tx_buf = &txq->tx_buf[idx];
		libeth_tx_complete(tx_buf, &cp);
		if (tx_buf == first)
			break;
		if (idx == 0)
			idx = txq->desc_count;
		idx--;
	}

	if (skb_is_gso(skb)) {
		union idpf_tx_flex_desc *tx_desc;

		/* If we failed a DMA mapping for a TSO packet, we will have
		 * used one additional descriptor for a context
		 * descriptor. Reset that here.
		 */
		tx_desc = &txq->flex_tx[idx];
		memset(tx_desc, 0, sizeof(*tx_desc));
		if (idx == 0)
			idx = txq->desc_count;
		idx--;
	}

	/* Update tail in case netdev_xmit_more was previously true */
	idpf_tx_buf_hw_update(txq, idx, false);
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info buffer to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = &tx_q->base_tx[i];

	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (idpf_queue_has(CRC_EN, tx_q))
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (unlikely(dma_mapping_error(tx_q->dev, dma)))
			return idpf_tx_singleq_dma_map_error(tx_q, skb,
							     first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);
		tx_buf->type = LIBETH_SQE_FRAG;

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			if (unlikely(++i == tx_q->desc_count)) {
				tx_buf = &tx_q->tx_buf[0];
				tx_desc = &tx_q->base_tx[0];
				i = 0;
			} else {
				tx_buf++;
				tx_desc++;
			}

			tx_buf->type = LIBETH_SQE_EMPTY;

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);

		if (unlikely(++i == tx_q->desc_count)) {
			tx_buf = &tx_q->tx_buf[0];
			tx_desc = &tx_q->base_tx[0];
			i = 0;
		} else {
			tx_buf++;
			tx_desc++;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	first->type = LIBETH_SQE_SKB;
	first->rs_idx = i;

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytes);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the TX buffer ring mimics the descriptor ring, update the tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
{
	struct idpf_base_tx_ctx_desc *ctx_desc;
	int ntu = txq->next_to_use;

	txq->tx_buf[ntu].type = LIBETH_SQE_CTX;

	ctx_desc = &txq->base_ctx[ntu];

	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
	txq->next_to_use = ntu;

	return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
					   struct idpf_tx_offload_params *offload)
{
	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

	if (offload->tso_segs) {
		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
				  offload->tso_len);
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);

		u64_stats_update_begin(&txq->stats_sync);
		u64_stats_inc(&txq->q_stats.lso_pkts);
		u64_stats_update_end(&txq->stats_sync);
	}

	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

	desc->qw0.l2tag2 = 0;
	desc->qw0.rsvd1 = 0;
	desc->qw1 = cpu_to_le64(qw1);
}

/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
				  struct idpf_tx_queue *tx_q)
{
	struct idpf_tx_offload_params offload = { };
	struct idpf_tx_buf *first;
	u32 count, buf_count = 1;
	int csum, tso, needed;
	__be16 protocol;

	count = idpf_tx_res_count_required(tx_q, skb, &buf_count);
	if (unlikely(!count))
		return idpf_tx_drop_skb(tx_q, skb);

	needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
	if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
				       IDPF_DESC_UNUSED(tx_q),
				       needed, needed)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		u64_stats_update_begin(&tx_q->stats_sync);
		u64_stats_inc(&tx_q->q_stats.q_busy);
		u64_stats_update_end(&tx_q->stats_sync);

		return NETDEV_TX_BUSY;
	}

	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;

	tso = idpf_tso(skb, &offload);
	if (tso < 0)
		goto out_drop;

	csum = idpf_tx_singleq_csum(skb, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunneling)
		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

	/* record the location of the first descriptor for this packet */
	first = &tx_q->tx_buf[tx_q->next_to_use];
	first->skb = skb;

	if (tso) {
		first->packets = offload.tso_segs;
		first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len);
	} else {
		first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
		first->packets = 1;
	}
	idpf_tx_singleq_map(tx_q, first, &offload);

	return NETDEV_TX_OK;

out_drop:
	return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns true if the clean budget was not exhausted, false otherwise.
 */
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	struct libeth_sq_napi_stats ss = { };
	struct idpf_base_tx_desc *tx_desc;
	u32 budget = tx_q->clean_budget;
	s16 ntc = tx_q->next_to_clean;
	struct libeth_cq_pp cp = {
		.dev = tx_q->dev,
		.ss = &ss,
		.napi = napi_budget,
	};
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = &tx_q->base_tx[ntc];
	tx_buf = &tx_q->tx_buf[ntc];

	/* Keep ntc offset by -desc_count (i.e. negative) so that the
	 * '!ntc' checks below detect a ring wrap after the increments.
	 */
	ntc -= tx_q->desc_count;

	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * its corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) {
			tx_buf->type = LIBETH_SQE_EMPTY;
			goto fetch_next_txq_desc;
		}

		if (unlikely(tx_buf->type != LIBETH_SQE_SKB))
			break;

		/* prevent any other reads prior to type */
		smp_rmb();

		eop_desc = &tx_q->base_tx[tx_buf->rs_idx];

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* update the statistics for this packet */
		libeth_tx_complete(tx_buf, &cp);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = &tx_q->base_tx[0];
			}

			/* unmap any remaining paged data */
			libeth_tx_complete(tx_buf, &cp);
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = &tx_q->base_tx[0];
		}
	} while (likely(budget));

	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += ss.packets;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.packets, ss.packets);
	u64_stats_add(&tx_q->q_stats.bytes, ss.bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	np = netdev_priv(tx_q->netdev);
	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);

	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(tx_q->netdev);
	__netif_txq_completed_wake(nq, ss.packets, ss.bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete, else returns true
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_txq = q_vec->num_txq;
	bool clean_complete = true;
	int i, budget_per_q;

	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
	for (i = 0; i < num_txq; i++) {
		struct idpf_tx_queue *q;

		q = q_vec->tx[i];
		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
							cleaned);
	}

	return clean_complete;
}

/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
					 const u64 stat_err_bits)
{
	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
		  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rx_desc: Rx descriptor for current buffer
 */
static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
{
	/* if we are the last buffer then there is nothing else to do */
	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
		return false;

	return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @decoded: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
				 struct sk_buff *skb,
				 struct libeth_rx_csum csum_bits,
				 struct libeth_rx_pt decoded)
{
	bool ipv4, ipv6;

	/* check if Rx checksum is enabled */
	if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded))
		return;

	/* check if HW has decoded the packet and checksum */
	if (unlikely(!csum_bits.l3l4p))
		return;

	ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
	ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;

	/* Check if there were any checksum errors */
	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
		goto checksum_fail;

	/* Device could not do any checksum offload for certain extension
	 * headers as indicated by setting IPV6EXADD bit
	 */
	if (unlikely(ipv6 && csum_bits.ipv6exadd))
		return;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (unlikely(csum_bits.l4e))
		goto checksum_fail;

	if (unlikely(csum_bits.nat && csum_bits.eudpe))
		goto checksum_fail;

	/* Handle packets that were not able to be checksummed due to arrival
	 * speed, in this case the stack can compute the csum.
	 */
	if (unlikely(csum_bits.pprs))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
	 */
	if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	return;

checksum_fail:
	u64_stats_update_begin(&rxq->stats_sync);
	u64_stats_inc(&rxq->q_stats.hw_csum_err);
	u64_stats_update_end(&rxq->stats_sync);
}

/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct libeth_rx_csum
idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct libeth_rx_csum csum_bits = { };
	u32 rx_error, rx_status;
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
				   rx_error);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
				   rx_error);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
				    rx_status);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
					rx_status);

	return csum_bits;
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_desc: the receive descriptor
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 *
 * Return: parsed checksum status.
 **/
static struct libeth_rx_csum
idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
{
	struct libeth_rx_csum csum_bits = { };
	u16 rx_status0, rx_status1;

	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
				  rx_status0);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
				   rx_status0);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
				  rx_status0);
	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
				    rx_status0);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
				    rx_status0);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
					rx_status0);
	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
				  rx_status1);

	return csum_bits;
}

/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	u64 mask, qw1;

	if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded))
		return;

	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	if (FIELD_GET(mask, qw1) == mask) {
		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
				      struct sk_buff *skb,
				      const union virtchnl2_rx_desc *rx_desc,
				      struct libeth_rx_pt decoded)
{
	if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded))
		return;

	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
		u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);

		libeth_rx_pt_set_hash(skb, hash, decoded);
	}
}

/**
 * __idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void
__idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
				     struct sk_buff *skb,
				     const union virtchnl2_rx_desc *rx_desc,
				     u16 ptype)
{
	struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
	struct libeth_rx_csum csum_bits;

	/* Check if we're using base mode descriptor IDs */
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_base_csum(rx_desc);
	} else {
		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
		csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
	}

	idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
}

/**
 * idpf_rx_buf_hw_update - Store the new tail and head values
 * @rxq: queue to bump
 * @val: new head index
 */
static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
{
	rxq->next_to_use = val;

	if (unlikely(!rxq->tail))
		return;

	/* writel has an implicit memory barrier */
	writel(val, rxq->tail);
}

/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
				      u16 cleaned_count)
{
	struct virtchnl2_singleq_rx_buf_desc *desc;
	const struct libeth_fq_fp fq = {
		.pp = rx_q->pp,
		.fqes = rx_q->rx_buf,
		.truesize = rx_q->truesize,
		.count = rx_q->desc_count,
	};
	u16 nta = rx_q->next_to_alloc;

	if (!cleaned_count)
		return false;

	desc = &rx_q->single_buf[nta];

	do {
		dma_addr_t addr;

		addr = libeth_rx_alloc(&fq, nta);
		if (addr == DMA_MAPPING_ERROR)
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->pkt_addr = cpu_to_le64(addr);
		desc->hdr_addr = 0;
		desc++;

		nta++;
		if (unlikely(nta == rx_q->desc_count)) {
			desc = &rx_q->single_buf[0];
			nta = 0;
		}

		cleaned_count--;
	} while (cleaned_count);

	if (rx_q->next_to_alloc != nta) {
		idpf_rx_buf_hw_update(rx_q, nta);
		rx_q->next_to_alloc = nta;
	}

	return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct libeth_rqe_info *fields)
{
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
	fields->ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 */
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
				    struct libeth_rqe_info *fields)
{
	fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
				le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
	fields->ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
				  le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 */
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
			       const union virtchnl2_rx_desc *rx_desc,
			       struct libeth_rqe_info *fields)
{
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
		idpf_rx_singleq_extract_base_fields(rx_desc, fields);
	else
		idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}

static bool
idpf_rx_singleq_process_skb_fields(struct sk_buff *skb,
				   const struct libeth_xdp_buff *xdp,
				   struct libeth_rq_napi_stats *rs)
{
	struct libeth_rqe_info fields;
	struct idpf_rx_queue *rxq;

	rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq);

	idpf_rx_singleq_extract_fields(rxq, xdp->desc, &fields);
	__idpf_rx_singleq_process_skb_fields(rxq, skb, xdp->desc,
					     fields.ptype);

	return true;
}

static void idpf_xdp_run_pass(struct libeth_xdp_buff *xdp,
			      struct napi_struct *napi,
			      struct libeth_rq_napi_stats *rs,
			      const union virtchnl2_rx_desc *desc)
{
	libeth_xdp_run_pass(xdp, NULL, napi, rs, desc, NULL,
			    idpf_rx_singleq_process_skb_fields);
}

/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Returns the number of packets cleaned.
 */
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
	struct libeth_rq_napi_stats rs = { };
	u16 ntc = rx_q->next_to_clean;
	LIBETH_XDP_ONSTACK_BUFF(xdp);
	u16 cleaned_count = 0;

	libeth_xdp_init_buff(xdp, &rx_q->xdp, &rx_q->xdp_rxq);

	/* Process Rx packets bounded by budget */
	while (likely(rs.packets < budget)) {
		struct libeth_rqe_info fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = &rx_q->rx[ntc];

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used, if the hardware wrote DD then the length will be
		 * non-zero
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		rx_buf = &rx_q->rx_buf[ntc];
		libeth_xdp_process_buff(xdp, rx_buf, fields.len);
		rx_buf->netmem = 0;

		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_desc) ||
		    unlikely(!xdp->data))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			libeth_xdp_return_buff_slow(xdp);
			continue;
		}

		idpf_xdp_run_pass(xdp, rx_q->pp->p.napi, &rs, rx_desc);
	}

	rx_q->next_to_clean = ntc;
	libeth_xdp_save_buff(&rx_q->xdp, xdp);

	page_pool_nid_changed(rx_q->pp, numa_mem_id());
	if (cleaned_count)
		idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.packets, rs.packets);
	u64_stats_add(&rx_q->q_stats.bytes, rs.bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	return rs.packets;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete, else returns true
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_rxq = q_vec->num_rxq;
	bool clean_complete = true;
	int budget_per_q, i;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
	for (i = 0; i < num_rxq; i++) {
		struct idpf_rx_queue *rxq = q_vec->rx[i];
		int pkts_cleaned_per_q;

		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

		/* if we clean as many as budgeted, we must not be done */
		if (pkts_cleaned_per_q >= budget_per_q)
			clean_complete = false;
		*cleaned += pkts_cleaned_per_q;
	}

	return clean_complete;
}

/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
	struct idpf_q_vector *q_vector =
				container_of(napi, struct idpf_q_vector, napi);
	bool clean_complete;
	int work_done = 0;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0) {
		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

		return budget;
	}

	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
						   &work_done);
	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
						    &work_done);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete) {
		idpf_vport_intr_set_wb_on_itr(q_vector);
		return budget;
	}

	work_done = min_t(int, work_done, budget - 1);

	/* Exit the polling mode, but don't re-enable interrupts if stack might
	 * poll us due to busy-polling
	 */
	if (likely(napi_complete_done(napi, work_done)))
		idpf_vport_intr_update_itr_ena_irq(q_vector);
	else
		idpf_vport_intr_set_wb_on_itr(q_vector);

	return work_done;
}