// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 1 if the checksum offload was set up, 0 if no offload is needed
 * (or the checksum was computed in software), or a negative value if the
 * offload cannot be performed.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
				struct idpf_tx_offload_params *off)
{
	u32 l4_len, l3_len, l2_len;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;
	__be16 frag_off;
	bool is_tso;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
	if (skb->encapsulation) {
		u32 tunnel = 0;

		/* define outer network header type */
		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
			/* The stack computes the IP header already, the only
			 * time we need the hardware to recompute it is in the
			 * case of TSO.
			 */
			tunnel |= is_tso ?
				  IDPF_TX_CTX_EXT_IP_IPV4 :
				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

			l4_proto = ip.v6->nexthdr;
			if (ipv6_ext_hdr(l4_proto))
				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
						 sizeof(*ip.v6),
						 &l4_proto, &frag_off);
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
			break;
		case IPPROTO_GRE:
			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			if (is_tso)
				return -1;

			skb_checksum_help(skb);

			return 0;
		}
		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

		/* compute outer L3 header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
				     (l4.hdr - ip.hdr) / 4);

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
				     (ip.hdr - l4.hdr) / 2);

		/* indicate if we need to offload outer UDP header */
		if (is_tso &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
	}

	/* Enable IP checksum offloads */
	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* See comment above regarding need for HW to recompute IP
		 * header checksum in the case of TSO.
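		 * (TSO makes the hardware rewrite the IP header, e.g. the
		 * total length, for every segment it emits, so the IP
		 * checksum must be recomputed as well; non-TSO frames keep
		 * the checksum already computed by the stack.)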
		 */
		if (is_tso)
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
		l4_proto = ip.v6->nexthdr;
		if (ipv6_ext_hdr(l4_proto))
			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
					 sizeof(*ip.v6), &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = sizeof(struct udphdr) >> 2;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		break;
	default:
		if (is_tso)
			return -1;

		skb_checksum_help(skb);

		return 0;
	}

	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
	off->td_cmd |= cmd;
	off->hdr_offsets |= offset;

	return 1;
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info struct to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = IDPF_BASE_TX_DESC(tx_q, i);

	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (tx_q->vport->crc_enable)
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (dma_mapping_error(tx_q->dev, dma))
			return idpf_tx_dma_map_error(tx_q, skb, first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			tx_desc++;
			i++;

			if (i == tx_q->desc_count) {
				tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
				i = 0;
			}

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);
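		/* Move on to a fresh descriptor for the next fragment; the
		 * EOP and RS bits are only set on the final descriptor once
		 * the last fragment has been mapped, after this loop.
		 */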
		tx_desc++;
		i++;

		if (i == tx_q->desc_count) {
			tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buf = &tx_q->tx_buf[i];
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytecount);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the TX buffer ring mimics the descriptor ring, update the tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
{
	struct idpf_base_tx_ctx_desc *ctx_desc;
	int ntu = txq->next_to_use;

	memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
	txq->tx_buf[ntu].ctx_entry = true;

	ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu);

	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
	txq->next_to_use = ntu;

	return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
					   struct idpf_tx_offload_params *offload)
{
	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

	if (offload->tso_segs) {
		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
				  offload->tso_len);
		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);

		u64_stats_update_begin(&txq->stats_sync);
		u64_stats_inc(&txq->q_stats.tx.lso_pkts);
		u64_stats_update_end(&txq->stats_sync);
	}

	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

	desc->qw0.l2tag2 = 0;
	desc->qw0.rsvd1 = 0;
	desc->qw1 = cpu_to_le64(qw1);
}

/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
					 struct idpf_queue *tx_q)
{
	struct idpf_tx_offload_params offload = { };
	struct idpf_tx_buf *first;
	unsigned int count;
	__be16 protocol;
	int csum, tso;

	count = idpf_tx_desc_count_required(tx_q, skb);
	if (unlikely(!count))
		return idpf_tx_drop_skb(tx_q, skb);

	if (idpf_tx_maybe_stop_common(tx_q,
				      count + IDPF_TX_DESCS_PER_CACHE_LINE +
				      IDPF_TX_DESCS_FOR_CTX)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		return NETDEV_TX_BUSY;
	}

	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;
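
	/* Note: vlan_get_protocol() looks past any VLAN tag, so the flags
	 * set above describe the outer L3 header used by the offload setup
	 * below.
	 */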

	tso = idpf_tso(skb, &offload);
	if (tso < 0)
		goto out_drop;

	csum = idpf_tx_singleq_csum(skb, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunneling)
		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

	/* record the location of the first descriptor for this packet */
	first = &tx_q->tx_buf[tx_q->next_to_use];
	first->skb = skb;

	if (tso) {
		first->gso_segs = offload.tso_segs;
		first->bytecount = skb->len +
				   ((first->gso_segs - 1) * offload.tso_hdr_len);
	} else {
		first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
		first->gso_segs = 1;
	}
	idpf_tx_singleq_map(tx_q, first, &offload);

	return NETDEV_TX_OK;

out_drop:
	return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_start - Selects the right Tx queue to send buffer
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
	struct idpf_queue *tx_q;

	tx_q = vport->txqs[skb_get_queue_mapping(skb)];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		return NETDEV_TX_OK;
	}

	return idpf_tx_singleq_frame(skb, tx_q);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns true if the TX completion budget was not exhausted.
 */
static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	unsigned int budget = tx_q->vport->compln_clean_budget;
	unsigned int total_bytes = 0, total_pkts = 0;
	struct idpf_base_tx_desc *tx_desc;
	s16 ntc = tx_q->next_to_clean;
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct idpf_vport *vport;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc);
	tx_buf = &tx_q->tx_buf[ntc];
	ntc -= tx_q->desc_count;

	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * its corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (tx_buf->ctx_entry) {
			/* Clear this flag here to avoid stale flag values when
			 * this buffer is used for actual data in the future.
			 * There are cases where the tx_buf struct / the flags
			 * field will not be cleared before being reused.
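			 * (Only ctx_entry needs to be reset here: the rest of
			 * this buffer entry was zeroed when the context
			 * descriptor was queued in
			 * idpf_tx_singleq_get_ctx_desc().)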
			 */
			tx_buf->ctx_entry = false;
			goto fetch_next_txq_desc;
		}

		/* if next_to_watch is not set then no work pending */
		eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch;
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_pkts += tx_buf->gso_segs;

		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_q->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buf data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_q->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
		}
	} while (likely(budget));

	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += total_pkts;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts);
	u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	vport = tx_q->vport;
	np = netdev_priv(vport->netdev);
	nq = netdev_get_tx_queue(vport->netdev, tx_q->idx);

	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(vport->netdev);
	__netif_txq_completed_wake(nq, total_pkts, total_bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_txq = q_vec->num_txq;
	bool clean_complete = true;
	int i, budget_per_q;

	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
	for (i = 0; i < num_txq; i++) {
		struct idpf_queue *q;

		q = q_vec->tx[i];
		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
							cleaned);
	}

	return clean_complete;
}

/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
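 * The mask is converted once with cpu_to_le64() rather than byte-swapping
 * the descriptor field on every call, which keeps this hot-path test cheap.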
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
					 const u64 stat_err_bits)
{
	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
		  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rxq: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 * @skb: Current socket buffer containing buffer in progress
 * @ntc: next to clean
 *
 * Returns true if the descriptor is not the last one for the packet.
 */
static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
				       union virtchnl2_rx_desc *rx_desc,
				       struct sk_buff *skb, u16 ntc)
{
	/* if we are the last buffer then there is nothing else to do */
	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
		return false;

	return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @ptype: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
				 struct idpf_rx_csum_decoded *csum_bits,
				 u16 ptype)
{
	struct idpf_rx_ptype_decoded decoded;
	bool ipv4, ipv6;

	/* check if Rx checksum is enabled */
	if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM)))
		return;

	/* check if HW has decoded the packet and checksum */
	if (unlikely(!(csum_bits->l3l4p)))
		return;

	decoded = rxq->vport->rx_ptype_lkup[ptype];
	if (unlikely(!(decoded.known && decoded.outer_ip)))
		return;

	ipv4 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV4);
	ipv6 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV6);

	/* Check if there were any checksum errors */
	if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe)))
		goto checksum_fail;

	/* Device could not do any checksum offload for certain extension
	 * headers as indicated by setting IPV6EXADD bit
	 */
	if (unlikely(ipv6 && csum_bits->ipv6exadd))
		return;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (unlikely(csum_bits->l4e))
		goto checksum_fail;

	if (unlikely(csum_bits->nat && csum_bits->eudpe))
		goto checksum_fail;

	/* Handle packets that were not able to be checksummed due to arrival
	 * speed, in this case the stack can compute the csum.
	 */
	if (unlikely(csum_bits->pprs))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
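	 * (csum_level counts verified checksums minus one, so a value of 1
	 * tells the stack that both the outer and inner transport checksums
	 * were validated.)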
	 */
	if (decoded.tunnel_type >= IDPF_RX_PTYPE_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	/* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */
	switch (decoded.inner_prot) {
	case IDPF_RX_PTYPE_INNER_PROT_ICMP:
	case IDPF_RX_PTYPE_INNER_PROT_TCP:
	case IDPF_RX_PTYPE_INNER_PROT_UDP:
	case IDPF_RX_PTYPE_INNER_PROT_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		return;
	default:
		return;
	}

checksum_fail:
	u64_stats_update_begin(&rxq->stats_sync);
	u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
	u64_stats_update_end(&rxq->stats_sync);
}

/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 * @ptype: Rx packet type
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      u16 ptype)
{
	struct idpf_rx_csum_decoded csum_bits;
	u32 rx_error, rx_status;
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
				   rx_error);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
				   rx_error);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
				    rx_status);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
					rx_status);
	csum_bits.nat = 0;
	csum_bits.eudpe = 0;

	idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 * @ptype: Rx packet type
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
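 *
 * Unlike the base format, the flex descriptor reports NAT (tunnel) and outer
 * UDP checksum status; the base-only PPRS indication is not available here
 * and is simply zeroed before calling the common checksum helper.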
 **/
static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      u16 ptype)
{
	struct idpf_rx_csum_decoded csum_bits;
	u16 rx_status0, rx_status1;

	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
				  rx_status0);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
				   rx_status0);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
				  rx_status0);
	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
				    rx_status0);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
				    rx_status0);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
					rx_status0);
	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
				  rx_status1);
	csum_bits.pprs = 0;

	idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
}

/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      struct idpf_rx_ptype_decoded *decoded)
{
	u64 mask, qw1;

	if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
		return;

	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	if (FIELD_GET(mask, qw1) == mask) {
		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

		skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded));
	}
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      struct idpf_rx_ptype_decoded *decoded)
{
	if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
		return;

	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0)))
		skb_set_hash(skb, le32_to_cpu(rx_desc->flex_nic_wb.rss_hash),
			     idpf_ptype_to_htype(decoded));
}

/**
 * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, protocol, and
 * other fields within the skb.
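 *
 * The queue's descriptor ID (rxdids) selects whether the base or the flex
 * writeback layout is parsed for the hash and checksum fields.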
 */
static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
					       struct sk_buff *skb,
					       union virtchnl2_rx_desc *rx_desc,
					       u16 ptype)
{
	struct idpf_rx_ptype_decoded decoded =
					rx_q->vport->rx_ptype_lkup[ptype];

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_q->vport->netdev);

	/* Check if we're using base mode descriptor IDs */
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, &decoded);
		idpf_rx_singleq_base_csum(rx_q, skb, rx_desc, ptype);
	} else {
		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, &decoded);
		idpf_rx_singleq_flex_csum(rx_q, skb, rx_desc, ptype);
	}
}

/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
				      u16 cleaned_count)
{
	struct virtchnl2_singleq_rx_buf_desc *desc;
	u16 nta = rx_q->next_to_alloc;
	struct idpf_rx_buf *buf;

	if (!cleaned_count)
		return false;

	desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta);
	buf = &rx_q->rx_buf.buf[nta];

	do {
		dma_addr_t addr;

		addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size);
		if (unlikely(addr == DMA_MAPPING_ERROR))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->pkt_addr = cpu_to_le64(addr);
		desc->hdr_addr = 0;
		desc++;

		buf++;
		nta++;
		if (unlikely(nta == rx_q->desc_count)) {
			desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0);
			buf = rx_q->rx_buf.buf;
			nta = 0;
		}

		cleaned_count--;
	} while (cleaned_count);

	if (rx_q->next_to_alloc != nta) {
		idpf_rx_buf_hw_update(rx_q, nta);
		rx_q->next_to_alloc = nta;
	}

	return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
						union virtchnl2_rx_desc *rx_desc,
						struct idpf_rx_extracted *fields)
{
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
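 *
 * Unlike the base format, the packet length and ptype are carried in separate
 * little-endian 16-bit fields rather than packed into a single qword.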
 */
static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
						union virtchnl2_rx_desc *rx_desc,
						struct idpf_rx_extracted *fields)
{
	fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
				 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
				     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 */
static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
					   union virtchnl2_rx_desc *rx_desc,
					   struct idpf_rx_extracted *fields)
{
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
		idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields);
	else
		idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields);
}

/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Returns the number of packets cleaned, or the full budget if buffer
 * allocation failed, so that the queue is polled again.
 */
static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	struct sk_buff *skb = rx_q->skb;
	u16 ntc = rx_q->next_to_clean;
	u16 cleaned_count = 0;
	bool failure = false;

	/* Process Rx packets bounded by budget */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		struct idpf_rx_extracted fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = IDPF_RX_DESC(rx_q, ntc);

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used; if the hardware wrote DD then the length will be
		 * non-zero
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until the DD bit is
		 * known to be set
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		rx_buf = &rx_q->rx_buf.buf[ntc];
		if (!fields.size) {
			idpf_rx_put_page(rx_buf);
			goto skip_data;
		}

		idpf_rx_sync_for_cpu(rx_buf, fields.size);
		if (skb)
			idpf_rx_add_frag(rx_buf, skb, fields.size);
		else
			skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

skip_data:
		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);

		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			dev_kfree_skb_any(skb);
			skb = NULL;
			continue;
		}

		/* pad skb if needed (to make valid ethernet frame) */
		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
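		/* (the 4-byte FCS has already been removed at this point, so
		 * this byte count is slightly lower than what was seen on
		 * the wire)
		 */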
		total_rx_bytes += skb->len;

		/* protocol */
		idpf_rx_singleq_process_skb_fields(rx_q, skb,
						   rx_desc, fields.rx_ptype);

		/* send completed skb up the stack */
		napi_gro_receive(&rx_q->q_vector->napi, skb);
		skb = NULL;

		/* update budget accounting */
		total_rx_pkts++;
	}

	rx_q->skb = skb;

	rx_q->next_to_clean = ntc;

	if (cleaned_count)
		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts);
	u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_rxq = q_vec->num_rxq;
	bool clean_complete = true;
	int budget_per_q, i;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
	for (i = 0; i < num_rxq; i++) {
		struct idpf_queue *rxq = q_vec->rx[i];
		int pkts_cleaned_per_q;

		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

		/* if we clean as many as budgeted, we must not be done */
		if (pkts_cleaned_per_q >= budget_per_q)
			clean_complete = false;
		*cleaned += pkts_cleaned_per_q;
	}

	return clean_complete;
}

/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
	struct idpf_q_vector *q_vector =
				container_of(napi, struct idpf_q_vector, napi);
	bool clean_complete;
	int work_done = 0;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0) {
		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

		return budget;
	}

	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
						   &work_done);
	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
						    &work_done);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete)
		return budget;

	work_done = min_t(int, work_done, budget - 1);

	/* Exit the polling mode, but don't re-enable interrupts if stack might
	 * poll us due to busy-polling
	 */
	if (likely(napi_complete_done(napi, work_done)))
		idpf_vport_intr_update_itr_ena_irq(q_vector);

	return work_done;
}