// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */

#include "idpf.h"

/**
 * idpf_tx_singleq_csum - Enable tx checksum offloads
 * @skb: pointer to skb
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 1 if checksum offload was set up, 0 if checksum offload is not
 * needed or was handled in software, or negative if the offload cannot be
 * performed.
 */
static int idpf_tx_singleq_csum(struct sk_buff *skb,
				struct idpf_tx_offload_params *off)
{
	u32 l4_len, l3_len, l2_len;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	union {
		struct tcphdr *tcp;
		unsigned char *hdr;
	} l4;
	u32 offset, cmd = 0;
	u8 l4_proto = 0;
	__be16 frag_off;
	bool is_tso;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_transport_header(skb);

	/* compute outer L2 header size */
	l2_len = ip.hdr - skb->data;
	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
	if (skb->encapsulation) {
		u32 tunnel = 0;

		/* define outer network header type */
		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
			/* The stack computes the IP header already, the only
			 * time we need the hardware to recompute it is in the
			 * case of TSO.
			 */
			tunnel |= is_tso ?
				  IDPF_TX_CTX_EXT_IP_IPV4 :
				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;

			l4_proto = ip.v4->protocol;
		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;

			l4_proto = ip.v6->nexthdr;
			if (ipv6_ext_hdr(l4_proto))
				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
						 sizeof(*ip.v6),
						 &l4_proto, &frag_off);
		}

		/* define outer transport */
		switch (l4_proto) {
		case IPPROTO_UDP:
			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
			break;
		case IPPROTO_GRE:
			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
			break;
		case IPPROTO_IPIP:
		case IPPROTO_IPV6:
			l4.hdr = skb_inner_network_header(skb);
			break;
		default:
			if (is_tso)
				return -1;

			skb_checksum_help(skb);

			return 0;
		}
		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;

		/* compute outer L3 header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
				     (l4.hdr - ip.hdr) / 4);

		/* switch IP header pointer from outer to inner header */
		ip.hdr = skb_inner_network_header(skb);

		/* compute tunnel header size */
		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
				     (ip.hdr - l4.hdr) / 2);

		/* indicate if we need to offload outer UDP header */
		if (is_tso &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;

		/* record tunnel offload values */
		off->cd_tunneling |= tunnel;

		/* switch L4 header pointer from outer to inner */
		l4.hdr = skb_inner_transport_header(skb);
		l4_proto = 0;

		/* reset type as we transition from outer to inner headers */
		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
		if (ip.v4->version == 4)
			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
		if (ip.v6->version == 6)
			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
	}

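	/* From this point on ip.hdr/l4.hdr reference the inner headers for
	 * encapsulated packets.  The header offsets below are written in the
	 * units used by the base descriptor, as computed here: MACLEN in
	 * 2-byte words, IPLEN and L4LEN in 4-byte dwords.  For example, a
	 * plain 14-byte Ethernet header gives MACLEN = 7, and a 20-byte IPv4
	 * or TCP header without options gives IPLEN = L4LEN = 5.
	 */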
	/* Enable IP checksum offloads */
	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
		l4_proto = ip.v4->protocol;
		/* See comment above regarding need for HW to recompute IP
		 * header checksum in the case of TSO.
		 */
		if (is_tso)
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;

	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
		l4_proto = ip.v6->nexthdr;
		if (ipv6_ext_hdr(l4_proto))
			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
					 sizeof(*ip.v6), &l4_proto,
					 &frag_off);
	} else {
		return -1;
	}

	/* compute inner L3 header size */
	l3_len = l4.hdr - ip.hdr;
	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;

	/* Enable L4 checksum offloads */
	switch (l4_proto) {
	case IPPROTO_TCP:
		/* enable checksum offloads */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
		l4_len = l4.tcp->doff;
		break;
	case IPPROTO_UDP:
		/* enable UDP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
		l4_len = sizeof(struct udphdr) >> 2;
		break;
	case IPPROTO_SCTP:
		/* enable SCTP checksum offload */
		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
		l4_len = sizeof(struct sctphdr) >> 2;
		break;
	default:
		if (is_tso)
			return -1;

		skb_checksum_help(skb);

		return 0;
	}

	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
	off->td_cmd |= cmd;
	off->hdr_offsets |= offset;

	return 1;
}

/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info buffer to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 */
static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = IDPF_BASE_TX_DESC(tx_q, i);

	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (tx_q->vport->crc_enable)
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

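	/* Walk the skb head first, then each page fragment.  Chunks larger
	 * than IDPF_TX_MAX_DESC_DATA are split across several descriptors;
	 * max_data is grown so that dma + max_data lands on an
	 * IDPF_TX_MAX_READ_REQ_SIZE boundary for the intermediate splits.
	 */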
	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (dma_mapping_error(tx_q->dev, dma))
			return idpf_tx_dma_map_error(tx_q, skb, first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			tx_desc++;
			i++;

			if (i == tx_q->desc_count) {
				tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
				i = 0;
			}

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);
		tx_desc++;
		i++;

		if (i == tx_q->desc_count) {
			tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buf = &tx_q->tx_buf[i];
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytecount);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}

/**
 * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
 * @txq: queue to put context descriptor on
 *
 * Since the TX buffer ring mimics the descriptor ring, update the tx buffer
 * ring entry to reflect that this index is a context descriptor
 */
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
{
	struct idpf_base_tx_ctx_desc *ctx_desc;
	int ntu = txq->next_to_use;

	memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
	txq->tx_buf[ntu].ctx_entry = true;

	ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu);

	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
	txq->next_to_use = ntu;

	return ctx_desc;
}

/**
 * idpf_tx_singleq_build_ctx_desc - populate context descriptor
 * @txq: queue to send buffer on
 * @offload: offload parameter structure
 **/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
					   struct idpf_tx_offload_params *offload)
{
	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;

	if (offload->tso_segs) {
		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
		qw1 |= ((u64)offload->tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) &
		       IDPF_TXD_CTX_QW1_TSO_LEN_M;
		qw1 |= ((u64)offload->mss << IDPF_TXD_CTX_QW1_MSS_S) &
		       IDPF_TXD_CTX_QW1_MSS_M;

		u64_stats_update_begin(&txq->stats_sync);
		u64_stats_inc(&txq->q_stats.tx.lso_pkts);
		u64_stats_update_end(&txq->stats_sync);
	}

	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);

	desc->qw0.l2tag2 = 0;
	desc->qw0.rsvd1 = 0;
	desc->qw1 = cpu_to_le64(qw1);
}

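/* Single queue transmit flow: idpf_tx_singleq_frame() checks that the ring
 * has room for the data descriptors plus a context descriptor and a cache
 * line of slack, resolves TSO/checksum offload parameters, writes a context
 * descriptor when TSO or tunneling is used, and then calls
 * idpf_tx_singleq_map() to build the data descriptors.
 */
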
/**
 * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
 * @skb: send buffer
 * @tx_q: queue to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
					 struct idpf_queue *tx_q)
{
	struct idpf_tx_offload_params offload = { };
	struct idpf_tx_buf *first;
	unsigned int count;
	__be16 protocol;
	int csum, tso;

	count = idpf_tx_desc_count_required(tx_q, skb);
	if (unlikely(!count))
		return idpf_tx_drop_skb(tx_q, skb);

	if (idpf_tx_maybe_stop_common(tx_q,
				      count + IDPF_TX_DESCS_PER_CACHE_LINE +
				      IDPF_TX_DESCS_FOR_CTX)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		return NETDEV_TX_BUSY;
	}

	protocol = vlan_get_protocol(skb);
	if (protocol == htons(ETH_P_IP))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
	else if (protocol == htons(ETH_P_IPV6))
		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;

	tso = idpf_tso(skb, &offload);
	if (tso < 0)
		goto out_drop;

	csum = idpf_tx_singleq_csum(skb, &offload);
	if (csum < 0)
		goto out_drop;

	if (tso || offload.cd_tunneling)
		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);

	/* record the location of the first descriptor for this packet */
	first = &tx_q->tx_buf[tx_q->next_to_use];
	first->skb = skb;

	if (tso) {
		first->gso_segs = offload.tso_segs;
		first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len);
	} else {
		first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
		first->gso_segs = 1;
	}
	idpf_tx_singleq_map(tx_q, first, &offload);

	return NETDEV_TX_OK;

out_drop:
	return idpf_tx_drop_skb(tx_q, skb);
}

/**
 * idpf_tx_singleq_start - Selects the right Tx queue to send buffer
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
	struct idpf_queue *tx_q;

	tx_q = vport->txqs[skb_get_queue_mapping(skb)];

	/* hardware can't handle really short frames, hardware padding works
	 * beyond this point
	 */
	if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) {
		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);

		return NETDEV_TX_OK;
	}

	return idpf_tx_singleq_frame(skb, tx_q);
}

/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 */
static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	unsigned int budget = tx_q->vport->compln_clean_budget;
	unsigned int total_bytes = 0, total_pkts = 0;
	struct idpf_base_tx_desc *tx_desc;
	s16 ntc = tx_q->next_to_clean;
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct idpf_vport *vport;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc);
	tx_buf = &tx_q->tx_buf[ntc];
	ntc -= tx_q->desc_count;

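	/* ntc is biased by -desc_count so the loop below can detect a ring
	 * wrap with a cheap "!ntc" check; the real index is restored by
	 * adding desc_count back after the loop.
	 */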
	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * its corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (tx_buf->ctx_entry) {
			/* Clear this flag here to avoid stale flag values when
			 * this buffer is used for actual data in the future.
			 * There are cases where the tx_buf struct / the flags
			 * field will not be cleared before being reused.
			 */
			tx_buf->ctx_entry = false;
			goto fetch_next_txq_desc;
		}

		/* if next_to_watch is not set then no work pending */
		eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch;
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buf->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buf->bytecount;
		total_pkts += tx_buf->gso_segs;

		napi_consume_skb(tx_buf->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_q->dev,
				 dma_unmap_addr(tx_buf, dma),
				 dma_unmap_len(tx_buf, len),
				 DMA_TO_DEVICE);

		/* clear tx_buf data */
		tx_buf->skb = NULL;
		dma_unmap_len_set(tx_buf, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buf, len)) {
				dma_unmap_page(tx_q->dev,
					       dma_unmap_addr(tx_buf, dma),
					       dma_unmap_len(tx_buf, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buf, len, 0);
			}
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
		}
	} while (likely(budget));

	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += total_pkts;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts);
	u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	vport = tx_q->vport;
	np = netdev_priv(vport->netdev);
	nq = netdev_get_tx_queue(vport->netdev, tx_q->idx);

	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(vport->netdev);
	__netif_txq_completed_wake(nq, total_pkts, total_bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}

/**
 * idpf_tx_singleq_clean_all - Clean all Tx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_txq = q_vec->num_txq;
	bool clean_complete = true;
	int i, budget_per_q;

	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
	for (i = 0; i < num_txq; i++) {
		struct idpf_queue *q;

		q = q_vec->tx[i];
		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
							cleaned);
	}

	return clean_complete;
}

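/* The single queue model supports two Rx descriptor writeback formats: the
 * 32-byte base format (VIRTCHNL2_RXDID_1_32B_BASE_M) and the flexible NIC
 * format (VIRTCHNL2_RXDID_2_FLEX_SQ_NIC).  The queue's rxdids field records
 * which format is in use, and the helpers below decode status, error,
 * checksum and hash information from the corresponding layout.
 */
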
/**
 * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
 * status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_ptype_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
					 const u64 stat_err_bits)
{
	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
		  cpu_to_le64(stat_err_bits));
}

/**
 * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
 * @rxq: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 * @skb: Current socket buffer containing buffer in progress
 * @ntc: next to clean
 */
static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
				       union virtchnl2_rx_desc *rx_desc,
				       struct sk_buff *skb, u16 ntc)
{
	/* if we are the last buffer then there is nothing else to do */
	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
		return false;

	return true;
}

/**
 * idpf_rx_singleq_csum - Indicate in skb if checksum is good
 * @rxq: Rx ring being processed
 * @skb: skb currently being received and modified
 * @csum_bits: checksum bits from descriptor
 * @ptype: the packet type decoded by hardware
 *
 * skb->protocol must be set before this function is called
 */
static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
				 struct idpf_rx_csum_decoded *csum_bits,
				 u16 ptype)
{
	struct idpf_rx_ptype_decoded decoded;
	bool ipv4, ipv6;

	/* check if Rx checksum is enabled */
	if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM)))
		return;

	/* check if HW has decoded the packet and checksum */
	if (unlikely(!(csum_bits->l3l4p)))
		return;

	decoded = rxq->vport->rx_ptype_lkup[ptype];
	if (unlikely(!(decoded.known && decoded.outer_ip)))
		return;

	ipv4 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV4);
	ipv6 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV6);

	/* Check if there were any checksum errors */
	if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe)))
		goto checksum_fail;

	/* Device could not do any checksum offload for certain extension
	 * headers as indicated by setting IPV6EXADD bit
	 */
	if (unlikely(ipv6 && csum_bits->ipv6exadd))
		return;

	/* check for L4 errors and handle packets that were not able to be
	 * checksummed due to arrival speed
	 */
	if (unlikely(csum_bits->l4e))
		goto checksum_fail;

	if (unlikely(csum_bits->nat && csum_bits->eudpe))
		goto checksum_fail;

	/* Handle packets that were not able to be checksummed due to arrival
	 * speed, in this case the stack can compute the csum.
	 */
	if (unlikely(csum_bits->pprs))
		return;

	/* If there is an outer header present that might contain a checksum
	 * we need to bump the checksum level by 1 to reflect the fact that
	 * we are indicating we validated the inner checksum.
	 */
	if (decoded.tunnel_type >= IDPF_RX_PTYPE_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;

	/* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */
	switch (decoded.inner_prot) {
	case IDPF_RX_PTYPE_INNER_PROT_ICMP:
	case IDPF_RX_PTYPE_INNER_PROT_TCP:
	case IDPF_RX_PTYPE_INNER_PROT_UDP:
	case IDPF_RX_PTYPE_INNER_PROT_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		return;
	default:
		return;
	}

checksum_fail:
	u64_stats_update_begin(&rxq->stats_sync);
	u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
	u64_stats_update_end(&rxq->stats_sync);
}

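/* The two writeback formats expose slightly different checksum detail: the
 * base descriptor carries a PPRS indication but no NAT/extra-UDP error bits,
 * while the flex descriptor reports NAT and EUDPE but has no PPRS bit.  Each
 * decoder below zeroes the bits its format cannot provide before calling
 * idpf_rx_singleq_csum().
 */
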
/**
 * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 * @ptype: Rx packet type
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      u16 ptype)
{
	struct idpf_rx_csum_decoded csum_bits;
	u32 rx_error, rx_status;
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
				   rx_error);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
				   rx_error);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
				    rx_status);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
					rx_status);
	csum_bits.nat = 0;
	csum_bits.eudpe = 0;

	idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
}

/**
 * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: the receive descriptor
 * @ptype: Rx packet type
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      u16 ptype)
{
	struct idpf_rx_csum_decoded csum_bits;
	u16 rx_status0, rx_status1;

	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);

	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
				  rx_status0);
	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
				   rx_status0);
	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
				  rx_status0);
	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
				    rx_status0);
	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
				    rx_status0);
	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
					rx_status0);
	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
				  rx_status1);
	csum_bits.pprs = 0;

	idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
}

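/* RSS hash reporting also differs per format: the base descriptor signals a
 * valid hash through the FLTSTAT bits in qword1, while the flex descriptor
 * has a dedicated RSS_VALID status bit.  In both cases the hash is passed to
 * the stack along with a hash type derived from the decoded packet type.
 */
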
/**
 * idpf_rx_singleq_base_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      struct idpf_rx_ptype_decoded *decoded)
{
	u64 mask, qw1;

	if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
		return;

	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	if (FIELD_GET(mask, qw1) == mask) {
		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);

		skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded));
	}
}

/**
 * idpf_rx_singleq_flex_hash - set the hash value in the skb
 * @rx_q: Rx completion queue
 * @skb: skb currently being received and modified
 * @rx_desc: specific descriptor
 * @decoded: Decoded Rx packet type related fields
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 **/
static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
				      struct sk_buff *skb,
				      union virtchnl2_rx_desc *rx_desc,
				      struct idpf_rx_ptype_decoded *decoded)
{
	if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
		return;

	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0)))
		skb_set_hash(skb, le32_to_cpu(rx_desc->flex_nic_wb.rss_hash),
			     idpf_ptype_to_htype(decoded));
}

/**
 * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
 * descriptor
 * @rx_q: Rx ring being processed
 * @skb: pointer to current skb being populated
 * @rx_desc: descriptor for skb
 * @ptype: packet type
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, protocol, and
 * other fields within the skb.
 */
static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
					       struct sk_buff *skb,
					       union virtchnl2_rx_desc *rx_desc,
					       u16 ptype)
{
	struct idpf_rx_ptype_decoded decoded =
					rx_q->vport->rx_ptype_lkup[ptype];

	/* modifies the skb - consumes the enet header */
	skb->protocol = eth_type_trans(skb, rx_q->vport->netdev);

	/* Check if we're using base mode descriptor IDs */
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, &decoded);
		idpf_rx_singleq_base_csum(rx_q, skb, rx_desc, ptype);
	} else {
		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, &decoded);
		idpf_rx_singleq_flex_csum(rx_q, skb, rx_desc, ptype);
	}
}

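/* Rx buffer refill: used descriptors are replaced with freshly allocated
 * pages in the loop below.  The hardware is only notified (via
 * idpf_rx_buf_hw_update()) when at least one descriptor was refilled, and a
 * true return value tells the caller that an allocation failed so the queue
 * needs another refill attempt.
 */
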
/**
 * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
 * @rx_q: queue for which the hw buffers are allocated
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail
 */
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
				      u16 cleaned_count)
{
	struct virtchnl2_singleq_rx_buf_desc *desc;
	u16 nta = rx_q->next_to_alloc;
	struct idpf_rx_buf *buf;

	if (!cleaned_count)
		return false;

	desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta);
	buf = &rx_q->rx_buf.buf[nta];

	do {
		dma_addr_t addr;

		addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size);
		if (unlikely(addr == DMA_MAPPING_ERROR))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->pkt_addr = cpu_to_le64(addr);
		desc->hdr_addr = 0;
		desc++;

		buf++;
		nta++;
		if (unlikely(nta == rx_q->desc_count)) {
			desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0);
			buf = rx_q->rx_buf.buf;
			nta = 0;
		}

		cleaned_count--;
	} while (cleaned_count);

	if (rx_q->next_to_alloc != nta) {
		idpf_rx_buf_hw_update(rx_q, nta);
		rx_q->next_to_alloc = nta;
	}

	return !!cleaned_count;
}

/**
 * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
 * descriptor writeback format.
 */
static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
						union virtchnl2_rx_desc *rx_desc,
						struct idpf_rx_extracted *fields)
{
	u64 qword;

	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);

	fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
}

/**
 * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 * Decode the Rx descriptor and extract relevant information including the
 * size and Rx packet type.
 *
 * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
 * descriptor writeback format.
 */
static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
						union virtchnl2_rx_desc *rx_desc,
						struct idpf_rx_extracted *fields)
{
	fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
				 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
				     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
}

/**
 * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
 * @rx_q: Rx descriptor queue
 * @rx_desc: the descriptor to process
 * @fields: storage for extracted values
 *
 */
static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
					   union virtchnl2_rx_desc *rx_desc,
					   struct idpf_rx_extracted *fields)
{
	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
		idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields);
	else
		idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields);
}

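/* The Rx clean loop below handles one descriptor per iteration: wait for the
 * DD indication, extract size/ptype for the active writeback format, pull the
 * buffer into an skb (adding frags for non-EOP descriptors), drop frames that
 * have the RXE error bit set, and hand completed skbs to the stack via GRO.
 * Consumed buffers are refilled in a single batch once the loop exits.
 */
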
/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Returns the number of packets cleaned, or the full budget if a buffer
 * allocation failed, to guarantee another pass through this routine.
 */
static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	struct sk_buff *skb = rx_q->skb;
	u16 ntc = rx_q->next_to_clean;
	u16 cleaned_count = 0;
	bool failure = false;

	/* Process Rx packets bounded by budget */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		struct idpf_rx_extracted fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = IDPF_RX_DESC(rx_q, ntc);

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used; if the hardware wrote DD then the length will be
		 * non-zero
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		rx_buf = &rx_q->rx_buf.buf[ntc];
		if (!fields.size) {
			idpf_rx_put_page(rx_buf);
			goto skip_data;
		}

		idpf_rx_sync_for_cpu(rx_buf, fields.size);
		skb = rx_q->skb;
		if (skb)
			idpf_rx_add_frag(rx_buf, skb, fields.size);
		else
			skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

skip_data:
		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);

		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			dev_kfree_skb_any(skb);
			skb = NULL;
			continue;
		}

		/* pad skb if needed (to make valid ethernet frame) */
		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		/* protocol */
		idpf_rx_singleq_process_skb_fields(rx_q, skb,
						   rx_desc, fields.rx_ptype);

		/* send completed skb up the stack */
		napi_gro_receive(&rx_q->q_vector->napi, skb);
		skb = NULL;

		/* update budget accounting */
		total_rx_pkts++;
	}

	rx_q->skb = skb;

	rx_q->next_to_clean = ntc;

	if (cleaned_count)
		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts);
	u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}

/**
 * idpf_rx_singleq_clean_all - Clean all Rx queues
 * @q_vec: queue vector
 * @budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Returns false if clean is not complete else returns true
 */
static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
				      int *cleaned)
{
	u16 num_rxq = q_vec->num_rxq;
	bool clean_complete = true;
	int budget_per_q, i;

	/* We attempt to distribute budget to each Rx queue fairly, but don't
	 * allow the budget to go below 1 because that would exit polling early.
	 */
	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
	for (i = 0; i < num_rxq; i++) {
		struct idpf_queue *rxq = q_vec->rx[i];
		int pkts_cleaned_per_q;

		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

		/* if we clean as many as budgeted, we must not be done */
		if (pkts_cleaned_per_q >= budget_per_q)
			clean_complete = false;
		*cleaned += pkts_cleaned_per_q;
	}

	return clean_complete;
}

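/* NAPI completion: when all work is done the poll routine reports strictly
 * less than the full budget to napi_complete_done() (hence the
 * min(work_done, budget - 1) below); returning the full budget instead keeps
 * the vector in polling mode.
 */
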
/**
 * idpf_vport_singleq_napi_poll - NAPI handler
 * @napi: struct from which you get q_vector
 * @budget: budget provided by stack
 */
int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
{
	struct idpf_q_vector *q_vector =
				container_of(napi, struct idpf_q_vector, napi);
	bool clean_complete;
	int work_done = 0;

	/* Handle case where we are called by netpoll with a budget of 0 */
	if (budget <= 0) {
		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);

		return budget;
	}

	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
						   &work_done);
	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
						    &work_done);

	/* If work not completed, return budget and polling will return */
	if (!clean_complete)
		return budget;

	work_done = min_t(int, work_done, budget - 1);

	/* Exit the polling mode, but don't re-enable interrupts if stack might
	 * poll us due to busy-polling
	 */
	if (likely(napi_complete_done(napi, work_done)))
		idpf_vport_intr_update_itr_ena_irq(q_vector);

	return work_done;
}