// SPDX-License-Identifier: GPL-2.0
// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.

#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/netdevice.h>
#include <net/gro.h>
#include <net/page_pool/helpers.h>

#include "hinic3_hwdev.h"
#include "hinic3_nic_dev.h"
#include "hinic3_nic_io.h"
#include "hinic3_rx.h"

#define HINIC3_RX_HDR_SIZE		256
#define HINIC3_RX_BUFFER_WRITE		16

#define HINIC3_RX_TCP_PKT		0x3
#define HINIC3_RX_UDP_PKT		0x4
#define HINIC3_RX_SCTP_PKT		0x7

#define HINIC3_RX_IPV4_PKT		0
#define HINIC3_RX_IPV6_PKT		1
#define HINIC3_RX_INVALID_IP_TYPE	2

#define HINIC3_RX_PKT_FORMAT_NON_TUNNEL	0
#define HINIC3_RX_PKT_FORMAT_VXLAN	1

#define HINIC3_LRO_PKT_HDR_LEN_IPV4	66
#define HINIC3_LRO_PKT_HDR_LEN_IPV6	86
#define HINIC3_LRO_PKT_HDR_LEN(cqe) \
	(RQ_CQE_OFFOLAD_TYPE_GET((cqe)->offload_type, IP_TYPE) == \
	 HINIC3_RX_IPV6_PKT ? HINIC3_LRO_PKT_HDR_LEN_IPV6 : \
	 HINIC3_LRO_PKT_HDR_LEN_IPV4)

int hinic3_alloc_rxqs(struct net_device *netdev)
{
	/* Completed by later submission due to LoC limit. */
	return -EFAULT;
}

void hinic3_free_rxqs(struct net_device *netdev)
{
	/* Completed by later submission due to LoC limit. */
}

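/* Allocate a fresh receive buffer fragment from the queue's page pool and
 * record the page and offset in @rx_info so the RQ WQE can later be pointed
 * at its DMA address.
 */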
static int rx_alloc_mapped_page(struct page_pool *page_pool,
				struct hinic3_rx_info *rx_info, u16 buf_len)
{
	struct page *page;
	u32 page_offset;

	page = page_pool_dev_alloc_frag(page_pool, &page_offset, buf_len);
	if (unlikely(!page))
		return -ENOMEM;

	rx_info->page = page;
	rx_info->page_offset = page_offset;

	return 0;
}

static void rq_wqe_buf_set(struct hinic3_io_queue *rq, u32 wqe_idx,
			   dma_addr_t dma_addr, u16 len)
{
	struct hinic3_rq_wqe *rq_wqe;

	rq_wqe = get_q_element(&rq->wq.qpages, wqe_idx, NULL);
	rq_wqe->buf_hi_addr = upper_32_bits(dma_addr);
	rq_wqe->buf_lo_addr = lower_32_bits(dma_addr);
}

static u32 hinic3_rx_fill_buffers(struct hinic3_rxq *rxq)
{
	u32 i, free_wqebbs = rxq->delta - 1;
	struct hinic3_rx_info *rx_info;
	dma_addr_t dma_addr;
	int err;

	for (i = 0; i < free_wqebbs; i++) {
		rx_info = &rxq->rx_info[rxq->next_to_update];

		err = rx_alloc_mapped_page(rxq->page_pool, rx_info,
					   rxq->buf_len);
		if (unlikely(err))
			break;

		dma_addr = page_pool_get_dma_addr(rx_info->page) +
			   rx_info->page_offset;
		rq_wqe_buf_set(rxq->rq, rxq->next_to_update, dma_addr,
			       rxq->buf_len);
		rxq->next_to_update = (rxq->next_to_update + 1) & rxq->q_mask;
	}

	if (likely(i)) {
		hinic3_write_db(rxq->rq, rxq->q_id & 3, DB_CFLAG_DP_RQ,
				rxq->next_to_update << HINIC3_NORMAL_RQ_WQE);
		rxq->delta -= i;
		rxq->next_to_alloc = rxq->next_to_update;
	}

	return i;
}

static void hinic3_add_rx_frag(struct hinic3_rxq *rxq,
			       struct hinic3_rx_info *rx_info,
			       struct sk_buff *skb, u32 size)
{
	struct page *page;
	u8 *va;

	page = rx_info->page;
	va = (u8 *)page_address(page) + rx_info->page_offset;
	net_prefetch(va);

	page_pool_dma_sync_for_cpu(rxq->page_pool, page, rx_info->page_offset,
				   rxq->buf_len);

	if (size <= HINIC3_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
		memcpy(__skb_put(skb, size), va,
		       ALIGN(size, sizeof(long)));
		page_pool_put_full_page(rxq->page_pool, page, false);

		return;
	}

	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			rx_info->page_offset, size, rxq->buf_len);
	skb_mark_for_recycle(skb);
}

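/* Walk the SGEs that make up one received packet, attaching each completed
 * buffer to the skb until the full packet length has been consumed.
 */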
static void packaging_skb(struct hinic3_rxq *rxq, struct sk_buff *skb,
			  u32 sge_num, u32 pkt_len)
{
	struct hinic3_rx_info *rx_info;
	u32 temp_pkt_len = pkt_len;
	u32 temp_sge_num = sge_num;
	u32 sw_ci;
	u32 size;

	sw_ci = rxq->cons_idx & rxq->q_mask;
	while (temp_sge_num) {
		rx_info = &rxq->rx_info[sw_ci];
		sw_ci = (sw_ci + 1) & rxq->q_mask;
		if (unlikely(temp_pkt_len > rxq->buf_len)) {
			size = rxq->buf_len;
			temp_pkt_len -= rxq->buf_len;
		} else {
			size = temp_pkt_len;
		}

		hinic3_add_rx_frag(rxq, rx_info, skb, size);

		/* clear contents of buffer_info */
		rx_info->page = NULL;
		temp_sge_num--;
	}
}

static u32 hinic3_get_sge_num(struct hinic3_rxq *rxq, u32 pkt_len)
{
	u32 sge_num;

	sge_num = pkt_len >> rxq->buf_len_shift;
	sge_num += (pkt_len & (rxq->buf_len - 1)) ? 1 : 0;

	return sge_num;
}

static struct sk_buff *hinic3_fetch_rx_buffer(struct hinic3_rxq *rxq,
					      u32 pkt_len)
{
	struct sk_buff *skb;
	u32 sge_num;

	skb = napi_alloc_skb(&rxq->irq_cfg->napi, HINIC3_RX_HDR_SIZE);
	if (unlikely(!skb))
		return NULL;

	sge_num = hinic3_get_sge_num(rxq, pkt_len);

	net_prefetchw(skb->data);
	packaging_skb(rxq, skb, sge_num, pkt_len);

	rxq->cons_idx += sge_num;
	rxq->delta += sge_num;

	return skb;
}

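/* Copy the packet headers out of the first page fragment into the skb
 * linear area so the stack can parse them directly.
 */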
static void hinic3_pull_tail(struct sk_buff *skb)
{
	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
	unsigned int pull_len;
	unsigned char *va;

	va = skb_frag_address(frag);

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(skb->dev, va, HINIC3_RX_HDR_SIZE);

	/* align pull length to size of long to optimize memcpy performance */
	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	skb_frag_size_sub(frag, pull_len);
	skb_frag_off_add(frag, pull_len);

	skb->data_len -= pull_len;
	skb->tail += pull_len;
}

static void hinic3_rx_csum(struct hinic3_rxq *rxq, u32 offload_type,
			   u32 status, struct sk_buff *skb)
{
	u32 pkt_fmt = RQ_CQE_OFFOLAD_TYPE_GET(offload_type, TUNNEL_PKT_FORMAT);
	u32 pkt_type = RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE);
	u32 ip_type = RQ_CQE_OFFOLAD_TYPE_GET(offload_type, IP_TYPE);
	u32 csum_err = RQ_CQE_STATUS_GET(status, CSUM_ERR);
	struct net_device *netdev = rxq->netdev;

	if (!(netdev->features & NETIF_F_RXCSUM))
		return;

	if (unlikely(csum_err)) {
		/* pkt type is recognized by HW, and csum is wrong */
		skb->ip_summed = CHECKSUM_NONE;
		return;
	}

	if (ip_type == HINIC3_RX_INVALID_IP_TYPE ||
	    !(pkt_fmt == HINIC3_RX_PKT_FORMAT_NON_TUNNEL ||
	      pkt_fmt == HINIC3_RX_PKT_FORMAT_VXLAN)) {
		skb->ip_summed = CHECKSUM_NONE;
		return;
	}

	switch (pkt_type) {
	case HINIC3_RX_TCP_PKT:
	case HINIC3_RX_UDP_PKT:
	case HINIC3_RX_SCTP_PKT:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		skb->ip_summed = CHECKSUM_NONE;
		break;
	}
}

static void hinic3_lro_set_gso_params(struct sk_buff *skb, u16 num_lro)
{
	struct ethhdr *eth = (struct ethhdr *)(skb->data);
	__be16 proto;

	proto = __vlan_get_protocol(skb, eth->h_proto, NULL);

	skb_shinfo(skb)->gso_size = DIV_ROUND_UP(skb->len - skb_headlen(skb),
						 num_lro);
	skb_shinfo(skb)->gso_type = proto == htons(ETH_P_IP) ?
				    SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
	skb_shinfo(skb)->gso_segs = num_lro;
}

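/* Build an skb for one completed packet, apply checksum and LRO metadata
 * from the CQE, and hand the result to the stack via GRO.
 */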
static int recv_one_pkt(struct hinic3_rxq *rxq, struct hinic3_rq_cqe *rx_cqe,
			u32 pkt_len, u32 vlan_len, u32 status)
{
	struct net_device *netdev = rxq->netdev;
	struct sk_buff *skb;
	u32 offload_type;
	u16 num_lro;

	skb = hinic3_fetch_rx_buffer(rxq, pkt_len);
	if (unlikely(!skb))
		return -ENOMEM;

	/* place header in linear portion of buffer */
	if (skb_is_nonlinear(skb))
		hinic3_pull_tail(skb);

	offload_type = rx_cqe->offload_type;
	hinic3_rx_csum(rxq, offload_type, status, skb);

	num_lro = RQ_CQE_STATUS_GET(status, NUM_LRO);
	if (num_lro)
		hinic3_lro_set_gso_params(skb, num_lro);

	skb_record_rx_queue(skb, rxq->q_id);
	skb->protocol = eth_type_trans(skb, netdev);

	if (skb_has_frag_list(skb)) {
		napi_gro_flush(&rxq->irq_cfg->napi, false);
		netif_receive_skb(skb);
	} else {
		napi_gro_receive(&rxq->irq_cfg->napi, skb);
	}

	return 0;
}

int hinic3_rx_poll(struct hinic3_rxq *rxq, int budget)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(rxq->netdev);
	u32 sw_ci, status, pkt_len, vlan_len;
	struct hinic3_rq_cqe *rx_cqe;
	u32 num_wqe = 0;
	int nr_pkts = 0;
	u16 num_lro;

	while (likely(nr_pkts < budget)) {
		sw_ci = rxq->cons_idx & rxq->q_mask;
		rx_cqe = rxq->cqe_arr + sw_ci;
		status = rx_cqe->status;
		if (!RQ_CQE_STATUS_GET(status, RXDONE))
			break;

		/* make sure we read rx_done before packet length */
		rmb();

		vlan_len = rx_cqe->vlan_len;
		pkt_len = RQ_CQE_SGE_GET(vlan_len, LEN);
		if (recv_one_pkt(rxq, rx_cqe, pkt_len, vlan_len, status))
			break;

		nr_pkts++;
		num_lro = RQ_CQE_STATUS_GET(status, NUM_LRO);
		if (num_lro)
			num_wqe += hinic3_get_sge_num(rxq, pkt_len);

		rx_cqe->status = 0;

		if (num_wqe >= nic_dev->lro_replenish_thld)
			break;
	}

	if (rxq->delta >= HINIC3_RX_BUFFER_WRITE)
		hinic3_rx_fill_buffers(rxq);

	return nr_pkts;
}