// SPDX-License-Identifier: GPL-2.0-or-later
/* Broadcom NetXtreme-C/E network driver.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.
 */

#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/netdev_queues.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <net/tso.h>
#include <linux/bnxt/hsi.h>

#include "bnxt.h"
#include "bnxt_gso.h"

/* Map a total frame length (headers + payload) to the TX BD length-hint
 * bucket encoded in the BD flags.  Boundaries match the bucket names:
 * 512 falls in "512_AND_SMALLER", 1023 in "512_TO_1023", etc.
 */
static u32 bnxt_sw_gso_lhint(unsigned int len)
{
	if (len <= 512)
		return TX_BD_FLAGS_LHINT_512_AND_SMALLER;
	else if (len <= 1023)
		return TX_BD_FLAGS_LHINT_512_TO_1023;
	else if (len <= 2047)
		return TX_BD_FLAGS_LHINT_1024_TO_2047;
	else
		return TX_BD_FLAGS_LHINT_2048_AND_LARGER;
}

/**
 * bnxt_sw_udp_gso_xmit() - software-segment a UDP GSO skb onto a TX ring
 * @bp:  driver private state
 * @txr: TX ring that receives the descriptors
 * @txq: netdev queue backing @txr (used for flow control / BQL accounting)
 * @skb: UDP GSO skb; skb_shinfo(skb)->gso_size is used as the segment size
 *	 and must be non-zero (a zero gso_size would divide by zero below —
 *	 presumably guaranteed by the caller; TODO confirm)
 *
 * Splits @skb into gso_size-sized segments.  Per segment, a fresh header is
 * built with tso_build_hdr() into a slot of the ring's inline header buffer
 * (txr->tx_inline_buf, BNXT_SW_USO_MAX_SEGS slots of TSO_HEADER_SIZE each),
 * then one long TX BD (header) + one ext BD (csum/VLAN/cfa metadata) +
 * payload BDs are queued.  The payload DMA mappings are walked via the
 * tso_dma_map_* helpers (not visible in this file).
 *
 * Return: NETDEV_TX_OK on success or after an unrecoverable drop (the skb
 * is then freed and counted as tx_dropped); NETDEV_TX_BUSY when BD ring
 * space or inline header slots are exhausted (queue is stopped and the
 * caller should requeue).
 */
netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp,
				 struct bnxt_tx_ring_info *txr,
				 struct netdev_queue *txq,
				 struct sk_buff *skb)
{
	/* Deferred-unmap bookkeeping: the address/length of the most recent
	 * BD that finished a DMA-mapped region are held back and written to
	 * that BD only once a later region starts (or at the end), so the
	 * unmap happens on the last BD touching each region.
	 * __maybe_unused: dma_unmap_addr_set()/dma_unmap_len_set() compile
	 * to no-ops on some configs — presumably why; TODO confirm.
	 */
	unsigned int last_unmap_len __maybe_unused = 0;
	dma_addr_t last_unmap_addr __maybe_unused = 0;
	struct bnxt_sw_tx_bd *last_unmap_buf = NULL;
	unsigned int hdr_len, mss, num_segs;
	struct pci_dev *pdev = bp->pdev;
	unsigned int total_payload;
	struct tso_dma_map map;
	u32 vlan_tag_flags = 0;
	int i, bds_needed;
	struct tso_t tso;
	u16 cfa_action;
	__le32 csum;
	u16 prod;

	hdr_len = tso_start(skb, &tso);
	mss = skb_shinfo(skb)->gso_size;
	total_payload = skb->len - hdr_len;
	num_segs = DIV_ROUND_UP(total_payload, mss);

	/* A single-segment (or empty) "GSO" skb should not reach this path;
	 * treat it as malformed and drop rather than transmit.
	 */
	if (unlikely(num_segs <= 1))
		goto drop;

	/* Upper bound on the number of descriptors needed.
	 *
	 * Each segment uses 1 long BD + 1 ext BD + payload BDs, which is
	 * at most num_segs + nr_frags (each frag boundary crossing adds at
	 * most 1 extra BD).
	 */
	bds_needed = 3 * num_segs + skb_shinfo(skb)->nr_frags + 1;

	if (unlikely(bnxt_tx_avail(bp, txr) < bds_needed)) {
		netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
				   bp->tx_wake_thresh);
		return NETDEV_TX_BUSY;
	}

	/* BD backpressure alone cannot prevent overwriting in-flight
	 * headers in the inline buffer. Check slot availability directly.
	 */
	if (!netif_txq_maybe_stop(txq, bnxt_inline_avail(txr),
				  num_segs, num_segs))
		return NETDEV_TX_BUSY;

	if (unlikely(tso_dma_map_init(&map, &pdev->dev, skb, hdr_len)))
		goto drop;

	cfa_action = bnxt_xmit_get_cfa_action(skb);
	if (skb_vlan_tag_present(skb)) {
		vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
				 skb_vlan_tag_get(skb);
		if (skb->vlan_proto == htons(ETH_P_8021Q))
			vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
	}

	/* HW checksum flags shared by every segment's ext BD: always L4,
	 * plus IP header csum for IPv4.
	 */
	csum = cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
	if (!tso.ipv6)
		csum |= cpu_to_le32(TX_BD_FLAGS_IP_CKSUM);

	prod = txr->tx_prod;

	for (i = 0; i < num_segs; i++) {
		/* Last segment may carry less than a full mss. */
		unsigned int seg_payload = min_t(unsigned int, mss,
						 total_payload - i * mss);
		/* Inline header slots are a power-of-two ring; mask wraps. */
		u16 slot = (txr->tx_inline_prod + i) &
			   (BNXT_SW_USO_MAX_SEGS - 1);
		struct bnxt_sw_tx_bd *tx_buf;
		unsigned int mapping_len;
		dma_addr_t this_hdr_dma;
		unsigned int chunk_len;
		unsigned int offset;
		dma_addr_t dma_addr;
		struct tx_bd *txbd;
		struct udphdr *uh;
		void *this_hdr;
		int bd_count;
		bool last;
		u32 flags;

		last = (i == num_segs - 1);
		offset = slot * TSO_HEADER_SIZE;
		this_hdr = txr->tx_inline_buf + offset;
		this_hdr_dma = txr->tx_inline_dma + offset;

		tso_build_hdr(skb, this_hdr, &tso, seg_payload, last);

		/* Zero stale csum fields copied from the original skb;
		 * HW offload recomputes from scratch.
		 */
		uh = this_hdr + skb_transport_offset(skb);
		uh->check = 0;
		if (!tso.ipv6) {
			struct iphdr *iph = this_hdr + skb_network_offset(skb);

			iph->check = 0;
		}

		/* Header was written by the CPU above; hand it to the
		 * device before posting the BD.
		 */
		dma_sync_single_for_device(&pdev->dev, this_hdr_dma,
					   hdr_len, DMA_TO_DEVICE);

		bd_count = tso_dma_map_count(&map, seg_payload);

		tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];
		txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];

		tx_buf->skb = skb;
		tx_buf->nr_frags = bd_count;
		tx_buf->is_push = 0;
		tx_buf->is_ts_pkt = 0;

		/* Header BD points at the inline buffer, which is not
		 * unmapped per-packet — hence len 0.
		 */
		dma_unmap_addr_set(tx_buf, mapping, this_hdr_dma);
		dma_unmap_len_set(tx_buf, len, 0);

		if (last) {
			/* Only the final segment's completion releases the
			 * skb; save whatever state the map helper needs for
			 * that (opaque here — defined with tso_dma_map_*).
			 */
			tx_buf->is_sw_gso = BNXT_SW_GSO_LAST;
			tso_dma_map_completion_save(&map, &tx_buf->sw_gso_cstate);
		} else {
			tx_buf->is_sw_gso = BNXT_SW_GSO_MID;
		}

		/* Long BD: header length, BD count = header + ext + payload. */
		flags = (hdr_len << TX_BD_LEN_SHIFT) |
			TX_BD_TYPE_LONG_TX_BD |
			TX_BD_CNT(2 + bd_count);

		flags |= bnxt_sw_gso_lhint(hdr_len + seg_payload);

		txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
		txbd->tx_bd_haddr = cpu_to_le64(this_hdr_dma);
		txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod,
						   2 + bd_count);

		prod = NEXT_TX(prod);
		bnxt_init_ext_bd(bp, txr, prod, csum,
				 vlan_tag_flags, cfa_action);

		/* set dma_unmap_len on the LAST BD touching each
		 * region. Since completions are in-order, the last segment
		 * completes after all earlier ones, so the unmap is safe.
		 */
		while (tso_dma_map_next(&map, &dma_addr, &chunk_len,
					&mapping_len, seg_payload)) {
			prod = NEXT_TX(prod);
			txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)];
			tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)];

			txbd->tx_bd_haddr = cpu_to_le64(dma_addr);
			dma_unmap_addr_set(tx_buf, mapping, dma_addr);
			dma_unmap_len_set(tx_buf, len, 0);
			tx_buf->skb = NULL;
			tx_buf->is_sw_gso = 0;

			/* mapping_len != 0 marks the start of a new mapped
			 * region: flush the pending unmap info onto the BD
			 * that last touched the previous region, then start
			 * tracking this one.
			 */
			if (mapping_len) {
				if (last_unmap_buf) {
					dma_unmap_addr_set(last_unmap_buf,
							   mapping,
							   last_unmap_addr);
					dma_unmap_len_set(last_unmap_buf,
							  len,
							  last_unmap_len);
				}

				last_unmap_addr = dma_addr;
				last_unmap_len = mapping_len;
			}
			last_unmap_buf = tx_buf;

			flags = chunk_len << TX_BD_LEN_SHIFT;
			txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
			txbd->tx_bd_opaque = 0;

			seg_payload -= chunk_len;
		}

		/* txbd is the final payload BD of this segment here. */
		txbd->tx_bd_len_flags_type |=
			cpu_to_le32(TX_BD_FLAGS_PACKET_END);

		prod = NEXT_TX(prod);
	}

	/* Flush unmap info for the final region onto its last BD. */
	if (last_unmap_buf) {
		dma_unmap_addr_set(last_unmap_buf, mapping, last_unmap_addr);
		dma_unmap_len_set(last_unmap_buf, len, last_unmap_len);
	}

	txr->tx_inline_prod += num_segs;

	netdev_tx_sent_queue(txq, skb->len);

	WRITE_ONCE(txr->tx_prod, prod);
	/* Sync BDs before doorbell */
	wmb();
	bnxt_db_write(bp, &txr->tx_db, prod);

	if (unlikely(bnxt_tx_avail(bp, txr) <= bp->tx_wake_thresh))
		netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr),
				   bp->tx_wake_thresh);

	return NETDEV_TX_OK;

drop:
	dev_kfree_skb_any(skb);
	dev_core_stats_tx_dropped_inc(bp->dev);
	return NETDEV_TX_OK;
}