xref: /linux/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.

#include <linux/if_vlan.h>
#include <linux/iopoll.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/netdev_queues.h>

#include "hinic3_hwdev.h"
#include "hinic3_nic_cfg.h"
#include "hinic3_nic_dev.h"
#include "hinic3_nic_io.h"
#include "hinic3_tx.h"
#include "hinic3_wq.h"

#define MIN_SKB_LEN                32

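/* Allocate the per-queue hinic3_txq array and initialize the basic fields
 * (netdev, queue id, depth, mask, DMA device) of each TX queue.
 */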
int hinic3_alloc_txqs(struct net_device *netdev)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	struct hinic3_hwdev *hwdev = nic_dev->hwdev;
	u16 q_id, num_txqs = nic_dev->max_qps;
	struct pci_dev *pdev = nic_dev->pdev;
	struct hinic3_txq *txq;

	if (!num_txqs) {
		dev_err(hwdev->dev, "Cannot allocate zero size txqs\n");
		return -EINVAL;
	}

	nic_dev->txqs = kcalloc(num_txqs, sizeof(*nic_dev->txqs), GFP_KERNEL);
	if (!nic_dev->txqs)
		return -ENOMEM;

	for (q_id = 0; q_id < num_txqs; q_id++) {
		txq = &nic_dev->txqs[q_id];
		txq->netdev = netdev;
		txq->q_id = q_id;
		txq->q_depth = nic_dev->q_params.sq_depth;
		txq->q_mask = nic_dev->q_params.sq_depth - 1;
		txq->dev = &pdev->dev;
	}

	return 0;
}

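/* Free the txq array allocated by hinic3_alloc_txqs(). */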
void hinic3_free_txqs(struct net_device *netdev)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);

	kfree(nic_dev->txqs);
}

static void hinic3_set_buf_desc(struct hinic3_sq_bufdesc *buf_descs,
				dma_addr_t addr, u32 len)
{
	buf_descs->hi_addr = upper_32_bits(addr);
	buf_descs->lo_addr = lower_32_bits(addr);
	buf_descs->len = len;
}

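/* DMA-map the skb for transmission: the linear part goes into the wqe
 * control descriptor, each page fragment into a buffer descriptor. On a
 * mapping error, everything mapped so far is unmapped again.
 */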
static int hinic3_tx_map_skb(struct net_device *netdev, struct sk_buff *skb,
			     struct hinic3_txq *txq,
			     struct hinic3_tx_info *tx_info,
			     struct hinic3_sq_wqe_combo *wqe_combo)
{
	struct hinic3_sq_wqe_desc *wqe_desc = wqe_combo->ctrl_bd0;
	struct hinic3_sq_bufdesc *buf_desc = wqe_combo->bds_head;
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	struct hinic3_dma_info *dma_info = tx_info->dma_info;
	struct pci_dev *pdev = nic_dev->pdev;
	skb_frag_t *frag;
	u32 i, idx;
	int err;

	dma_info[0].dma = dma_map_single(&pdev->dev, skb->data,
					 skb_headlen(skb), DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, dma_info[0].dma))
		return -EFAULT;

	dma_info[0].len = skb_headlen(skb);

	wqe_desc->hi_addr = upper_32_bits(dma_info[0].dma);
	wqe_desc->lo_addr = lower_32_bits(dma_info[0].dma);

	wqe_desc->ctrl_len = dma_info[0].len;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		frag = &(skb_shinfo(skb)->frags[i]);
		if (unlikely(i == wqe_combo->first_bds_num))
			buf_desc = wqe_combo->bds_sec2;

		idx = i + 1;
		dma_info[idx].dma = skb_frag_dma_map(&pdev->dev, frag, 0,
						     skb_frag_size(frag),
						     DMA_TO_DEVICE);
		if (dma_mapping_error(&pdev->dev, dma_info[idx].dma)) {
			err = -EFAULT;
			goto err_unmap_page;
		}
		dma_info[idx].len = skb_frag_size(frag);

		hinic3_set_buf_desc(buf_desc, dma_info[idx].dma,
				    dma_info[idx].len);
		buf_desc++;
	}

	return 0;

err_unmap_page:
	while (idx > 1) {
		idx--;
		dma_unmap_page(&pdev->dev, dma_info[idx].dma,
			       dma_info[idx].len, DMA_TO_DEVICE);
	}
	dma_unmap_single(&pdev->dev, dma_info[0].dma, dma_info[0].len,
			 DMA_TO_DEVICE);
	return err;
}

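/* Undo the mappings created by hinic3_tx_map_skb(): unmap all page
 * fragments, then the linear part of the skb.
 */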
static void hinic3_tx_unmap_skb(struct net_device *netdev,
				struct sk_buff *skb,
				struct hinic3_dma_info *dma_info)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	struct pci_dev *pdev = nic_dev->pdev;
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags;) {
		i++;
		dma_unmap_page(&pdev->dev,
			       dma_info[i].dma,
			       dma_info[i].len, DMA_TO_DEVICE);
	}

	dma_unmap_single(&pdev->dev, dma_info[0].dma,
			 dma_info[0].len, DMA_TO_DEVICE);
}

union hinic3_ip {
	struct iphdr   *v4;
	struct ipv6hdr *v6;
	unsigned char  *hdr;
};

union hinic3_l4 {
	struct tcphdr *tcp;
	struct udphdr *udp;
	unsigned char *hdr;
};

enum hinic3_l3_type {
	HINIC3_L3_UNKNOWN         = 0,
	HINIC3_L3_IP6_PKT         = 1,
	HINIC3_L3_IP4_PKT_NO_CSUM = 2,
	HINIC3_L3_IP4_PKT_CSUM    = 3,
};

enum hinic3_l4_offload_type {
	HINIC3_L4_OFFLOAD_DISABLE = 0,
	HINIC3_L4_OFFLOAD_TCP     = 1,
	HINIC3_L4_OFFLOAD_STCP    = 2,
	HINIC3_L4_OFFLOAD_UDP     = 3,
};

/* initialize the L4 payload offset and offload type */
static void get_inner_l4_info(struct sk_buff *skb, union hinic3_l4 *l4,
			      u8 l4_proto, u32 *offset,
			      enum hinic3_l4_offload_type *l4_offload)
{
	switch (l4_proto) {
	case IPPROTO_TCP:
		*l4_offload = HINIC3_L4_OFFLOAD_TCP;
		/* As with TSO, the payload offset points to the start of the
		 * TCP payload.
		 */
		*offset = (l4->tcp->doff << TCP_HDR_DATA_OFF_UNIT_SHIFT) +
			   TRANSPORT_OFFSET(l4->hdr, skb);
		break;

	case IPPROTO_UDP:
		*l4_offload = HINIC3_L4_OFFLOAD_UDP;
		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
		break;
	default:
		*l4_offload = HINIC3_L4_OFFLOAD_DISABLE;
		*offset = 0;
	}
}

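/* Enable HW checksum offload for the inner L4 header. For encapsulated
 * packets, only VXLAN tunnels (UDP destination port VXLAN_OFFLOAD_PORT_LE)
 * are offloaded; other tunnels fall back to software checksumming via
 * skb_checksum_help(). Returns 1 if checksum offload was enabled, 0
 * otherwise.
 */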
static int hinic3_tx_csum(struct hinic3_txq *txq, struct hinic3_sq_task *task,
			  struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (skb->encapsulation) {
		union hinic3_ip ip;
		u8 l4_proto;

		task->pkt_info0 |= SQ_TASK_INFO0_SET(1, TUNNEL_FLAG);

		ip.hdr = skb_network_header(skb);
		if (ip.v4->version == 4) {
			l4_proto = ip.v4->protocol;
		} else if (ip.v4->version == 6) {
			union hinic3_l4 l4;
			unsigned char *exthdr;
			__be16 frag_off;

			exthdr = ip.hdr + sizeof(*ip.v6);
			l4_proto = ip.v6->nexthdr;
			l4.hdr = skb_transport_header(skb);
			if (l4.hdr != exthdr)
				ipv6_skip_exthdr(skb, exthdr - skb->data,
						 &l4_proto, &frag_off);
		} else {
			l4_proto = IPPROTO_RAW;
		}

		if (l4_proto != IPPROTO_UDP ||
		    ((struct udphdr *)skb_transport_header(skb))->dest !=
		    VXLAN_OFFLOAD_PORT_LE) {
			/* Unsupported tunnel packet, disable csum offload */
			skb_checksum_help(skb);
			return 0;
		}
	}

	task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L4_EN);

	return 1;
}

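/* Classify the inner L3 header and extract the L4 protocol, skipping IPv6
 * extension headers when present.
 */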
static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic3_ip *ip,
				 union hinic3_l4 *l4,
				 enum hinic3_l3_type *l3_type, u8 *l4_proto)
{
	unsigned char *exthdr;
	__be16 frag_off;

	if (ip->v4->version == 4) {
		*l3_type = HINIC3_L3_IP4_PKT_CSUM;
		*l4_proto = ip->v4->protocol;
	} else if (ip->v4->version == 6) {
		*l3_type = HINIC3_L3_IP6_PKT;
		exthdr = ip->hdr + sizeof(*ip->v6);
		*l4_proto = ip->v6->nexthdr;
		if (exthdr != l4->hdr) {
			ipv6_skip_exthdr(skb, exthdr - skb->data,
					 l4_proto, &frag_off);
		}
	} else {
		*l3_type = HINIC3_L3_UNKNOWN;
		*l4_proto = 0;
	}
}

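/* Program the TSO/UFO, inner L3/L4 enable, payload offset and MSS fields of
 * the task section and queue_info for a GSO packet.
 */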
static void hinic3_set_tso_info(struct hinic3_sq_task *task, u32 *queue_info,
				enum hinic3_l4_offload_type l4_offload,
				u32 offset, u32 mss)
{
	if (l4_offload == HINIC3_L4_OFFLOAD_TCP) {
		*queue_info |= SQ_CTRL_QUEUE_INFO_SET(1, TSO);
		task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L4_EN);
	} else if (l4_offload == HINIC3_L4_OFFLOAD_UDP) {
		*queue_info |= SQ_CTRL_QUEUE_INFO_SET(1, UFO);
		task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L4_EN);
	}

	/* enable L3 calculation */
	task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L3_EN);

	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(offset >> 1, PLDOFF);

	/* set MSS value */
	*queue_info &= ~SQ_CTRL_QUEUE_INFO_MSS_MASK;
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mss, MSS);
}

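/* Pseudo-header checksum with zero length and zero initial sum, used to
 * seed the L4 checksum field before handing the packet to HW for TSO.
 */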
static __sum16 csum_magic(union hinic3_ip *ip, unsigned short proto)
{
	return (ip->v4->version == 4) ?
		csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
		csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
}

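/* Set up TSO offload for a GSO skb. For tunnel packets the outer headers
 * are handled first, then the inner L3/L4 headers fill the TSO fields.
 * Returns 1 when TSO is used, 0 for non-GSO skbs, negative errno on error.
 */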
static int hinic3_tso(struct hinic3_sq_task *task, u32 *queue_info,
		      struct sk_buff *skb)
{
	enum hinic3_l4_offload_type l4_offload;
	enum hinic3_l3_type l3_type;
	union hinic3_ip ip;
	union hinic3_l4 l4;
	u8 l4_proto;
	u32 offset;
	int err;

	if (!skb_is_gso(skb))
		return 0;

	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	if (skb->encapsulation) {
		u32 gso_type = skb_shinfo(skb)->gso_type;
		/* L3 checksum is always enabled */
		task->pkt_info0 |= SQ_TASK_INFO0_SET(1, OUT_L3_EN);
		task->pkt_info0 |= SQ_TASK_INFO0_SET(1, TUNNEL_FLAG);

		l4.hdr = skb_transport_header(skb);
		ip.hdr = skb_network_header(skb);

		if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
			task->pkt_info0 |= SQ_TASK_INFO0_SET(1, OUT_L4_EN);
		}

		ip.hdr = skb_inner_network_header(skb);
		l4.hdr = skb_inner_transport_header(skb);
	} else {
		ip.hdr = skb_network_header(skb);
		l4.hdr = skb_transport_header(skb);
	}

	get_inner_l3_l4_type(skb, &ip, &l4, &l3_type, &l4_proto);

	if (l4_proto == IPPROTO_TCP)
		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);

	get_inner_l4_info(skb, &l4, l4_proto, &offset, &l4_offload);

	hinic3_set_tso_info(task, queue_info, l4_offload, offset,
			    skb_shinfo(skb)->gso_size);

	return 1;
}

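/* Fill the vlan tag, TPID selection and valid bit of the task section for
 * HW vlan insertion.
 */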
static void hinic3_set_vlan_tx_offload(struct hinic3_sq_task *task,
				       u16 vlan_tag, u8 vlan_tpid)
{
	/* vlan_tpid: 0=select TPID0 in IPSU, 1=select TPID1 in IPSU
	 * 2=select TPID2 in IPSU, 3=select TPID3 in IPSU,
	 * 4=select TPID4 in IPSU
	 */
	task->vlan_offload = SQ_TASK_INFO3_SET(vlan_tag, VLAN_TAG) |
			     SQ_TASK_INFO3_SET(vlan_tpid, VLAN_TPID) |
			     SQ_TASK_INFO3_SET(1, VLAN_TAG_VALID);
}

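/* Build the task section and queue_info offload bits for the skb: TSO or
 * checksum offload plus optional vlan insertion. Returns a bitmask of
 * HINIC3_TX_OFFLOAD_* flags, or HINIC3_TX_OFFLOAD_INVALID on failure.
 */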
static u32 hinic3_tx_offload(struct sk_buff *skb, struct hinic3_sq_task *task,
			     u32 *queue_info, struct hinic3_txq *txq)
{
	u32 offload = 0;
	int tso_cs_en;

	task->pkt_info0 = 0;
	task->ip_identify = 0;
	task->rsvd = 0;
	task->vlan_offload = 0;

	tso_cs_en = hinic3_tso(task, queue_info, skb);
	if (tso_cs_en < 0) {
		offload = HINIC3_TX_OFFLOAD_INVALID;
		return offload;
	} else if (tso_cs_en) {
		offload |= HINIC3_TX_OFFLOAD_TSO;
	} else {
		tso_cs_en = hinic3_tx_csum(txq, task, skb);
		if (tso_cs_en)
			offload |= HINIC3_TX_OFFLOAD_CSUM;
	}

#define VLAN_INSERT_MODE_MAX 5
	if (unlikely(skb_vlan_tag_present(skb))) {
		/* select the vlan insert mode by queue id; the default is the
		 * 802.1Q tag type
		 */
		hinic3_set_vlan_tx_offload(task, skb_vlan_tag_get(skb),
					   txq->q_id % VLAN_INSERT_MODE_MAX);
		offload |= HINIC3_TX_OFFLOAD_VLAN;
	}

	if (unlikely(SQ_CTRL_QUEUE_INFO_GET(*queue_info, PLDOFF) >
		     SQ_CTRL_MAX_PLDOFF)) {
		offload = HINIC3_TX_OFFLOAD_INVALID;
		return offload;
	}

	return offload;
}

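/* Return the current owner bit and toggle it when the new WQEBBs wrap
 * around the end of the work queue.
 */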
static u16 hinic3_get_and_update_sq_owner(struct hinic3_io_queue *sq,
					  u16 curr_pi, u16 wqebb_cnt)
{
	u16 owner = sq->owner;

	if (unlikely(curr_pi + wqebb_cnt >= sq->wq.q_depth))
		sq->owner = !sq->owner;

	return owner;
}

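/* Reserve the WQEBBs for one send wqe and describe its layout: a compact
 * wqe for a single-SGE packet with no offload, otherwise an extended wqe
 * with an optional task section and the required buffer descriptors.
 * Returns the owner bit to use for this wqe.
 */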
static u16 hinic3_set_wqe_combo(struct hinic3_txq *txq,
				struct hinic3_sq_wqe_combo *wqe_combo,
				u32 offload, u16 num_sge, u16 *curr_pi)
{
	struct hinic3_sq_bufdesc *first_part_wqebbs, *second_part_wqebbs;
	u16 first_part_wqebbs_num, tmp_pi;

	wqe_combo->ctrl_bd0 = hinic3_wq_get_one_wqebb(&txq->sq->wq, curr_pi);
	if (!offload && num_sge == 1) {
		wqe_combo->wqe_type = SQ_WQE_COMPACT_TYPE;
		return hinic3_get_and_update_sq_owner(txq->sq, *curr_pi, 1);
	}

	wqe_combo->wqe_type = SQ_WQE_EXTENDED_TYPE;

	if (offload) {
		wqe_combo->task = hinic3_wq_get_one_wqebb(&txq->sq->wq,
							  &tmp_pi);
		wqe_combo->task_type = SQ_WQE_TASKSECT_16BYTES;
	} else {
		wqe_combo->task_type = SQ_WQE_TASKSECT_46BITS;
	}

	if (num_sge > 1) {
		/* the first wqebb contains bd0 and the bd size equals the sq
		 * wqebb size, so we only request (num_sge - 1) wqebbs here
		 */
		hinic3_wq_get_multi_wqebbs(&txq->sq->wq, num_sge - 1, &tmp_pi,
					   &first_part_wqebbs,
					   &second_part_wqebbs,
					   &first_part_wqebbs_num);
		wqe_combo->bds_head = first_part_wqebbs;
		wqe_combo->bds_sec2 = second_part_wqebbs;
		wqe_combo->first_bds_num = first_part_wqebbs_num;
	}

	return hinic3_get_and_update_sq_owner(txq->sq, *curr_pi,
					      num_sge + !!offload);
}

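/* Fill the control and queue_info words of the wqe. Compact wqes only carry
 * format/owner bits; extended wqes also encode the buffer descriptor count,
 * task section length and an MSS that is defaulted when zero and raised to
 * the supported minimum otherwise.
 */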
static void hinic3_prepare_sq_ctrl(struct hinic3_sq_wqe_combo *wqe_combo,
				   u32 queue_info, int nr_descs, u16 owner)
{
	struct hinic3_sq_wqe_desc *wqe_desc = wqe_combo->ctrl_bd0;

	if (wqe_combo->wqe_type == SQ_WQE_COMPACT_TYPE) {
		wqe_desc->ctrl_len |=
		    SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
		    SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) |
		    SQ_CTRL_SET(owner, OWNER);

		/* the queue_info of a compact wqe is still transferred to the
		 * chip, so clear it
		 */
		wqe_desc->queue_info = 0;
		return;
	}

	wqe_desc->ctrl_len |= SQ_CTRL_SET(nr_descs, BUFDESC_NUM) |
			      SQ_CTRL_SET(wqe_combo->task_type, TASKSECT_LEN) |
			      SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
			      SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) |
			      SQ_CTRL_SET(owner, OWNER);

	wqe_desc->queue_info = queue_info;
	wqe_desc->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1, UC);

	if (!SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS)) {
		wqe_desc->queue_info |=
		    SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_DEFAULT, MSS);
	} else if (SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS) <
		   HINIC3_TX_MSS_MIN) {
		/* MSS must not be less than HINIC3_TX_MSS_MIN (80) */
		wqe_desc->queue_info &= ~SQ_CTRL_QUEUE_INFO_MSS_MASK;
		wqe_desc->queue_info |=
		    SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_MIN, MSS);
	}
}

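/* Transmit one skb on the given txq: check ring space, build the offload
 * and wqe descriptors, DMA-map the skb and ring the doorbell. Returns
 * NETDEV_TX_BUSY when the ring is too full, NETDEV_TX_OK otherwise (the skb
 * is dropped on unrecoverable errors).
 */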
static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb,
				       struct net_device *netdev,
				       struct hinic3_txq *txq)
{
	struct hinic3_sq_wqe_combo wqe_combo = {};
	struct hinic3_tx_info *tx_info;
	u32 offload, queue_info = 0;
	struct hinic3_sq_task task;
	u16 wqebb_cnt, num_sge;
	u16 saved_wq_prod_idx;
	u16 owner, pi = 0;
	u8 saved_sq_owner;
	int err;

	if (unlikely(skb->len < MIN_SKB_LEN)) {
		if (skb_pad(skb, MIN_SKB_LEN - skb->len))
			goto err_out;

		skb->len = MIN_SKB_LEN;
	}

	num_sge = skb_shinfo(skb)->nr_frags + 1;
	/* assume normal wqe format + 1 wqebb for task info */
	wqebb_cnt = num_sge + 1;

	if (unlikely(hinic3_wq_free_wqebbs(&txq->sq->wq) < wqebb_cnt)) {
		if (likely(wqebb_cnt > txq->tx_stop_thrs))
			txq->tx_stop_thrs = min(wqebb_cnt, txq->tx_start_thrs);

		netif_subqueue_try_stop(netdev, txq->sq->q_id,
					hinic3_wq_free_wqebbs(&txq->sq->wq),
					txq->tx_start_thrs);

		return NETDEV_TX_BUSY;
	}

	offload = hinic3_tx_offload(skb, &task, &queue_info, txq);
	if (unlikely(offload == HINIC3_TX_OFFLOAD_INVALID)) {
		goto err_drop_pkt;
	} else if (!offload) {
		wqebb_cnt -= 1;
		if (unlikely(num_sge == 1 &&
			     skb->len > HINIC3_COMPACT_WQEE_SKB_MAX_LEN))
			goto err_drop_pkt;
	}

	saved_wq_prod_idx = txq->sq->wq.prod_idx;
	saved_sq_owner = txq->sq->owner;

	owner = hinic3_set_wqe_combo(txq, &wqe_combo, offload, num_sge, &pi);
	if (offload)
		*wqe_combo.task = task;

	tx_info = &txq->tx_info[pi];
	tx_info->skb = skb;
	tx_info->wqebb_cnt = wqebb_cnt;

	err = hinic3_tx_map_skb(netdev, skb, txq, tx_info, &wqe_combo);
	if (err) {
		/* Roll back the work queue to reclaim the wqebbs we did not
		 * use
		 */
		txq->sq->wq.prod_idx = saved_wq_prod_idx;
		txq->sq->owner = saved_sq_owner;
		goto err_drop_pkt;
	}

	netif_subqueue_sent(netdev, txq->sq->q_id, skb->len);
	netif_subqueue_maybe_stop(netdev, txq->sq->q_id,
				  hinic3_wq_free_wqebbs(&txq->sq->wq),
				  txq->tx_stop_thrs,
				  txq->tx_start_thrs);

	hinic3_prepare_sq_ctrl(&wqe_combo, queue_info, num_sge, owner);
	hinic3_write_db(txq->sq, 0, DB_CFLAG_DP_SQ,
			hinic3_get_sq_local_pi(txq->sq));

	return NETDEV_TX_OK;

err_drop_pkt:
	dev_kfree_skb_any(skb);

err_out:
	return NETDEV_TX_OK;
}

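/* Transmit entry point: validate carrier state and queue id, then hand the
 * skb to hinic3_send_one_skb() for the selected txq.
 */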
netdev_tx_t hinic3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	u16 q_id = skb_get_queue_mapping(skb);

	if (unlikely(!netif_carrier_ok(netdev)))
		goto err_drop_pkt;

	if (unlikely(q_id >= nic_dev->q_params.num_qps))
		goto err_drop_pkt;

	return hinic3_send_one_skb(skb, netdev, &nic_dev->txqs[q_id]);

err_drop_pkt:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

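/* The HW has caught up with the driver once its consumer index equals the
 * local producer index.
 */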
static bool is_hw_complete_sq_process(struct hinic3_io_queue *sq)
{
	u16 sw_pi, hw_ci;

	sw_pi = hinic3_get_sq_local_pi(sq);
	hw_ci = hinic3_get_sq_hw_ci(sq);

	return sw_pi == hw_ci;
}

#define HINIC3_FLUSH_QUEUE_POLL_SLEEP_US   10000
#define HINIC3_FLUSH_QUEUE_POLL_TIMEOUT_US 10000000
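/* Repeatedly ask HW to drop pending TX packets until the SQ is fully
 * consumed or the poll times out.
 */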
static int hinic3_stop_sq(struct hinic3_txq *txq)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(txq->netdev);
	int err, rc;

	err = read_poll_timeout(hinic3_force_drop_tx_pkt, rc,
				is_hw_complete_sq_process(txq->sq) || rc,
				HINIC3_FLUSH_QUEUE_POLL_SLEEP_US,
				HINIC3_FLUSH_QUEUE_POLL_TIMEOUT_US,
				true, nic_dev->hwdev);
	if (rc)
		return rc;
	else
		return err;
}

/* packet transmission should be stopped before calling this function */
void hinic3_flush_txqs(struct net_device *netdev)
{
	struct hinic3_nic_dev *nic_dev = netdev_priv(netdev);
	u16 qid;
	int err;

	for (qid = 0; qid < nic_dev->q_params.num_qps; qid++) {
		err = hinic3_stop_sq(&nic_dev->txqs[qid]);
		netdev_tx_reset_subqueue(netdev, qid);
		if (err)
			netdev_err(netdev, "Failed to stop sq%u\n", qid);
	}
}

#define HINIC3_BDS_PER_SQ_WQEBB \
	(HINIC3_SQ_WQEBB_SIZE / sizeof(struct hinic3_sq_bufdesc))

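/* NAPI TX completion: reclaim finished wqes up to the HW consumer index,
 * unmap and free their skbs, return the WQEBBs to the work queue and wake
 * the subqueue if enough space was freed. Returns true when
 * HINIC3_TX_POLL_WEIGHT packets were completed, i.e. more work may remain.
 */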
bool hinic3_tx_poll(struct hinic3_txq *txq, int budget)
{
	struct net_device *netdev = txq->netdev;
	u16 hw_ci, sw_ci, q_id = txq->sq->q_id;
	struct hinic3_tx_info *tx_info;
	unsigned int bytes_compl = 0;
	unsigned int pkts = 0;
	u16 wqebb_cnt = 0;

	hw_ci = hinic3_get_sq_hw_ci(txq->sq);
	dma_rmb();
	sw_ci = hinic3_get_sq_local_ci(txq->sq);

	do {
		tx_info = &txq->tx_info[sw_ci];

		/* Did all wqebbs of this wqe complete? */
		if (hw_ci == sw_ci ||
		    ((hw_ci - sw_ci) & txq->q_mask) < tx_info->wqebb_cnt)
			break;

		sw_ci = (sw_ci + tx_info->wqebb_cnt) & txq->q_mask;
		net_prefetch(&txq->tx_info[sw_ci]);

		wqebb_cnt += tx_info->wqebb_cnt;
		bytes_compl += tx_info->skb->len;
		pkts++;

		hinic3_tx_unmap_skb(netdev, tx_info->skb, tx_info->dma_info);
		napi_consume_skb(tx_info->skb, budget);
		tx_info->skb = NULL;
	} while (likely(pkts < HINIC3_TX_POLL_WEIGHT));

	hinic3_wq_put_wqebbs(&txq->sq->wq, wqebb_cnt);

	netif_subqueue_completed_wake(netdev, q_id, pkts, bytes_compl,
				      hinic3_wq_free_wqebbs(&txq->sq->wq),
				      txq->tx_start_thrs);

	return pkts == HINIC3_TX_POLL_WEIGHT;
}
668