xref: /linux/drivers/net/ethernet/fungible/funeth/funeth_tx.c (revision 06b9cce42634a50f2840777a66553b02320db5ef)
1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
2 
3 #include <linux/dma-mapping.h>
4 #include <linux/ip.h>
5 #include <linux/pci.h>
6 #include <linux/skbuff.h>
7 #include <linux/tcp.h>
8 #include <uapi/linux/udp.h>
9 #include "funeth.h"
10 #include "funeth_txrx.h"
11 #include "funeth_trace.h"
12 #include "fun_queue.h"
13 
/* fun_xdp_tx() reclaims completed XDP Tx descriptors whenever fewer than
 * this many descriptors are available in the queue.
 */
#define FUN_XDP_CLEAN_THRES 32
/* Reclaim budget that fun_xdp_tx() passes to fun_xdpq_clean(). */
#define FUN_XDP_CLEAN_BATCH 16
16 
17 /* DMA-map a packet and return the (length, DMA_address) pairs for its
18  * segments. If a mapping error occurs -ENOMEM is returned.
19  */
20 static int map_skb(const struct sk_buff *skb, struct device *dev,
21 		   dma_addr_t *addr, unsigned int *len)
22 {
23 	const struct skb_shared_info *si;
24 	const skb_frag_t *fp, *end;
25 
26 	*len = skb_headlen(skb);
27 	*addr = dma_map_single(dev, skb->data, *len, DMA_TO_DEVICE);
28 	if (dma_mapping_error(dev, *addr))
29 		return -ENOMEM;
30 
31 	si = skb_shinfo(skb);
32 	end = &si->frags[si->nr_frags];
33 
34 	for (fp = si->frags; fp < end; fp++) {
35 		*++len = skb_frag_size(fp);
36 		*++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE);
37 		if (dma_mapping_error(dev, *addr))
38 			goto unwind;
39 	}
40 	return 0;
41 
42 unwind:
43 	while (fp-- > si->frags)
44 		dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);
45 
46 	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
47 	return -ENOMEM;
48 }
49 
50 /* Return the address just past the end of a Tx queue's descriptor ring.
51  * It exploits the fact that the HW writeback area is just after the end
52  * of the descriptor ring.
53  */
54 static void *txq_end(const struct funeth_txq *q)
55 {
56 	return (void *)q->hw_wb;
57 }
58 
59 /* Return the amount of space within a Tx ring from the given address to the
60  * end.
61  */
62 static unsigned int txq_to_end(const struct funeth_txq *q, void *p)
63 {
64 	return txq_end(q) - p;
65 }
66 
67 /* Return the number of Tx descriptors occupied by a Tx request. */
68 static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req)
69 {
70 	return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8);
71 }
72 
73 static __be16 tcp_hdr_doff_flags(const struct tcphdr *th)
74 {
75 	return *(__be16 *)&tcp_flag_word(th);
76 }
77 
78 #if IS_ENABLED(CONFIG_TLS_DEVICE)
79 #include "funeth_ktls.h"
80 
81 static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q,
82 				  unsigned int *tls_len)
83 {
84 	const struct fun_ktls_tx_ctx *tls_ctx;
85 	u32 datalen, seq;
86 
87 	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
88 	if (!datalen)
89 		return skb;
90 
91 	if (likely(!tls_offload_tx_resync_pending(skb->sk))) {
92 		seq = ntohl(tcp_hdr(skb)->seq);
93 		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
94 
95 		if (likely(tls_ctx->next_seq == seq)) {
96 			*tls_len = datalen;
97 			return skb;
98 		}
99 		if (seq - tls_ctx->next_seq < U32_MAX / 4) {
100 			tls_offload_tx_resync_request(skb->sk, seq,
101 						      tls_ctx->next_seq);
102 		}
103 	}
104 
105 	FUN_QSTAT_INC(q, tx_tls_fallback);
106 	skb = tls_encrypt_skb(skb);
107 	if (!skb)
108 		FUN_QSTAT_INC(q, tx_tls_drops);
109 
110 	return skb;
111 }
112 #endif
113 
114 /* Write as many descriptors as needed for the supplied skb starting at the
115  * current producer location. The caller has made certain enough descriptors
116  * are available.
117  *
118  * Returns the number of descriptors written, 0 on error.
119  */
120 static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
121 				   unsigned int tls_len)
122 {
123 	unsigned int extra_bytes = 0, extra_pkts = 0;
124 	unsigned int idx = q->prod_cnt & q->mask;
125 	const struct skb_shared_info *shinfo;
126 	unsigned int lens[MAX_SKB_FRAGS + 1];
127 	dma_addr_t addrs[MAX_SKB_FRAGS + 1];
128 	struct fun_eth_tx_req *req;
129 	struct fun_dataop_gl *gle;
130 	const struct tcphdr *th;
131 	unsigned int ngle, i;
132 	u16 flags;
133 
134 	if (unlikely(map_skb(skb, q->dma_dev, addrs, lens))) {
135 		FUN_QSTAT_INC(q, tx_map_err);
136 		return 0;
137 	}
138 
139 	req = fun_tx_desc_addr(q, idx);
140 	req->op = FUN_ETH_OP_TX;
141 	req->len8 = 0;
142 	req->flags = 0;
143 	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
144 	req->repr_idn = 0;
145 	req->encap_proto = 0;
146 
147 	shinfo = skb_shinfo(skb);
148 	if (likely(shinfo->gso_size)) {
149 		if (skb->encapsulation) {
150 			u16 ol4_ofst;
151 
152 			flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO |
153 				FUN_ETH_UPDATE_INNER_L4_CKSUM |
154 				FUN_ETH_UPDATE_OUTER_L3_LEN;
155 			if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
156 						SKB_GSO_UDP_TUNNEL_CSUM)) {
157 				flags |= FUN_ETH_UPDATE_OUTER_L4_LEN |
158 					 FUN_ETH_OUTER_UDP;
159 				if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
160 					flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM;
161 				ol4_ofst = skb_transport_offset(skb);
162 			} else {
163 				ol4_ofst = skb_inner_network_offset(skb);
164 			}
165 
166 			if (ip_hdr(skb)->version == 4)
167 				flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM;
168 			else
169 				flags |= FUN_ETH_OUTER_IPV6;
170 
171 			if (skb->inner_network_header) {
172 				if (inner_ip_hdr(skb)->version == 4)
173 					flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM |
174 						 FUN_ETH_UPDATE_INNER_L3_LEN;
175 				else
176 					flags |= FUN_ETH_INNER_IPV6 |
177 						 FUN_ETH_UPDATE_INNER_L3_LEN;
178 			}
179 			th = inner_tcp_hdr(skb);
180 			fun_eth_offload_init(&req->offload, flags,
181 					     shinfo->gso_size,
182 					     tcp_hdr_doff_flags(th), 0,
183 					     skb_inner_network_offset(skb),
184 					     skb_inner_transport_offset(skb),
185 					     skb_network_offset(skb), ol4_ofst);
186 			FUN_QSTAT_INC(q, tx_encap_tso);
187 		} else {
188 			/* HW considers one set of headers as inner */
189 			flags = FUN_ETH_INNER_LSO |
190 				FUN_ETH_UPDATE_INNER_L4_CKSUM |
191 				FUN_ETH_UPDATE_INNER_L3_LEN;
192 			if (shinfo->gso_type & SKB_GSO_TCPV6)
193 				flags |= FUN_ETH_INNER_IPV6;
194 			else
195 				flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
196 			th = tcp_hdr(skb);
197 			fun_eth_offload_init(&req->offload, flags,
198 					     shinfo->gso_size,
199 					     tcp_hdr_doff_flags(th), 0,
200 					     skb_network_offset(skb),
201 					     skb_transport_offset(skb), 0, 0);
202 			FUN_QSTAT_INC(q, tx_tso);
203 		}
204 
205 		u64_stats_update_begin(&q->syncp);
206 		q->stats.tx_cso += shinfo->gso_segs;
207 		u64_stats_update_end(&q->syncp);
208 
209 		extra_pkts = shinfo->gso_segs - 1;
210 		extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) +
211 			       __tcp_hdrlen(th)) * extra_pkts;
212 	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
213 		flags = FUN_ETH_UPDATE_INNER_L4_CKSUM;
214 		if (skb->csum_offset == offsetof(struct udphdr, check))
215 			flags |= FUN_ETH_INNER_UDP;
216 		fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0,
217 				     skb_checksum_start_offset(skb), 0, 0);
218 		FUN_QSTAT_INC(q, tx_cso);
219 	} else {
220 		fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
221 	}
222 
223 	ngle = shinfo->nr_frags + 1;
224 	req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8;
225 	req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len);
226 
227 	for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm;
228 	     i < ngle && txq_to_end(q, gle); i++, gle++)
229 		fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
230 
231 	if (txq_to_end(q, gle) == 0) {
232 		gle = (struct fun_dataop_gl *)q->desc;
233 		for ( ; i < ngle; i++, gle++)
234 			fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
235 	}
236 
237 	if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) {
238 		struct fun_eth_tls *tls = (struct fun_eth_tls *)gle;
239 		struct fun_ktls_tx_ctx *tls_ctx;
240 
241 		req->len8 += FUNETH_TLS_SZ / 8;
242 		req->flags = cpu_to_be16(FUN_ETH_TX_TLS);
243 
244 		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
245 		tls->tlsid = tls_ctx->tlsid;
246 		tls_ctx->next_seq += tls_len;
247 
248 		u64_stats_update_begin(&q->syncp);
249 		q->stats.tx_tls_bytes += tls_len;
250 		q->stats.tx_tls_pkts += 1 + extra_pkts;
251 		u64_stats_update_end(&q->syncp);
252 	}
253 
254 	u64_stats_update_begin(&q->syncp);
255 	q->stats.tx_bytes += skb->len + extra_bytes;
256 	q->stats.tx_pkts += 1 + extra_pkts;
257 	u64_stats_update_end(&q->syncp);
258 
259 	q->info[idx].skb = skb;
260 
261 	trace_funeth_tx(q, skb->len, idx, req->dataop.ngather);
262 	return tx_req_ndesc(req);
263 }
264 
265 /* Return the number of available descriptors of a Tx queue.
266  * HW assumes head==tail means the ring is empty so we need to keep one
267  * descriptor unused.
268  */
269 static unsigned int fun_txq_avail(const struct funeth_txq *q)
270 {
271 	return q->mask - q->prod_cnt + q->cons_cnt;
272 }
273 
274 /* Stop a queue if it can't handle another worst-case packet. */
275 static void fun_tx_check_stop(struct funeth_txq *q)
276 {
277 	if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC))
278 		return;
279 
280 	netif_tx_stop_queue(q->ndq);
281 
282 	/* NAPI reclaim is freeing packets in parallel with us and we may race.
283 	 * We have stopped the queue but check again after synchronizing with
284 	 * reclaim.
285 	 */
286 	smp_mb();
287 	if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC))
288 		FUN_QSTAT_INC(q, tx_nstops);
289 	else
290 		netif_tx_start_queue(q->ndq);
291 }
292 
293 /* Return true if a queue has enough space to restart. Current condition is
294  * that the queue must be >= 1/4 empty.
295  */
296 static bool fun_txq_may_restart(struct funeth_txq *q)
297 {
298 	return fun_txq_avail(q) >= q->mask / 4;
299 }
300 
301 netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev)
302 {
303 	struct funeth_priv *fp = netdev_priv(netdev);
304 	unsigned int qid = skb_get_queue_mapping(skb);
305 	struct funeth_txq *q = fp->txqs[qid];
306 	unsigned int tls_len = 0;
307 	unsigned int ndesc;
308 
309 	if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk &&
310 	    tls_is_sk_tx_device_offloaded(skb->sk)) {
311 		skb = fun_tls_tx(skb, q, &tls_len);
312 		if (unlikely(!skb))
313 			goto dropped;
314 	}
315 
316 	ndesc = write_pkt_desc(skb, q, tls_len);
317 	if (unlikely(!ndesc)) {
318 		dev_kfree_skb_any(skb);
319 		goto dropped;
320 	}
321 
322 	q->prod_cnt += ndesc;
323 	fun_tx_check_stop(q);
324 
325 	skb_tx_timestamp(skb);
326 
327 	if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more()))
328 		fun_txq_wr_db(q);
329 	else
330 		FUN_QSTAT_INC(q, tx_more);
331 
332 	return NETDEV_TX_OK;
333 
334 dropped:
335 	/* A dropped packet may be the last one in a xmit_more train,
336 	 * ring the doorbell just in case.
337 	 */
338 	if (!netdev_xmit_more())
339 		fun_txq_wr_db(q);
340 	return NETDEV_TX_OK;
341 }
342 
343 /* Return a Tx queue's HW head index written back to host memory. */
344 static u16 txq_hw_head(const struct funeth_txq *q)
345 {
346 	return (u16)be64_to_cpu(*q->hw_wb);
347 }
348 
349 /* Unmap the Tx packet starting at the given descriptor index and
350  * return the number of Tx descriptors it occupied.
351  */
352 static unsigned int unmap_skb(const struct funeth_txq *q, unsigned int idx)
353 {
354 	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
355 	unsigned int ngle = req->dataop.ngather;
356 	struct fun_dataop_gl *gle;
357 
358 	if (ngle) {
359 		gle = (struct fun_dataop_gl *)req->dataop.imm;
360 		dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
361 				 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
362 
363 		for (gle++; --ngle && txq_to_end(q, gle); gle++)
364 			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
365 				       be32_to_cpu(gle->sgl_len),
366 				       DMA_TO_DEVICE);
367 
368 		for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++)
369 			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
370 				       be32_to_cpu(gle->sgl_len),
371 				       DMA_TO_DEVICE);
372 	}
373 
374 	return tx_req_ndesc(req);
375 }
376 
377 /* Reclaim completed Tx descriptors and free their packets. Restart a stopped
378  * queue if we freed enough descriptors.
379  *
380  * Return true if we exhausted the budget while there is more work to be done.
381  */
382 static bool fun_txq_reclaim(struct funeth_txq *q, int budget)
383 {
384 	unsigned int npkts = 0, nbytes = 0, ndesc = 0;
385 	unsigned int head, limit, reclaim_idx;
386 
387 	/* budget may be 0, e.g., netpoll */
388 	limit = budget ? budget : UINT_MAX;
389 
390 	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
391 	     head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) {
392 		/* The HW head is continually updated, ensure we don't read
393 		 * descriptor state before the head tells us to reclaim it.
394 		 * On the enqueue side the doorbell is an implicit write
395 		 * barrier.
396 		 */
397 		rmb();
398 
399 		do {
400 			unsigned int pkt_desc = unmap_skb(q, reclaim_idx);
401 			struct sk_buff *skb = q->info[reclaim_idx].skb;
402 
403 			trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);
404 
405 			nbytes += skb->len;
406 			napi_consume_skb(skb, budget);
407 			ndesc += pkt_desc;
408 			reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
409 			npkts++;
410 		} while (reclaim_idx != head && npkts < limit);
411 	}
412 
413 	q->cons_cnt += ndesc;
414 	netdev_tx_completed_queue(q->ndq, npkts, nbytes);
415 	smp_mb(); /* pairs with the one in fun_tx_check_stop() */
416 
417 	if (unlikely(netif_tx_queue_stopped(q->ndq) &&
418 		     fun_txq_may_restart(q))) {
419 		netif_tx_wake_queue(q->ndq);
420 		FUN_QSTAT_INC(q, tx_nrestarts);
421 	}
422 
423 	return reclaim_idx != head;
424 }
425 
426 /* The NAPI handler for Tx queues. */
427 int fun_txq_napi_poll(struct napi_struct *napi, int budget)
428 {
429 	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
430 	struct funeth_txq *q = irq->txq;
431 	unsigned int db_val;
432 
433 	if (fun_txq_reclaim(q, budget))
434 		return budget;               /* exhausted budget */
435 
436 	napi_complete(napi);                 /* exhausted pending work */
437 	db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask);
438 	writel(db_val, q->db);
439 	return 0;
440 }
441 
442 static void fun_xdp_unmap(const struct funeth_txq *q, unsigned int idx)
443 {
444 	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
445 	const struct fun_dataop_gl *gle;
446 
447 	gle = (const struct fun_dataop_gl *)req->dataop.imm;
448 	dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
449 			 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
450 }
451 
452 /* Reclaim up to @budget completed Tx descriptors from a TX XDP queue. */
453 static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
454 {
455 	unsigned int npkts = 0, head, reclaim_idx;
456 
457 	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
458 	     head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) {
459 		/* The HW head is continually updated, ensure we don't read
460 		 * descriptor state before the head tells us to reclaim it.
461 		 * On the enqueue side the doorbell is an implicit write
462 		 * barrier.
463 		 */
464 		rmb();
465 
466 		do {
467 			fun_xdp_unmap(q, reclaim_idx);
468 			page_frag_free(q->info[reclaim_idx].vaddr);
469 
470 			trace_funeth_tx_free(q, reclaim_idx, 1, head);
471 
472 			reclaim_idx = (reclaim_idx + 1) & q->mask;
473 			npkts++;
474 		} while (reclaim_idx != head && npkts < budget);
475 	}
476 
477 	q->cons_cnt += npkts;
478 	return npkts;
479 }
480 
481 bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
482 {
483 	struct fun_eth_tx_req *req;
484 	struct fun_dataop_gl *gle;
485 	unsigned int idx;
486 	dma_addr_t dma;
487 
488 	if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
489 		fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH);
490 
491 	if (!unlikely(fun_txq_avail(q))) {
492 		FUN_QSTAT_INC(q, tx_xdp_full);
493 		return false;
494 	}
495 
496 	dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
497 	if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
498 		FUN_QSTAT_INC(q, tx_map_err);
499 		return false;
500 	}
501 
502 	idx = q->prod_cnt & q->mask;
503 	req = fun_tx_desc_addr(q, idx);
504 	req->op = FUN_ETH_OP_TX;
505 	req->len8 = (sizeof(*req) + sizeof(*gle)) / 8;
506 	req->flags = 0;
507 	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
508 	req->repr_idn = 0;
509 	req->encap_proto = 0;
510 	fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
511 	req->dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
512 
513 	gle = (struct fun_dataop_gl *)req->dataop.imm;
514 	fun_dataop_gl_init(gle, 0, 0, len, dma);
515 
516 	q->info[idx].vaddr = data;
517 
518 	u64_stats_update_begin(&q->syncp);
519 	q->stats.tx_bytes += len;
520 	q->stats.tx_pkts++;
521 	u64_stats_update_end(&q->syncp);
522 
523 	trace_funeth_tx(q, len, idx, 1);
524 	q->prod_cnt++;
525 
526 	return true;
527 }
528 
529 int fun_xdp_xmit_frames(struct net_device *dev, int n,
530 			struct xdp_frame **frames, u32 flags)
531 {
532 	struct funeth_priv *fp = netdev_priv(dev);
533 	struct funeth_txq *q, **xdpqs;
534 	int i, q_idx;
535 
536 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
537 		return -EINVAL;
538 
539 	xdpqs = rcu_dereference_bh(fp->xdpqs);
540 	if (unlikely(!xdpqs))
541 		return -ENETDOWN;
542 
543 	q_idx = smp_processor_id();
544 	if (unlikely(q_idx >= fp->num_xdpqs))
545 		return -ENXIO;
546 
547 	for (q = xdpqs[q_idx], i = 0; i < n; i++) {
548 		const struct xdp_frame *xdpf = frames[i];
549 
550 		if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
551 			break;
552 	}
553 
554 	if (unlikely(flags & XDP_XMIT_FLUSH))
555 		fun_txq_wr_db(q);
556 	return i;
557 }
558 
559 /* Purge a Tx queue of any queued packets. Should be called once HW access
560  * to the packets has been revoked, e.g., after the queue has been disabled.
561  */
562 static void fun_txq_purge(struct funeth_txq *q)
563 {
564 	while (q->cons_cnt != q->prod_cnt) {
565 		unsigned int idx = q->cons_cnt & q->mask;
566 
567 		q->cons_cnt += unmap_skb(q, idx);
568 		dev_kfree_skb_any(q->info[idx].skb);
569 	}
570 	netdev_tx_reset_queue(q->ndq);
571 }
572 
573 static void fun_xdpq_purge(struct funeth_txq *q)
574 {
575 	while (q->cons_cnt != q->prod_cnt) {
576 		unsigned int idx = q->cons_cnt & q->mask;
577 
578 		fun_xdp_unmap(q, idx);
579 		page_frag_free(q->info[idx].vaddr);
580 		q->cons_cnt++;
581 	}
582 }
583 
584 /* Create a Tx queue, allocating all the host resources needed. */
585 static struct funeth_txq *fun_txq_create_sw(struct net_device *dev,
586 					    unsigned int qidx,
587 					    unsigned int ndesc,
588 					    struct fun_irq *irq)
589 {
590 	struct funeth_priv *fp = netdev_priv(dev);
591 	struct funeth_txq *q;
592 	int numa_node;
593 
594 	if (irq)
595 		numa_node = fun_irq_node(irq); /* skb Tx queue */
596 	else
597 		numa_node = cpu_to_node(qidx); /* XDP Tx queue */
598 
599 	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
600 	if (!q)
601 		goto err;
602 
603 	q->dma_dev = &fp->pdev->dev;
604 	q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE,
605 				     sizeof(*q->info), true, numa_node,
606 				     &q->dma_addr, (void **)&q->info,
607 				     &q->hw_wb);
608 	if (!q->desc)
609 		goto free_q;
610 
611 	q->netdev = dev;
612 	q->mask = ndesc - 1;
613 	q->qidx = qidx;
614 	q->numa_node = numa_node;
615 	u64_stats_init(&q->syncp);
616 	q->init_state = FUN_QSTATE_INIT_SW;
617 	return q;
618 
619 free_q:
620 	kfree(q);
621 err:
622 	netdev_err(dev, "Can't allocate memory for %s queue %u\n",
623 		   irq ? "Tx" : "XDP", qidx);
624 	return NULL;
625 }
626 
627 static void fun_txq_free_sw(struct funeth_txq *q)
628 {
629 	struct funeth_priv *fp = netdev_priv(q->netdev);
630 
631 	fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true,
632 			  q->desc, q->dma_addr, q->info);
633 
634 	fp->tx_packets += q->stats.tx_pkts;
635 	fp->tx_bytes   += q->stats.tx_bytes;
636 	fp->tx_dropped += q->stats.tx_map_err;
637 
638 	kfree(q);
639 }
640 
641 /* Allocate the device portion of a Tx queue. */
642 int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq)
643 {
644 	struct funeth_priv *fp = netdev_priv(q->netdev);
645 	unsigned int irq_idx, ndesc = q->mask + 1;
646 	int err;
647 
648 	q->irq = irq;
649 	*q->hw_wb = 0;
650 	q->prod_cnt = 0;
651 	q->cons_cnt = 0;
652 	irq_idx = irq ? irq->irq_idx : 0;
653 
654 	err = fun_sq_create(fp->fdev,
655 			    FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS |
656 			    FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0,
657 			    FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc,
658 			    q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec,
659 			    irq_idx, 0, fp->fdev->kern_end_qid, 0,
660 			    &q->hw_qid, &q->db);
661 	if (err)
662 		goto out;
663 
664 	err = fun_create_and_bind_tx(fp, q->hw_qid);
665 	if (err < 0)
666 		goto free_devq;
667 	q->ethid = err;
668 
669 	if (irq) {
670 		irq->txq = q;
671 		q->ndq = netdev_get_tx_queue(q->netdev, q->qidx);
672 		q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec,
673 					      fp->tx_coal_count);
674 		writel(q->irq_db_val, q->db);
675 	}
676 
677 	q->init_state = FUN_QSTATE_INIT_FULL;
678 	netif_info(fp, ifup, q->netdev,
679 		   "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n",
680 		   irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx,
681 		   q->ethid, q->numa_node);
682 	return 0;
683 
684 free_devq:
685 	fun_destroy_sq(fp->fdev, q->hw_qid);
686 out:
687 	netdev_err(q->netdev,
688 		   "Failed to create %s queue %u on device, error %d\n",
689 		   irq ? "Tx" : "XDP", q->qidx, err);
690 	return err;
691 }
692 
693 static void fun_txq_free_dev(struct funeth_txq *q)
694 {
695 	struct funeth_priv *fp = netdev_priv(q->netdev);
696 
697 	if (q->init_state < FUN_QSTATE_INIT_FULL)
698 		return;
699 
700 	netif_info(fp, ifdown, q->netdev,
701 		   "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n",
702 		   q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid,
703 		   q->irq ? q->irq->irq_idx : 0, q->ethid);
704 
705 	fun_destroy_sq(fp->fdev, q->hw_qid);
706 	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid);
707 
708 	if (q->irq) {
709 		q->irq->txq = NULL;
710 		fun_txq_purge(q);
711 	} else {
712 		fun_xdpq_purge(q);
713 	}
714 
715 	q->init_state = FUN_QSTATE_INIT_SW;
716 }
717 
718 /* Create or advance a Tx queue, allocating all the host and device resources
719  * needed to reach the target state.
720  */
721 int funeth_txq_create(struct net_device *dev, unsigned int qidx,
722 		      unsigned int ndesc, struct fun_irq *irq, int state,
723 		      struct funeth_txq **qp)
724 {
725 	struct funeth_txq *q = *qp;
726 	int err;
727 
728 	if (!q)
729 		q = fun_txq_create_sw(dev, qidx, ndesc, irq);
730 	if (!q)
731 		return -ENOMEM;
732 
733 	if (q->init_state >= state)
734 		goto out;
735 
736 	err = fun_txq_create_dev(q, irq);
737 	if (err) {
738 		if (!*qp)
739 			fun_txq_free_sw(q);
740 		return err;
741 	}
742 
743 out:
744 	*qp = q;
745 	return 0;
746 }
747 
748 /* Free Tx queue resources until it reaches the target state.
749  * The queue must be already disconnected from the stack.
750  */
751 struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state)
752 {
753 	if (state < FUN_QSTATE_INIT_FULL)
754 		fun_txq_free_dev(q);
755 
756 	if (state == FUN_QSTATE_DESTROYED) {
757 		fun_txq_free_sw(q);
758 		q = NULL;
759 	}
760 
761 	return q;
762 }
763