xref: /linux/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c (revision a3a02a52bcfcbcc4a637d4b68bf1bc391c9fad02)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2023 Intel Corporation */
3 
4 #include <net/libeth/rx.h>
5 
6 #include "idpf.h"
7 
8 /**
9  * idpf_tx_singleq_csum - Enable tx checksum offloads
10  * @skb: pointer to skb
11  * @off: pointer to struct that holds offload parameters
12  *
13  * Returns 1 if checksum offload parameters were set up, 0 if the offload is
14  * not needed or was done in software, or negative if it cannot be performed.
15  */
16 static int idpf_tx_singleq_csum(struct sk_buff *skb,
17 				struct idpf_tx_offload_params *off)
18 {
19 	u32 l4_len, l3_len, l2_len;
20 	union {
21 		struct iphdr *v4;
22 		struct ipv6hdr *v6;
23 		unsigned char *hdr;
24 	} ip;
25 	union {
26 		struct tcphdr *tcp;
27 		unsigned char *hdr;
28 	} l4;
29 	u32 offset, cmd = 0;
30 	u8 l4_proto = 0;
31 	__be16 frag_off;
32 	bool is_tso;
33 
34 	if (skb->ip_summed != CHECKSUM_PARTIAL)
35 		return 0;
36 
37 	ip.hdr = skb_network_header(skb);
38 	l4.hdr = skb_transport_header(skb);
39 
40 	/* compute outer L2 header size */
41 	l2_len = ip.hdr - skb->data;
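	/* the MACLEN descriptor field counts the L2 header in two-byte
	 * words, hence the division by two
	 */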
42 	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
43 	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
44 	if (skb->encapsulation) {
45 		u32 tunnel = 0;
46 
47 		/* define outer network header type */
48 		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
49 			/* The stack has already computed the IP header
50 			 * checksum; the only time we need the hardware to
51 			 * recompute it is in the case of TSO.
52 			 */
53 			tunnel |= is_tso ?
54 				  IDPF_TX_CTX_EXT_IP_IPV4 :
55 				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
56 
57 			l4_proto = ip.v4->protocol;
58 		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
59 			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;
60 
61 			l4_proto = ip.v6->nexthdr;
62 			if (ipv6_ext_hdr(l4_proto))
63 				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
64 						 sizeof(*ip.v6),
65 						 &l4_proto, &frag_off);
66 		}
67 
68 		/* define outer transport */
69 		switch (l4_proto) {
70 		case IPPROTO_UDP:
71 			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
72 			break;
73 		case IPPROTO_GRE:
74 			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
75 			break;
76 		case IPPROTO_IPIP:
77 		case IPPROTO_IPV6:
78 			l4.hdr = skb_inner_network_header(skb);
79 			break;
80 		default:
81 			if (is_tso)
82 				return -1;
83 
84 			skb_checksum_help(skb);
85 
86 			return 0;
87 		}
88 		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;
89 
90 		/* compute outer L3 header size */
91 		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
92 				     (l4.hdr - ip.hdr) / 4);
93 
94 		/* switch IP header pointer from outer to inner header */
95 		ip.hdr = skb_inner_network_header(skb);
96 
97 		/* compute tunnel header size */
98 		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
99 				     (ip.hdr - l4.hdr) / 2);
100 
101 		/* indicate if we need to offload outer UDP header */
102 		if (is_tso &&
103 		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
104 		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
105 			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;
106 
107 		/* record tunnel offload values */
108 		off->cd_tunneling |= tunnel;
109 
110 		/* switch L4 header pointer from outer to inner */
111 		l4.hdr = skb_inner_transport_header(skb);
112 		l4_proto = 0;
113 
114 		/* reset type as we transition from outer to inner headers */
115 		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
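		/* ip.v4 and ip.v6 alias the same inner header through the
		 * union above, so only the version nibble decides which
		 * flag to set
		 */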
116 		if (ip.v4->version == 4)
117 			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
118 		if (ip.v6->version == 6)
119 			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
120 	}
121 
122 	/* Enable IP checksum offloads */
123 	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
124 		l4_proto = ip.v4->protocol;
125 		/* See comment above regarding need for HW to recompute IP
126 		 * header checksum in the case of TSO.
127 		 */
128 		if (is_tso)
129 			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
130 		else
131 			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;
132 
133 	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
134 		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
135 		l4_proto = ip.v6->nexthdr;
136 		if (ipv6_ext_hdr(l4_proto))
137 			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
138 					 sizeof(*ip.v6), &l4_proto,
139 					 &frag_off);
140 	} else {
141 		return -1;
142 	}
143 
144 	/* compute inner L3 header size */
145 	l3_len = l4.hdr - ip.hdr;
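	/* the IPLEN field is expressed in four-byte words, hence l3_len / 4 */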
146 	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;
147 
148 	/* Enable L4 checksum offloads */
149 	switch (l4_proto) {
150 	case IPPROTO_TCP:
151 		/* enable checksum offloads */
152 		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
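		/* tcp->doff is already in four-byte words, as L4_LEN expects */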
153 		l4_len = l4.tcp->doff;
154 		break;
155 	case IPPROTO_UDP:
156 		/* enable UDP checksum offload */
157 		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
158 		l4_len = sizeof(struct udphdr) >> 2;
159 		break;
160 	case IPPROTO_SCTP:
161 		/* enable SCTP checksum offload */
162 		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
163 		l4_len = sizeof(struct sctphdr) >> 2;
164 		break;
165 	default:
166 		if (is_tso)
167 			return -1;
168 
169 		skb_checksum_help(skb);
170 
171 		return 0;
172 	}
173 
174 	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
175 	off->td_cmd |= cmd;
176 	off->hdr_offsets |= offset;
177 
178 	return 1;
179 }
180 
181 /**
182  * idpf_tx_singleq_map - Build the Tx base descriptor
183  * @tx_q: queue to send buffer on
184  * @first: first Tx buffer info struct to use
185  * @offloads: pointer to struct that holds offload parameters
186  *
187  * This function loops over the skb data pointed to by *first,
188  * maps a DMA address for each piece of memory, and programs the
189  * address and length into the transmit base mode descriptors.
190  */
191 static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
192 				struct idpf_tx_buf *first,
193 				struct idpf_tx_offload_params *offloads)
194 {
195 	u32 offsets = offloads->hdr_offsets;
196 	struct idpf_tx_buf *tx_buf = first;
197 	struct idpf_base_tx_desc *tx_desc;
198 	struct sk_buff *skb = first->skb;
199 	u64 td_cmd = offloads->td_cmd;
200 	unsigned int data_len, size;
201 	u16 i = tx_q->next_to_use;
202 	struct netdev_queue *nq;
203 	skb_frag_t *frag;
204 	dma_addr_t dma;
205 	u64 td_tag = 0;
206 
207 	data_len = skb->data_len;
208 	size = skb_headlen(skb);
209 
210 	tx_desc = &tx_q->base_tx[i];
211 
212 	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
213 
214 	/* write each descriptor with CRC bit */
215 	if (idpf_queue_has(CRC_EN, tx_q))
216 		td_cmd |= IDPF_TX_DESC_CMD_ICRC;
217 
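	/* the first pass maps the linear skb data; each subsequent pass maps
	 * one page fragment, and the loop breaks once data_len is consumed
	 */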
218 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
219 		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
220 
221 		if (dma_mapping_error(tx_q->dev, dma))
222 			return idpf_tx_dma_map_error(tx_q, skb, first, i);
223 
224 		/* record length, and DMA address */
225 		dma_unmap_len_set(tx_buf, len, size);
226 		dma_unmap_addr_set(tx_buf, dma, dma);
227 
228 		/* align size to end of page */
229 		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
230 		tx_desc->buf_addr = cpu_to_le64(dma);
231 
232 		/* account for data chunks larger than the hardware
233 		 * can handle
234 		 */
235 		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
236 			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
237 								  offsets,
238 								  max_data,
239 								  td_tag);
240 			tx_desc++;
241 			i++;
242 
243 			if (i == tx_q->desc_count) {
244 				tx_desc = &tx_q->base_tx[0];
245 				i = 0;
246 			}
247 
248 			dma += max_data;
249 			size -= max_data;
250 
251 			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
252 			tx_desc->buf_addr = cpu_to_le64(dma);
253 		}
254 
255 		if (!data_len)
256 			break;
257 
258 		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
259 							  size, td_tag);
260 		tx_desc++;
261 		i++;
262 
263 		if (i == tx_q->desc_count) {
264 			tx_desc = &tx_q->base_tx[0];
265 			i = 0;
266 		}
267 
268 		size = skb_frag_size(frag);
269 		data_len -= size;
270 
271 		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
272 				       DMA_TO_DEVICE);
273 
274 		tx_buf = &tx_q->tx_buf[i];
275 	}
276 
277 	skb_tx_timestamp(first->skb);
278 
279 	/* write last descriptor with RS and EOP bits */
280 	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);
281 
282 	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
283 						  size, td_tag);
284 
285 	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);
286 
287 	/* set next_to_watch value indicating a packet is present */
288 	first->next_to_watch = tx_desc;
289 
290 	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
291 	netdev_tx_sent_queue(nq, first->bytecount);
292 
293 	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
294 }
295 
296 /**
297  * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
298  * @txq: queue to put context descriptor on
299  *
300  * Since the TX buffer ring mimics the descriptor ring, update the tx buffer
301  * ring entry to reflect that this index is a context descriptor
302  */
303 static struct idpf_base_tx_ctx_desc *
304 idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
305 {
306 	struct idpf_base_tx_ctx_desc *ctx_desc;
307 	int ntu = txq->next_to_use;
308 
309 	memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
310 	txq->tx_buf[ntu].ctx_entry = true;
311 
312 	ctx_desc = &txq->base_ctx[ntu];
313 
314 	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
315 	txq->next_to_use = ntu;
316 
317 	return ctx_desc;
318 }
319 
320 /**
321  * idpf_tx_singleq_build_ctx_desc - populate context descriptor
322  * @txq: queue to send buffer on
323  * @offload: offload parameter structure
324  **/
325 static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
326 					   struct idpf_tx_offload_params *offload)
327 {
328 	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
329 	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;
330 
331 	if (offload->tso_segs) {
332 		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
333 		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
334 				  offload->tso_len);
335 		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);
336 
337 		u64_stats_update_begin(&txq->stats_sync);
338 		u64_stats_inc(&txq->q_stats.lso_pkts);
339 		u64_stats_update_end(&txq->stats_sync);
340 	}
341 
342 	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);
343 
344 	desc->qw0.l2tag2 = 0;
345 	desc->qw0.rsvd1 = 0;
346 	desc->qw1 = cpu_to_le64(qw1);
347 }
348 
349 /**
350  * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
351  * @skb: send buffer
352  * @tx_q: queue to send buffer on
353  *
354  * Returns NETDEV_TX_OK if the frame was sent or dropped, else NETDEV_TX_BUSY
355  */
356 netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
357 				  struct idpf_tx_queue *tx_q)
358 {
359 	struct idpf_tx_offload_params offload = { };
360 	struct idpf_tx_buf *first;
361 	unsigned int count;
362 	__be16 protocol;
363 	int csum, tso;
364 
365 	count = idpf_tx_desc_count_required(tx_q, skb);
366 	if (unlikely(!count))
367 		return idpf_tx_drop_skb(tx_q, skb);
368 
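	/* make sure there is room for this frame plus a context descriptor
	 * and a cache line of extra descriptors before proceeding
	 */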
369 	if (idpf_tx_maybe_stop_common(tx_q,
370 				      count + IDPF_TX_DESCS_PER_CACHE_LINE +
371 				      IDPF_TX_DESCS_FOR_CTX)) {
372 		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
373 
374 		return NETDEV_TX_BUSY;
375 	}
376 
377 	protocol = vlan_get_protocol(skb);
378 	if (protocol == htons(ETH_P_IP))
379 		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
380 	else if (protocol == htons(ETH_P_IPV6))
381 		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;
382 
383 	tso = idpf_tso(skb, &offload);
384 	if (tso < 0)
385 		goto out_drop;
386 
387 	csum = idpf_tx_singleq_csum(skb, &offload);
388 	if (csum < 0)
389 		goto out_drop;
390 
391 	if (tso || offload.cd_tunneling)
392 		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);
393 
394 	/* record the location of the first descriptor for this packet */
395 	first = &tx_q->tx_buf[tx_q->next_to_use];
396 	first->skb = skb;
397 
398 	if (tso) {
399 		first->gso_segs = offload.tso_segs;
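		/* each additional segment repeats the headers on the wire, so
		 * count (gso_segs - 1) extra header copies in bytecount
		 */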
400 		first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len);
401 	} else {
402 		first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
403 		first->gso_segs = 1;
404 	}
405 	idpf_tx_singleq_map(tx_q, first, &offload);
406 
407 	return NETDEV_TX_OK;
408 
409 out_drop:
410 	return idpf_tx_drop_skb(tx_q, skb);
411 }
412 
413 /**
414  * idpf_tx_singleq_clean - Reclaim resources from queue
415  * @tx_q: Tx queue to clean
416  * @napi_budget: Used to determine if we are in netpoll
417  * @cleaned: returns number of packets cleaned
418  * Returns true if the clean completed within the budget, false otherwise
419  */
420 static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
421 				  int *cleaned)
422 {
423 	unsigned int total_bytes = 0, total_pkts = 0;
424 	struct idpf_base_tx_desc *tx_desc;
425 	u32 budget = tx_q->clean_budget;
426 	s16 ntc = tx_q->next_to_clean;
427 	struct idpf_netdev_priv *np;
428 	struct idpf_tx_buf *tx_buf;
429 	struct netdev_queue *nq;
430 	bool dont_wake;
431 
432 	tx_desc = &tx_q->base_tx[ntc];
433 	tx_buf = &tx_q->tx_buf[ntc];
434 	ntc -= tx_q->desc_count;
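	/* ntc is kept negative (offset by -desc_count) so that a simple
	 * !ntc check detects ring wraparound in the loop below
	 */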
435 
436 	do {
437 		struct idpf_base_tx_desc *eop_desc;
438 
439 		/* If this entry in the ring was used as a context descriptor,
440 		 * its corresponding entry in the buffer ring will indicate as
441 		 * such. We can skip this descriptor since there is no buffer
442 		 * to clean.
443 		 */
444 		if (tx_buf->ctx_entry) {
445 			/* Clear this flag here to avoid stale flag values when
446 			 * this buffer is used for actual data in the future.
447 			 * There are cases where the tx_buf struct / the flags
448 			 * field will not be cleared before being reused.
449 			 */
450 			tx_buf->ctx_entry = false;
451 			goto fetch_next_txq_desc;
452 		}
453 
454 		/* if next_to_watch is not set then no work pending */
455 		eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch;
456 		if (!eop_desc)
457 			break;
458 
459 		/* prevent any other reads prior to eop_desc */
460 		smp_rmb();
461 
462 		/* if the descriptor isn't done, no work yet to do */
463 		if (!(eop_desc->qw1 &
464 		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
465 			break;
466 
467 		/* clear next_to_watch to prevent false hangs */
468 		tx_buf->next_to_watch = NULL;
469 
470 		/* update the statistics for this packet */
471 		total_bytes += tx_buf->bytecount;
472 		total_pkts += tx_buf->gso_segs;
473 
474 		napi_consume_skb(tx_buf->skb, napi_budget);
475 
476 		/* unmap skb header data */
477 		dma_unmap_single(tx_q->dev,
478 				 dma_unmap_addr(tx_buf, dma),
479 				 dma_unmap_len(tx_buf, len),
480 				 DMA_TO_DEVICE);
481 
482 		/* clear tx_buf data */
483 		tx_buf->skb = NULL;
484 		dma_unmap_len_set(tx_buf, len, 0);
485 
486 		/* unmap remaining buffers */
487 		while (tx_desc != eop_desc) {
488 			tx_buf++;
489 			tx_desc++;
490 			ntc++;
491 			if (unlikely(!ntc)) {
492 				ntc -= tx_q->desc_count;
493 				tx_buf = tx_q->tx_buf;
494 				tx_desc = &tx_q->base_tx[0];
495 			}
496 
497 			/* unmap any remaining paged data */
498 			if (dma_unmap_len(tx_buf, len)) {
499 				dma_unmap_page(tx_q->dev,
500 					       dma_unmap_addr(tx_buf, dma),
501 					       dma_unmap_len(tx_buf, len),
502 					       DMA_TO_DEVICE);
503 				dma_unmap_len_set(tx_buf, len, 0);
504 			}
505 		}
506 
507 		/* update budget only if we did something */
508 		budget--;
509 
510 fetch_next_txq_desc:
511 		tx_buf++;
512 		tx_desc++;
513 		ntc++;
514 		if (unlikely(!ntc)) {
515 			ntc -= tx_q->desc_count;
516 			tx_buf = tx_q->tx_buf;
517 			tx_desc = &tx_q->base_tx[0];
518 		}
519 	} while (likely(budget));
520 
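	/* undo the negative offset to restore a real ring index */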
521 	ntc += tx_q->desc_count;
522 	tx_q->next_to_clean = ntc;
523 
524 	*cleaned += total_pkts;
525 
526 	u64_stats_update_begin(&tx_q->stats_sync);
527 	u64_stats_add(&tx_q->q_stats.packets, total_pkts);
528 	u64_stats_add(&tx_q->q_stats.bytes, total_bytes);
529 	u64_stats_update_end(&tx_q->stats_sync);
530 
531 	np = netdev_priv(tx_q->netdev);
532 	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
533 
534 	dont_wake = np->state != __IDPF_VPORT_UP ||
535 		    !netif_carrier_ok(tx_q->netdev);
536 	__netif_txq_completed_wake(nq, total_pkts, total_bytes,
537 				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
538 				   dont_wake);
539 
540 	return !!budget;
541 }
542 
543 /**
544  * idpf_tx_singleq_clean_all - Clean all Tx queues
545  * @q_vec: queue vector
546  * @budget: Used to determine if we are in netpoll
547  * @cleaned: returns number of packets cleaned
548  *
549  * Returns false if clean is not complete else returns true
550  */
551 static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
552 				      int *cleaned)
553 {
554 	u16 num_txq = q_vec->num_txq;
555 	bool clean_complete = true;
556 	int i, budget_per_q;
557 
558 	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
559 	for (i = 0; i < num_txq; i++) {
560 		struct idpf_tx_queue *q;
561 
562 		q = q_vec->tx[i];
563 		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
564 							cleaned);
565 	}
566 
567 	return clean_complete;
568 }
569 
570 /**
571  * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
572  * status and error fields
573  * @rx_desc: pointer to receive descriptor (in le64 format)
574  * @stat_err_bits: value to mask
575  *
576  * This function does some fast chicanery in order to return the
577  * value of the mask which is really only used for boolean tests.
578  * The status_error_ptype_len doesn't need to be shifted because it begins
579  * at offset zero.
580  */
581 static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
582 					 const u64 stat_err_bits)
583 {
584 	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
585 		  cpu_to_le64(stat_err_bits));
586 }
587 
588 /**
589  * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
590  * @rx_desc: Rx descriptor for current buffer
591  */
592 static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
593 {
594 	/* if we are the last buffer then there is nothing else to do */
595 	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
596 		return false;
597 
598 	return true;
599 }
600 
601 /**
602  * idpf_rx_singleq_csum - Indicate in skb if checksum is good
603  * @rxq: Rx ring being processed
604  * @skb: skb currently being received and modified
605  * @csum_bits: checksum bits from descriptor
606  * @decoded: the packet type decoded by hardware
607  *
608  * skb->protocol must be set before this function is called
609  */
610 static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
611 				 struct sk_buff *skb,
612 				 struct idpf_rx_csum_decoded csum_bits,
613 				 struct libeth_rx_pt decoded)
614 {
615 	bool ipv4, ipv6;
616 
617 	/* check if Rx checksum is enabled */
618 	if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
619 		return;
620 
621 	/* check if HW has decoded the packet and checksum */
622 	if (unlikely(!csum_bits.l3l4p))
623 		return;
624 
625 	ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
626 	ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;
627 
628 	/* Check if there were any checksum errors */
629 	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
630 		goto checksum_fail;
631 
632 	/* Device could not do any checksum offload for certain extension
633 	 * headers as indicated by setting IPV6EXADD bit
634 	 */
635 	if (unlikely(ipv6 && csum_bits.ipv6exadd))
636 		return;
637 
638 	/* check for L4 errors and handle packets that were not able to be
639 	 * checksummed due to arrival speed
640 	 */
641 	if (unlikely(csum_bits.l4e))
642 		goto checksum_fail;
643 
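	/* an outer UDP checksum error on a tunneled packet is also fatal */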
644 	if (unlikely(csum_bits.nat && csum_bits.eudpe))
645 		goto checksum_fail;
646 
647 	/* Handle packets that were not able to be checksummed due to arrival
648 	 * speed; in this case the stack can compute the csum.
649 	 */
650 	if (unlikely(csum_bits.pprs))
651 		return;
652 
653 	/* If there is an outer header present that might contain a checksum
654 	 * we need to bump the checksum level by 1 to reflect the fact that
655 	 * we are indicating we validated the inner checksum.
656 	 */
657 	if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
658 		skb->csum_level = 1;
659 
660 	skb->ip_summed = CHECKSUM_UNNECESSARY;
661 	return;
662 
663 checksum_fail:
664 	u64_stats_update_begin(&rxq->stats_sync);
665 	u64_stats_inc(&rxq->q_stats.hw_csum_err);
666 	u64_stats_update_end(&rxq->stats_sync);
667 }
668 
669 /**
670  * idpf_rx_singleq_base_csum - Decode checksum bits from the base Rx descriptor
671  * @rx_desc: the receive descriptor
672  *
673  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
674  * descriptor writeback format.
675  *
676  * Return: parsed checksum status.
677  **/
678 static struct idpf_rx_csum_decoded
679 idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
680 {
681 	struct idpf_rx_csum_decoded csum_bits = { };
682 	u32 rx_error, rx_status;
683 	u64 qword;
684 
685 	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
686 
687 	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
688 	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);
689 
690 	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
691 	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
692 				   rx_error);
693 	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
694 	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
695 				   rx_error);
696 	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
697 				    rx_status);
698 	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
699 					rx_status);
700 
701 	return csum_bits;
702 }
703 
704 /**
705  * idpf_rx_singleq_flex_csum - Decode checksum bits from the flex Rx descriptor
706  * @rx_desc: the receive descriptor
707  *
708  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
709  * descriptor writeback format.
710  *
711  * Return: parsed checksum status.
712  **/
713 static struct idpf_rx_csum_decoded
714 idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
715 {
716 	struct idpf_rx_csum_decoded csum_bits = { };
717 	u16 rx_status0, rx_status1;
718 
719 	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
720 	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);
721 
722 	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
723 				  rx_status0);
724 	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
725 				   rx_status0);
726 	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
727 				  rx_status0);
728 	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
729 				    rx_status0);
730 	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
731 				    rx_status0);
732 	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
733 					rx_status0);
734 	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
735 				  rx_status1);
736 
737 	return csum_bits;
738 }
739 
740 /**
741  * idpf_rx_singleq_base_hash - set the hash value in the skb
742  * @rx_q: Rx completion queue
743  * @skb: skb currently being received and modified
744  * @rx_desc: specific descriptor
745  * @decoded: Decoded Rx packet type related fields
746  *
747  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
748  * descriptor writeback format.
749  **/
750 static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
751 				      struct sk_buff *skb,
752 				      const union virtchnl2_rx_desc *rx_desc,
753 				      struct libeth_rx_pt decoded)
754 {
755 	u64 mask, qw1;
756 
757 	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
758 		return;
759 
760 	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
761 	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
762 
763 	if (FIELD_GET(mask, qw1) == mask) {
764 		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);
765 
766 		libeth_rx_pt_set_hash(skb, hash, decoded);
767 	}
768 }
769 
770 /**
771  * idpf_rx_singleq_flex_hash - set the hash value in the skb
772  * @rx_q: Rx completion queue
773  * @skb: skb currently being received and modified
774  * @rx_desc: specific descriptor
775  * @decoded: Decoded Rx packet type related fields
776  *
777  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
778  * descriptor writeback format.
779  **/
780 static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
781 				      struct sk_buff *skb,
782 				      const union virtchnl2_rx_desc *rx_desc,
783 				      struct libeth_rx_pt decoded)
784 {
785 	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
786 		return;
787 
788 	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
789 		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
790 		u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);
791 
792 		libeth_rx_pt_set_hash(skb, hash, decoded);
793 	}
794 }
795 
796 /**
797  * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
798  * descriptor
799  * @rx_q: Rx ring being processed
800  * @skb: pointer to current skb being populated
801  * @rx_desc: descriptor for skb
802  * @ptype: packet type
803  *
804  * This function checks the ring, descriptor, and packet information in
805  * order to populate the hash, checksum, VLAN, protocol, and
806  * other fields within the skb.
807  */
808 static void
809 idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
810 				   struct sk_buff *skb,
811 				   const union virtchnl2_rx_desc *rx_desc,
812 				   u16 ptype)
813 {
814 	struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
815 	struct idpf_rx_csum_decoded csum_bits;
816 
817 	/* modifies the skb - consumes the enet header */
818 	skb->protocol = eth_type_trans(skb, rx_q->netdev);
819 
820 	/* Check if we're using base mode descriptor IDs */
821 	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
822 		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
823 		csum_bits = idpf_rx_singleq_base_csum(rx_desc);
824 	} else {
825 		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
826 		csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
827 	}
828 
829 	idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
830 	skb_record_rx_queue(skb, rx_q->idx);
831 }
832 
833 /**
834  * idpf_rx_buf_hw_update - Store the new next_to_use and tail values
835  * @rxq: queue to bump
836  * @val: new next_to_use index
837  */
838 static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
839 {
840 	rxq->next_to_use = val;
841 
842 	if (unlikely(!rxq->tail))
843 		return;
844 
845 	/* writel has an implicit memory barrier */
846 	writel(val, rxq->tail);
847 }
848 
849 /**
850  * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
851  * @rx_q: queue for which the hw buffers are allocated
852  * @cleaned_count: number of buffers to replace
853  *
854  * Returns false if all allocations were successful, true if any fail
855  */
856 bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
857 				      u16 cleaned_count)
858 {
859 	struct virtchnl2_singleq_rx_buf_desc *desc;
860 	const struct libeth_fq_fp fq = {
861 		.pp		= rx_q->pp,
862 		.fqes		= rx_q->rx_buf,
863 		.truesize	= rx_q->truesize,
864 		.count		= rx_q->desc_count,
865 	};
866 	u16 nta = rx_q->next_to_alloc;
867 
868 	if (!cleaned_count)
869 		return false;
870 
871 	desc = &rx_q->single_buf[nta];
872 
873 	do {
874 		dma_addr_t addr;
875 
876 		addr = libeth_rx_alloc(&fq, nta);
877 		if (addr == DMA_MAPPING_ERROR)
878 			break;
879 
880 		/* Refresh the desc even if buffer_addrs didn't change
881 		 * because each write-back erases this info.
882 		 */
883 		desc->pkt_addr = cpu_to_le64(addr);
884 		desc->hdr_addr = 0;
885 		desc++;
886 
887 		nta++;
888 		if (unlikely(nta == rx_q->desc_count)) {
889 			desc = &rx_q->single_buf[0];
890 			nta = 0;
891 		}
892 
893 		cleaned_count--;
894 	} while (cleaned_count);
895 
896 	if (rx_q->next_to_alloc != nta) {
897 		idpf_rx_buf_hw_update(rx_q, nta);
898 		rx_q->next_to_alloc = nta;
899 	}
900 
901 	return !!cleaned_count;
902 }
903 
904 /**
905  * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
906  * @rx_desc: the descriptor to process
907  * @fields: storage for extracted values
908  *
909  * Decode the Rx descriptor and extract relevant information including the
910  * size and Rx packet type.
911  *
912  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
913  * descriptor writeback format.
914  */
915 static void
916 idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
917 				    struct idpf_rx_extracted *fields)
918 {
919 	u64 qword;
920 
921 	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
922 
923 	fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
924 	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
925 }
926 
927 /**
928  * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
929  * @rx_desc: the descriptor to process
930  * @fields: storage for extracted values
931  *
932  * Decode the Rx descriptor and extract relevant information including the
933  * size and Rx packet type.
934  *
935  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
936  * descriptor writeback format.
937  */
938 static void
939 idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
940 				    struct idpf_rx_extracted *fields)
941 {
942 	fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
943 				 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
944 	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
945 				     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
946 }
947 
948 /**
949  * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
950  * @rx_q: Rx descriptor queue
951  * @rx_desc: the descriptor to process
952  * @fields: storage for extracted values
953  *
954  * Dispatch to the base or flex field extraction based on the queue's RXDID
955 static void
956 idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
957 			       const union virtchnl2_rx_desc *rx_desc,
958 			       struct idpf_rx_extracted *fields)
959 {
960 	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
961 		idpf_rx_singleq_extract_base_fields(rx_desc, fields);
962 	else
963 		idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
964 }
965 
966 /**
967  * idpf_rx_singleq_clean - Reclaim resources after receive completes
968  * @rx_q: rx queue to clean
969  * @budget: Total limit on number of packets to process
970  *
971  * Returns the number of packets cleaned, or the budget on allocation failure
972  */
973 static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
974 {
975 	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
976 	struct sk_buff *skb = rx_q->skb;
977 	u16 ntc = rx_q->next_to_clean;
978 	u16 cleaned_count = 0;
979 	bool failure = false;
980 
981 	/* Process Rx packets bounded by budget */
982 	while (likely(total_rx_pkts < (unsigned int)budget)) {
983 		struct idpf_rx_extracted fields = { };
984 		union virtchnl2_rx_desc *rx_desc;
985 		struct idpf_rx_buf *rx_buf;
986 
987 		/* get the Rx desc from Rx queue based on 'next_to_clean' */
988 		rx_desc = &rx_q->rx[ntc];
989 
990 		/* status_error_ptype_len will always be zero for unused
991 		 * descriptors because it's cleared in cleanup, and overlaps
992 		 * with hdr_addr which is always zero because packet split
993 		 * isn't used. If the hardware wrote DD then the length will be
994 		 * non-zero
995 		 */
996 #define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
997 		if (!idpf_rx_singleq_test_staterr(rx_desc,
998 						  IDPF_RXD_DD))
999 			break;
1000 
1001 		/* This memory barrier is needed to keep us from reading
1002 		 * any other fields out of the rx_desc
1003 		 */
1004 		dma_rmb();
1005 
1006 		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
1007 
1008 		rx_buf = &rx_q->rx_buf[ntc];
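		/* buffers that carry no data are skipped; only the ring
		 * bookkeeping below still runs for them
		 */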
1009 		if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
1010 			goto skip_data;
1011 
1012 		if (skb)
1013 			idpf_rx_add_frag(rx_buf, skb, fields.size);
1014 		else
1015 			skb = idpf_rx_build_skb(rx_buf, fields.size);
1016 
1017 		/* exit if we failed to retrieve a buffer */
1018 		if (!skb)
1019 			break;
1020 
1021 skip_data:
1022 		rx_buf->page = NULL;
1023 
1024 		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
1025 		cleaned_count++;
1026 
1027 		/* skip if it is a non-EOP desc */
1028 		if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
1029 			continue;
1030 
1031 #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
1032 				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
1033 		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
1034 							  IDPF_RXD_ERR_S))) {
1035 			dev_kfree_skb_any(skb);
1036 			skb = NULL;
1037 			continue;
1038 		}
1039 
1040 		/* pad skb if needed (to make a valid ethernet frame) */
1041 		if (eth_skb_pad(skb)) {
1042 			skb = NULL;
1043 			continue;
1044 		}
1045 
1046 		/* probably a little skewed due to removing CRC */
1047 		total_rx_bytes += skb->len;
1048 
1049 		/* protocol */
1050 		idpf_rx_singleq_process_skb_fields(rx_q, skb,
1051 						   rx_desc, fields.rx_ptype);
1052 
1053 		/* send completed skb up the stack */
1054 		napi_gro_receive(rx_q->pp->p.napi, skb);
1055 		skb = NULL;
1056 
1057 		/* update budget accounting */
1058 		total_rx_pkts++;
1059 	}
1060 
1061 	rx_q->skb = skb;
1062 
1063 	rx_q->next_to_clean = ntc;
1064 
1065 	page_pool_nid_changed(rx_q->pp, numa_mem_id());
1066 	if (cleaned_count)
1067 		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
1068 
1069 	u64_stats_update_begin(&rx_q->stats_sync);
1070 	u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
1071 	u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
1072 	u64_stats_update_end(&rx_q->stats_sync);
1073 
1074 	/* guarantee a trip back through this routine if there was a failure */
1075 	return failure ? budget : (int)total_rx_pkts;
1076 }
1077 
1078 /**
1079  * idpf_rx_singleq_clean_all - Clean all Rx queues
1080  * @q_vec: queue vector
1081  * @budget: Used to determine if we are in netpoll
1082  * @cleaned: returns number of packets cleaned
1083  *
1084  * Returns false if clean is not complete, else returns true
1085  */
1086 static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
1087 				      int *cleaned)
1088 {
1089 	u16 num_rxq = q_vec->num_rxq;
1090 	bool clean_complete = true;
1091 	int budget_per_q, i;
1092 
1093 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1094 	 * allow the budget to go below 1 because that would exit polling early.
1095 	 */
1096 	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
1097 	for (i = 0; i < num_rxq; i++) {
1098 		struct idpf_rx_queue *rxq = q_vec->rx[i];
1099 		int pkts_cleaned_per_q;
1100 
1101 		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);
1102 
1103 		/* if we clean as many as budgeted, we must not be done */
1104 		if (pkts_cleaned_per_q >= budget_per_q)
1105 			clean_complete = false;
1106 		*cleaned += pkts_cleaned_per_q;
1107 	}
1108 
1109 	return clean_complete;
1110 }
1111 
1112 /**
1113  * idpf_vport_singleq_napi_poll - NAPI handler
1114  * @napi: struct from which you get q_vector
1115  * @budget: budget provided by stack
1116  */
1117 int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
1118 {
1119 	struct idpf_q_vector *q_vector =
1120 				container_of(napi, struct idpf_q_vector, napi);
1121 	bool clean_complete;
1122 	int work_done = 0;
1123 
1124 	/* Handle case where we are called by netpoll with a budget of 0 */
1125 	if (budget <= 0) {
1126 		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);
1127 
1128 		return budget;
1129 	}
1130 
1131 	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
1132 						   &work_done);
1133 	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
1134 						    &work_done);
1135 
1136 	/* If work not completed, return budget and polling will return */
1137 	if (!clean_complete)
1138 		return budget;
1139 
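	/* report strictly less than the full budget so NAPI can complete */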
1140 	work_done = min_t(int, work_done, budget - 1);
1141 
1142 	/* Exit the polling mode, but don't re-enable interrupts if stack might
1143 	 * poll us due to busy-polling
1144 	 */
1145 	if (likely(napi_complete_done(napi, work_done)))
1146 		idpf_vport_intr_update_itr_ena_irq(q_vector);
1147 
1148 	return work_done;
1149 }
1150