xref: /linux/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2023 Intel Corporation */
3 
4 #include <net/libeth/rx.h>
5 #include <net/libeth/tx.h>
6 
7 #include "idpf.h"
8 
9 /**
10  * idpf_tx_singleq_csum - Enable tx checksum offloads
11  * @skb: pointer to skb
12  * @off: pointer to struct that holds offload parameters
13  *
14  * Returns 0 or error (negative) if checksum offload cannot be executed, 1
15  * otherwise.
16  */
17 static int idpf_tx_singleq_csum(struct sk_buff *skb,
18 				struct idpf_tx_offload_params *off)
19 {
20 	u32 l4_len, l3_len, l2_len;
21 	union {
22 		struct iphdr *v4;
23 		struct ipv6hdr *v6;
24 		unsigned char *hdr;
25 	} ip;
26 	union {
27 		struct tcphdr *tcp;
28 		unsigned char *hdr;
29 	} l4;
30 	u32 offset, cmd = 0;
31 	u8 l4_proto = 0;
32 	__be16 frag_off;
33 	bool is_tso;
34 
35 	if (skb->ip_summed != CHECKSUM_PARTIAL)
36 		return 0;
37 
38 	ip.hdr = skb_network_header(skb);
39 	l4.hdr = skb_transport_header(skb);
40 
41 	/* compute outer L2 header size */
42 	l2_len = ip.hdr - skb->data;
43 	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
44 	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
45 	if (skb->encapsulation) {
46 		u32 tunnel = 0;
47 
48 		/* define outer network header type */
49 		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
50 			/* The stack computes the IP header already, the only
51 			 * time we need the hardware to recompute it is in the
52 			 * case of TSO.
53 			 */
54 			tunnel |= is_tso ?
55 				  IDPF_TX_CTX_EXT_IP_IPV4 :
56 				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
57 
58 			l4_proto = ip.v4->protocol;
59 		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
60 			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;
61 
62 			l4_proto = ip.v6->nexthdr;
63 			if (ipv6_ext_hdr(l4_proto))
64 				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
65 						 sizeof(*ip.v6),
66 						 &l4_proto, &frag_off);
67 		}
68 
69 		/* define outer transport */
70 		switch (l4_proto) {
71 		case IPPROTO_UDP:
72 			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
73 			break;
74 		case IPPROTO_GRE:
75 			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
76 			break;
77 		case IPPROTO_IPIP:
78 		case IPPROTO_IPV6:
79 			l4.hdr = skb_inner_network_header(skb);
80 			break;
81 		default:
82 			if (is_tso)
83 				return -1;
84 
85 			skb_checksum_help(skb);
86 
87 			return 0;
88 		}
89 		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;
90 
91 		/* compute outer L3 header size */
92 		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
93 				     (l4.hdr - ip.hdr) / 4);
94 
95 		/* switch IP header pointer from outer to inner header */
96 		ip.hdr = skb_inner_network_header(skb);
97 
98 		/* compute tunnel header size */
99 		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
100 				     (ip.hdr - l4.hdr) / 2);
101 
102 		/* indicate if we need to offload outer UDP header */
103 		if (is_tso &&
104 		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
105 		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
106 			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;
107 
108 		/* record tunnel offload values */
109 		off->cd_tunneling |= tunnel;
110 
111 		/* switch L4 header pointer from outer to inner */
112 		l4.hdr = skb_inner_transport_header(skb);
113 		l4_proto = 0;
114 
115 		/* reset type as we transition from outer to inner headers */
116 		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
117 		if (ip.v4->version == 4)
118 			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
119 		if (ip.v6->version == 6)
120 			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
121 	}
122 
123 	/* Enable IP checksum offloads */
124 	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
125 		l4_proto = ip.v4->protocol;
126 		/* See comment above regarding need for HW to recompute IP
127 		 * header checksum in the case of TSO.
128 		 */
129 		if (is_tso)
130 			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
131 		else
132 			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;
133 
134 	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
135 		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
136 		l4_proto = ip.v6->nexthdr;
137 		if (ipv6_ext_hdr(l4_proto))
138 			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
139 					 sizeof(*ip.v6), &l4_proto,
140 					 &frag_off);
141 	} else {
142 		return -1;
143 	}
144 
145 	/* compute inner L3 header size */
146 	l3_len = l4.hdr - ip.hdr;
147 	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;
148 
149 	/* Enable L4 checksum offloads */
150 	switch (l4_proto) {
151 	case IPPROTO_TCP:
152 		/* enable checksum offloads */
153 		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
154 		l4_len = l4.tcp->doff;
155 		break;
156 	case IPPROTO_UDP:
157 		/* enable UDP checksum offload */
158 		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
159 		l4_len = sizeof(struct udphdr) >> 2;
160 		break;
161 	case IPPROTO_SCTP:
162 		/* enable SCTP checksum offload */
163 		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
164 		l4_len = sizeof(struct sctphdr) >> 2;
165 		break;
166 	default:
167 		if (is_tso)
168 			return -1;
169 
170 		skb_checksum_help(skb);
171 
172 		return 0;
173 	}
174 
175 	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
176 	off->td_cmd |= cmd;
177 	off->hdr_offsets |= offset;
178 
179 	return 1;
180 }
181 
/**
 * idpf_tx_singleq_map - Build the Tx base descriptor
 * @tx_q: queue to send buffer on
 * @first: first buffer info buffer to use
 * @offloads: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit base mode descriptor.
 *
 * The linear part of the skb is mapped first, followed by every page
 * fragment. A mapping larger than IDPF_TX_MAX_DESC_DATA is spread over
 * several descriptors. The last descriptor written gets the EOP and RS
 * command bits. If a DMA mapping fails, idpf_tx_dma_map_error() is called
 * and the function returns without touching the tail.
 */
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
				struct idpf_tx_buf *first,
				struct idpf_tx_offload_params *offloads)
{
	u32 offsets = offloads->hdr_offsets;
	struct idpf_tx_buf *tx_buf = first;
	struct idpf_base_tx_desc *tx_desc;
	struct sk_buff *skb = first->skb;
	u64 td_cmd = offloads->td_cmd;
	unsigned int data_len, size;
	u16 i = tx_q->next_to_use;
	struct netdev_queue *nq;
	skb_frag_t *frag;
	dma_addr_t dma;
	u64 td_tag = 0;

	/* data_len counts the paged bytes still to be mapped, size is the
	 * length of the mapping currently being described
	 */
	data_len = skb->data_len;
	size = skb_headlen(skb);

	tx_desc = &tx_q->base_tx[i];

	/* map the linear part; the result is checked at the top of the loop */
	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);

	/* write each descriptor with CRC bit */
	if (idpf_queue_has(CRC_EN, tx_q))
		td_cmd |= IDPF_TX_DESC_CMD_ICRC;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;

		if (dma_mapping_error(tx_q->dev, dma))
			return idpf_tx_dma_map_error(tx_q, skb, first, i);

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buf, len, size);
		dma_unmap_addr_set(tx_buf, dma, dma);
		tx_buf->type = LIBETH_SQE_FRAG;

		/* align size to end of page */
		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
		tx_desc->buf_addr = cpu_to_le64(dma);

		/* account for data chunks larger than the hardware
		 * can handle
		 */
		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
								  offsets,
								  max_data,
								  td_tag);
			/* advance descriptor and buffer rings together,
			 * wrapping at the end of the ring
			 */
			if (unlikely(++i == tx_q->desc_count)) {
				tx_buf = &tx_q->tx_buf[0];
				tx_desc = &tx_q->base_tx[0];
				i = 0;
			} else {
				tx_buf++;
				tx_desc++;
			}

			/* the continuation descriptor reuses the mapping
			 * recorded above, so its buffer entry stays empty
			 */
			tx_buf->type = LIBETH_SQE_EMPTY;

			dma += max_data;
			size -= max_data;

			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
			tx_desc->buf_addr = cpu_to_le64(dma);
		}

		/* no paged data left: the current descriptor is the last one
		 * and is completed after the loop with EOP and RS set
		 */
		if (!data_len)
			break;

		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
							  size, td_tag);

		if (unlikely(++i == tx_q->desc_count)) {
			tx_buf = &tx_q->tx_buf[0];
			tx_desc = &tx_q->base_tx[0];
			i = 0;
		} else {
			tx_buf++;
			tx_desc++;
		}

		/* map the next page fragment for the following iteration */
		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
				       DMA_TO_DEVICE);
	}

	skb_tx_timestamp(first->skb);

	/* write last descriptor with RS and EOP bits */
	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);

	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
						  size, td_tag);

	/* the first buffer owns the skb and remembers the index of the
	 * descriptor carrying RS, which the clean routine polls for completion
	 */
	first->type = LIBETH_SQE_SKB;
	first->rs_idx = i;

	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);

	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
	netdev_tx_sent_queue(nq, first->bytes);

	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
}
300 
301 /**
302  * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
303  * @txq: queue to put context descriptor on
304  *
305  * Since the TX buffer rings mimics the descriptor ring, update the tx buffer
306  * ring entry to reflect that this index is a context descriptor
307  */
308 static struct idpf_base_tx_ctx_desc *
309 idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
310 {
311 	struct idpf_base_tx_ctx_desc *ctx_desc;
312 	int ntu = txq->next_to_use;
313 
314 	txq->tx_buf[ntu].type = LIBETH_SQE_CTX;
315 
316 	ctx_desc = &txq->base_ctx[ntu];
317 
318 	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
319 	txq->next_to_use = ntu;
320 
321 	return ctx_desc;
322 }
323 
324 /**
325  * idpf_tx_singleq_build_ctx_desc - populate context descriptor
326  * @txq: queue to send buffer on
327  * @offload: offload parameter structure
328  **/
329 static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
330 					   struct idpf_tx_offload_params *offload)
331 {
332 	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
333 	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;
334 
335 	if (offload->tso_segs) {
336 		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
337 		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
338 				  offload->tso_len);
339 		qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);
340 
341 		u64_stats_update_begin(&txq->stats_sync);
342 		u64_stats_inc(&txq->q_stats.lso_pkts);
343 		u64_stats_update_end(&txq->stats_sync);
344 	}
345 
346 	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);
347 
348 	desc->qw0.l2tag2 = 0;
349 	desc->qw0.rsvd1 = 0;
350 	desc->qw1 = cpu_to_le64(qw1);
351 }
352 
353 /**
354  * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
355  * @skb: send buffer
356  * @tx_q: queue to send buffer on
357  *
358  * Returns NETDEV_TX_OK if sent, else an error code
359  */
360 netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
361 				  struct idpf_tx_queue *tx_q)
362 {
363 	struct idpf_tx_offload_params offload = { };
364 	struct idpf_tx_buf *first;
365 	unsigned int count;
366 	__be16 protocol;
367 	int csum, tso;
368 
369 	count = idpf_tx_desc_count_required(tx_q, skb);
370 	if (unlikely(!count))
371 		return idpf_tx_drop_skb(tx_q, skb);
372 
373 	if (idpf_tx_maybe_stop_common(tx_q,
374 				      count + IDPF_TX_DESCS_PER_CACHE_LINE +
375 				      IDPF_TX_DESCS_FOR_CTX)) {
376 		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
377 
378 		u64_stats_update_begin(&tx_q->stats_sync);
379 		u64_stats_inc(&tx_q->q_stats.q_busy);
380 		u64_stats_update_end(&tx_q->stats_sync);
381 
382 		return NETDEV_TX_BUSY;
383 	}
384 
385 	protocol = vlan_get_protocol(skb);
386 	if (protocol == htons(ETH_P_IP))
387 		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
388 	else if (protocol == htons(ETH_P_IPV6))
389 		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;
390 
391 	tso = idpf_tso(skb, &offload);
392 	if (tso < 0)
393 		goto out_drop;
394 
395 	csum = idpf_tx_singleq_csum(skb, &offload);
396 	if (csum < 0)
397 		goto out_drop;
398 
399 	if (tso || offload.cd_tunneling)
400 		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);
401 
402 	/* record the location of the first descriptor for this packet */
403 	first = &tx_q->tx_buf[tx_q->next_to_use];
404 	first->skb = skb;
405 
406 	if (tso) {
407 		first->packets = offload.tso_segs;
408 		first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len);
409 	} else {
410 		first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
411 		first->packets = 1;
412 	}
413 	idpf_tx_singleq_map(tx_q, first, &offload);
414 
415 	return NETDEV_TX_OK;
416 
417 out_drop:
418 	return idpf_tx_drop_skb(tx_q, skb);
419 }
420 
/**
 * idpf_tx_singleq_clean - Reclaim resources from queue
 * @tx_q: Tx queue to clean
 * @napi_budget: Used to determine if we are in netpoll
 * @cleaned: returns number of packets cleaned
 *
 * Walks the ring from next_to_clean, completes every packet whose RS
 * descriptor has been marked done by hardware, updates the queue statistics
 * and reports the completed work to the stack.
 *
 * Return: true if the walk stopped before the queue's clean budget was used
 * up, false if the budget was exhausted.
 */
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
				  int *cleaned)
{
	struct libeth_sq_napi_stats ss = { };
	struct idpf_base_tx_desc *tx_desc;
	u32 budget = tx_q->clean_budget;
	s16 ntc = tx_q->next_to_clean;
	struct libeth_cq_pp cp = {
		.dev	= tx_q->dev,
		.ss	= &ss,
		.napi	= napi_budget,
	};
	struct idpf_netdev_priv *np;
	struct idpf_tx_buf *tx_buf;
	struct netdev_queue *nq;
	bool dont_wake;

	tx_desc = &tx_q->base_tx[ntc];
	tx_buf = &tx_q->tx_buf[ntc];
	/* bias the index by -desc_count so that reaching the end of the ring
	 * shows up as ntc == 0; the bias is removed again after the loop
	 */
	ntc -= tx_q->desc_count;

	do {
		struct idpf_base_tx_desc *eop_desc;

		/* If this entry in the ring was used as a context descriptor,
		 * it's corresponding entry in the buffer ring will indicate as
		 * such. We can skip this descriptor since there is no buffer
		 * to clean.
		 */
		if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) {
			tx_buf->type = LIBETH_SQE_EMPTY;
			goto fetch_next_txq_desc;
		}

		/* anything else than the head buffer of a packet ends the walk */
		if (unlikely(tx_buf->type != LIBETH_SQE_SKB))
			break;

		/* prevent any other reads prior to type */
		smp_rmb();

		/* rs_idx was recorded at map time and names the descriptor
		 * that carries the RS bit for this packet
		 */
		eop_desc = &tx_q->base_tx[tx_buf->rs_idx];

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->qw1 &
		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
			break;

		/* update the statistics for this packet */
		libeth_tx_complete(tx_buf, &cp);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buf++;
			tx_desc++;
			ntc++;
			if (unlikely(!ntc)) {
				ntc -= tx_q->desc_count;
				tx_buf = tx_q->tx_buf;
				tx_desc = &tx_q->base_tx[0];
			}

			/* unmap any remaining paged data */
			libeth_tx_complete(tx_buf, &cp);
		}

		/* update budget only if we did something */
		budget--;

fetch_next_txq_desc:
		/* step to the next ring entry, wrapping at the end */
		tx_buf++;
		tx_desc++;
		ntc++;
		if (unlikely(!ntc)) {
			ntc -= tx_q->desc_count;
			tx_buf = tx_q->tx_buf;
			tx_desc = &tx_q->base_tx[0];
		}
	} while (likely(budget));

	/* undo the bias to get a regular ring index again */
	ntc += tx_q->desc_count;
	tx_q->next_to_clean = ntc;

	*cleaned += ss.packets;

	u64_stats_update_begin(&tx_q->stats_sync);
	u64_stats_add(&tx_q->q_stats.packets, ss.packets);
	u64_stats_add(&tx_q->q_stats.bytes, ss.bytes);
	u64_stats_update_end(&tx_q->stats_sync);

	np = netdev_priv(tx_q->netdev);
	nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);

	/* do not request a queue wake-up while the vport is not up or the
	 * carrier is off
	 */
	dont_wake = np->state != __IDPF_VPORT_UP ||
		    !netif_carrier_ok(tx_q->netdev);
	__netif_txq_completed_wake(nq, ss.packets, ss.bytes,
				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
				   dont_wake);

	return !!budget;
}
528 
529 /**
530  * idpf_tx_singleq_clean_all - Clean all Tx queues
531  * @q_vec: queue vector
532  * @budget: Used to determine if we are in netpoll
533  * @cleaned: returns number of packets cleaned
534  *
535  * Returns false if clean is not complete else returns true
536  */
537 static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
538 				      int *cleaned)
539 {
540 	u16 num_txq = q_vec->num_txq;
541 	bool clean_complete = true;
542 	int i, budget_per_q;
543 
544 	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
545 	for (i = 0; i < num_txq; i++) {
546 		struct idpf_tx_queue *q;
547 
548 		q = q_vec->tx[i];
549 		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
550 							cleaned);
551 	}
552 
553 	return clean_complete;
554 }
555 
556 /**
557  * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
558  * status and error fields
559  * @rx_desc: pointer to receive descriptor (in le64 format)
560  * @stat_err_bits: value to mask
561  *
562  * This function does some fast chicanery in order to return the
563  * value of the mask which is really only used for boolean tests.
564  * The status_error_ptype_len doesn't need to be shifted because it begins
565  * at offset zero.
566  */
567 static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
568 					 const u64 stat_err_bits)
569 {
570 	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
571 		  cpu_to_le64(stat_err_bits));
572 }
573 
574 /**
575  * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
576  * @rx_desc: Rx descriptor for current buffer
577  */
578 static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
579 {
580 	/* if we are the last buffer then there is nothing else to do */
581 	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
582 		return false;
583 
584 	return true;
585 }
586 
587 /**
588  * idpf_rx_singleq_csum - Indicate in skb if checksum is good
589  * @rxq: Rx ring being processed
590  * @skb: skb currently being received and modified
591  * @csum_bits: checksum bits from descriptor
592  * @decoded: the packet type decoded by hardware
593  *
594  * skb->protocol must be set before this function is called
595  */
596 static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
597 				 struct sk_buff *skb,
598 				 struct idpf_rx_csum_decoded csum_bits,
599 				 struct libeth_rx_pt decoded)
600 {
601 	bool ipv4, ipv6;
602 
603 	/* check if Rx checksum is enabled */
604 	if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
605 		return;
606 
607 	/* check if HW has decoded the packet and checksum */
608 	if (unlikely(!csum_bits.l3l4p))
609 		return;
610 
611 	ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
612 	ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;
613 
614 	/* Check if there were any checksum errors */
615 	if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
616 		goto checksum_fail;
617 
618 	/* Device could not do any checksum offload for certain extension
619 	 * headers as indicated by setting IPV6EXADD bit
620 	 */
621 	if (unlikely(ipv6 && csum_bits.ipv6exadd))
622 		return;
623 
624 	/* check for L4 errors and handle packets that were not able to be
625 	 * checksummed due to arrival speed
626 	 */
627 	if (unlikely(csum_bits.l4e))
628 		goto checksum_fail;
629 
630 	if (unlikely(csum_bits.nat && csum_bits.eudpe))
631 		goto checksum_fail;
632 
633 	/* Handle packets that were not able to be checksummed due to arrival
634 	 * speed, in this case the stack can compute the csum.
635 	 */
636 	if (unlikely(csum_bits.pprs))
637 		return;
638 
639 	/* If there is an outer header present that might contain a checksum
640 	 * we need to bump the checksum level by 1 to reflect the fact that
641 	 * we are indicating we validated the inner checksum.
642 	 */
643 	if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
644 		skb->csum_level = 1;
645 
646 	skb->ip_summed = CHECKSUM_UNNECESSARY;
647 	return;
648 
649 checksum_fail:
650 	u64_stats_update_begin(&rxq->stats_sync);
651 	u64_stats_inc(&rxq->q_stats.hw_csum_err);
652 	u64_stats_update_end(&rxq->stats_sync);
653 }
654 
655 /**
656  * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
657  * @rx_desc: the receive descriptor
658  *
659  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
660  * descriptor writeback format.
661  *
662  * Return: parsed checksum status.
663  **/
664 static struct idpf_rx_csum_decoded
665 idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
666 {
667 	struct idpf_rx_csum_decoded csum_bits = { };
668 	u32 rx_error, rx_status;
669 	u64 qword;
670 
671 	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
672 
673 	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
674 	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);
675 
676 	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
677 	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
678 				   rx_error);
679 	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
680 	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
681 				   rx_error);
682 	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
683 				    rx_status);
684 	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
685 					rx_status);
686 
687 	return csum_bits;
688 }
689 
690 /**
691  * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
692  * @rx_desc: the receive descriptor
693  *
694  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
695  * descriptor writeback format.
696  *
697  * Return: parsed checksum status.
698  **/
699 static struct idpf_rx_csum_decoded
700 idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
701 {
702 	struct idpf_rx_csum_decoded csum_bits = { };
703 	u16 rx_status0, rx_status1;
704 
705 	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
706 	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);
707 
708 	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
709 				  rx_status0);
710 	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
711 				   rx_status0);
712 	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
713 				  rx_status0);
714 	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
715 				    rx_status0);
716 	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
717 				    rx_status0);
718 	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
719 					rx_status0);
720 	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
721 				  rx_status1);
722 
723 	return csum_bits;
724 }
725 
726 /**
727  * idpf_rx_singleq_base_hash - set the hash value in the skb
728  * @rx_q: Rx completion queue
729  * @skb: skb currently being received and modified
730  * @rx_desc: specific descriptor
731  * @decoded: Decoded Rx packet type related fields
732  *
733  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
734  * descriptor writeback format.
735  **/
736 static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
737 				      struct sk_buff *skb,
738 				      const union virtchnl2_rx_desc *rx_desc,
739 				      struct libeth_rx_pt decoded)
740 {
741 	u64 mask, qw1;
742 
743 	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
744 		return;
745 
746 	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
747 	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
748 
749 	if (FIELD_GET(mask, qw1) == mask) {
750 		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);
751 
752 		libeth_rx_pt_set_hash(skb, hash, decoded);
753 	}
754 }
755 
756 /**
757  * idpf_rx_singleq_flex_hash - set the hash value in the skb
758  * @rx_q: Rx completion queue
759  * @skb: skb currently being received and modified
760  * @rx_desc: specific descriptor
761  * @decoded: Decoded Rx packet type related fields
762  *
763  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
764  * descriptor writeback format.
765  **/
766 static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
767 				      struct sk_buff *skb,
768 				      const union virtchnl2_rx_desc *rx_desc,
769 				      struct libeth_rx_pt decoded)
770 {
771 	if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
772 		return;
773 
774 	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
775 		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
776 		u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);
777 
778 		libeth_rx_pt_set_hash(skb, hash, decoded);
779 	}
780 }
781 
782 /**
783  * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
784  * descriptor
785  * @rx_q: Rx ring being processed
786  * @skb: pointer to current skb being populated
787  * @rx_desc: descriptor for skb
788  * @ptype: packet type
789  *
790  * This function checks the ring, descriptor, and packet information in
791  * order to populate the hash, checksum, VLAN, protocol, and
792  * other fields within the skb.
793  */
794 static void
795 idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
796 				   struct sk_buff *skb,
797 				   const union virtchnl2_rx_desc *rx_desc,
798 				   u16 ptype)
799 {
800 	struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
801 	struct idpf_rx_csum_decoded csum_bits;
802 
803 	/* modifies the skb - consumes the enet header */
804 	skb->protocol = eth_type_trans(skb, rx_q->netdev);
805 
806 	/* Check if we're using base mode descriptor IDs */
807 	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
808 		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
809 		csum_bits = idpf_rx_singleq_base_csum(rx_desc);
810 	} else {
811 		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
812 		csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
813 	}
814 
815 	idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
816 	skb_record_rx_queue(skb, rx_q->idx);
817 }
818 
819 /**
820  * idpf_rx_buf_hw_update - Store the new tail and head values
821  * @rxq: queue to bump
822  * @val: new head index
823  */
824 static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
825 {
826 	rxq->next_to_use = val;
827 
828 	if (unlikely(!rxq->tail))
829 		return;
830 
831 	/* writel has an implicit memory barrier */
832 	writel(val, rxq->tail);
833 }
834 
835 /**
836  * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
837  * @rx_q: queue for which the hw buffers are allocated
838  * @cleaned_count: number of buffers to replace
839  *
840  * Returns false if all allocations were successful, true if any fail
841  */
842 bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
843 				      u16 cleaned_count)
844 {
845 	struct virtchnl2_singleq_rx_buf_desc *desc;
846 	const struct libeth_fq_fp fq = {
847 		.pp		= rx_q->pp,
848 		.fqes		= rx_q->rx_buf,
849 		.truesize	= rx_q->truesize,
850 		.count		= rx_q->desc_count,
851 	};
852 	u16 nta = rx_q->next_to_alloc;
853 
854 	if (!cleaned_count)
855 		return false;
856 
857 	desc = &rx_q->single_buf[nta];
858 
859 	do {
860 		dma_addr_t addr;
861 
862 		addr = libeth_rx_alloc(&fq, nta);
863 		if (addr == DMA_MAPPING_ERROR)
864 			break;
865 
866 		/* Refresh the desc even if buffer_addrs didn't change
867 		 * because each write-back erases this info.
868 		 */
869 		desc->pkt_addr = cpu_to_le64(addr);
870 		desc->hdr_addr = 0;
871 		desc++;
872 
873 		nta++;
874 		if (unlikely(nta == rx_q->desc_count)) {
875 			desc = &rx_q->single_buf[0];
876 			nta = 0;
877 		}
878 
879 		cleaned_count--;
880 	} while (cleaned_count);
881 
882 	if (rx_q->next_to_alloc != nta) {
883 		idpf_rx_buf_hw_update(rx_q, nta);
884 		rx_q->next_to_alloc = nta;
885 	}
886 
887 	return !!cleaned_count;
888 }
889 
890 /**
891  * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
892  * @rx_desc: the descriptor to process
893  * @fields: storage for extracted values
894  *
895  * Decode the Rx descriptor and extract relevant information including the
896  * size and Rx packet type.
897  *
898  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
899  * descriptor writeback format.
900  */
901 static void
902 idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
903 				    struct idpf_rx_extracted *fields)
904 {
905 	u64 qword;
906 
907 	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
908 
909 	fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
910 	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
911 }
912 
913 /**
914  * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
915  * @rx_desc: the descriptor to process
916  * @fields: storage for extracted values
917  *
918  * Decode the Rx descriptor and extract relevant information including the
919  * size and Rx packet type.
920  *
921  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
922  * descriptor writeback format.
923  */
924 static void
925 idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
926 				    struct idpf_rx_extracted *fields)
927 {
928 	fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
929 				 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
930 	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
931 				     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
932 }
933 
934 /**
935  * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
936  * @rx_q: Rx descriptor queue
937  * @rx_desc: the descriptor to process
938  * @fields: storage for extracted values
939  *
940  */
941 static void
942 idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
943 			       const union virtchnl2_rx_desc *rx_desc,
944 			       struct idpf_rx_extracted *fields)
945 {
946 	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
947 		idpf_rx_singleq_extract_base_fields(rx_desc, fields);
948 	else
949 		idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
950 }
951 
/**
 * idpf_rx_singleq_clean - Reclaim resources after receive completes
 * @rx_q: rx queue to clean
 * @budget: Total limit on number of packets to process
 *
 * Processes completed Rx descriptors starting at next_to_clean, assembles
 * the buffers into skbs, hands finished packets to GRO and refills the
 * consumed descriptors. An skb that is still incomplete when the loop ends
 * is stored in @rx_q and picked up again by the next call.
 *
 * Return: number of packets handed to the stack, or @budget if refilling
 * the ring failed so that the caller comes back through this routine.
 */
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
	struct sk_buff *skb = rx_q->skb;
	u16 ntc = rx_q->next_to_clean;
	u16 cleaned_count = 0;
	bool failure = false;

	/* Process Rx packets bounded by budget */
	while (likely(total_rx_pkts < (unsigned int)budget)) {
		struct idpf_rx_extracted fields = { };
		union virtchnl2_rx_desc *rx_desc;
		struct idpf_rx_buf *rx_buf;

		/* get the Rx desc from Rx queue based on 'next_to_clean' */
		rx_desc = &rx_q->rx[ntc];

		/* status_error_ptype_len will always be zero for unused
		 * descriptors because it's cleared in cleanup, and overlaps
		 * with hdr_addr which is always zero because packet split
		 * isn't used, if the hardware wrote DD then the length will be
		 * non-zero
		 */
#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
		if (!idpf_rx_singleq_test_staterr(rx_desc,
						  IDPF_RXD_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc
		 */
		dma_rmb();

		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

		/* when the sync helper reports nothing usable, the data is
		 * skipped but the descriptor is still consumed below
		 */
		rx_buf = &rx_q->rx_buf[ntc];
		if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
			goto skip_data;

		/* continue a packet in progress or start a new one */
		if (skb)
			idpf_rx_add_frag(rx_buf, skb, fields.size);
		else
			skb = idpf_rx_build_skb(rx_buf, fields.size);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

skip_data:
		/* the buffer entry no longer references the page */
		rx_buf->page = NULL;

		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
		cleaned_count++;

		/* skip if it is non EOP desc */
		if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
			continue;

#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
		/* drop the whole packet if hardware flagged a receive error */
		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
							  IDPF_RXD_ERR_S))) {
			dev_kfree_skb_any(skb);
			skb = NULL;
			continue;
		}

		/* pad skb if needed (to make valid ethernet frame) */
		/* on failure the skb is not freed here - presumably
		 * eth_skb_pad() already released it
		 */
		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		/* probably a little skewed due to removing CRC */
		total_rx_bytes += skb->len;

		/* protocol */
		idpf_rx_singleq_process_skb_fields(rx_q, skb,
						   rx_desc, fields.rx_ptype);

		/* send completed skb up the stack */
		napi_gro_receive(rx_q->pp->p.napi, skb);
		skb = NULL;

		/* update budget accounting */
		total_rx_pkts++;
	}

	/* remember a partially assembled packet (or NULL) for the next call */
	rx_q->skb = skb;

	rx_q->next_to_clean = ntc;

	page_pool_nid_changed(rx_q->pp, numa_mem_id());
	/* give back as many buffers as descriptors were consumed */
	if (cleaned_count)
		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);

	u64_stats_update_begin(&rx_q->stats_sync);
	u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
	u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
	u64_stats_update_end(&rx_q->stats_sync);

	/* guarantee a trip back through this routine if there was a failure */
	return failure ? budget : (int)total_rx_pkts;
}
1063 
1064 /**
1065  * idpf_rx_singleq_clean_all - Clean all Rx queues
1066  * @q_vec: queue vector
1067  * @budget: Used to determine if we are in netpoll
1068  * @cleaned: returns number of packets cleaned
1069  *
1070  * Returns false if clean is not complete else returns true
1071  */
1072 static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
1073 				      int *cleaned)
1074 {
1075 	u16 num_rxq = q_vec->num_rxq;
1076 	bool clean_complete = true;
1077 	int budget_per_q, i;
1078 
1079 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1080 	 * allow the budget to go below 1 because that would exit polling early.
1081 	 */
1082 	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
1083 	for (i = 0; i < num_rxq; i++) {
1084 		struct idpf_rx_queue *rxq = q_vec->rx[i];
1085 		int pkts_cleaned_per_q;
1086 
1087 		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);
1088 
1089 		/* if we clean as many as budgeted, we must not be done */
1090 		if (pkts_cleaned_per_q >= budget_per_q)
1091 			clean_complete = false;
1092 		*cleaned += pkts_cleaned_per_q;
1093 	}
1094 
1095 	return clean_complete;
1096 }
1097 
1098 /**
1099  * idpf_vport_singleq_napi_poll - NAPI handler
1100  * @napi: struct from which you get q_vector
1101  * @budget: budget provided by stack
1102  */
1103 int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
1104 {
1105 	struct idpf_q_vector *q_vector =
1106 				container_of(napi, struct idpf_q_vector, napi);
1107 	bool clean_complete;
1108 	int work_done = 0;
1109 
1110 	/* Handle case where we are called by netpoll with a budget of 0 */
1111 	if (budget <= 0) {
1112 		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);
1113 
1114 		return budget;
1115 	}
1116 
1117 	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
1118 						   &work_done);
1119 	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
1120 						    &work_done);
1121 
1122 	/* If work not completed, return budget and polling will return */
1123 	if (!clean_complete) {
1124 		idpf_vport_intr_set_wb_on_itr(q_vector);
1125 		return budget;
1126 	}
1127 
1128 	work_done = min_t(int, work_done, budget - 1);
1129 
1130 	/* Exit the polling mode, but don't re-enable interrupts if stack might
1131 	 * poll us due to busy-polling
1132 	 */
1133 	if (likely(napi_complete_done(napi, work_done)))
1134 		idpf_vport_intr_update_itr_ena_irq(q_vector);
1135 	else
1136 		idpf_vport_intr_set_wb_on_itr(q_vector);
1137 
1138 	return work_done;
1139 }
1140