// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

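/* Free the DMA-coherent block backing this ring's header buffers, if any. */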
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

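/* Reset the buffer and completion queue state, clear the in-progress SKB
 * context, and rebuild the free list of buffer states.
 */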
static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

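/* Zero the descriptor rings and queue resources, release any pages still held
 * by buffer states, and re-initialize the ring state.
 */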
static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (rx->dqo.page_pool)
				gve_free_to_page_pool(rx, bs, false);
			else
				gve_free_qpl_page_dqo(bs);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

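/* Quiesce an RX ring: disable page pool direct recycling, remove its NAPI
 * instance, detach the ring from its notify block, and reset its state.
 */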
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	if (rx->dqo.page_pool)
		page_pool_disable_direct_recycling(rx->dqo.page_pool);
	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

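/* Release everything owned by an RX ring: queue resources, buffer pages, the
 * QPL or page pool, both descriptor rings, buffer states, and header buffers.
 */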
void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (rx->dqo.page_pool)
			gve_free_to_page_pool(rx, bs, false);
		else
			gve_free_qpl_page_dqo(bs);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	if (rx->dqo.page_pool) {
		page_pool_destroy(rx->dqo.page_pool);
		rx->dqo.page_pool = NULL;
	}

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

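/* Allocate a DMA-coherent block of header buffers, one per buffer queue slot,
 * for header-split.
 */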
static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

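/* Allocate a single RX ring: buffer states, optional header buffers, the
 * completion and buffer descriptor rings, a page pool or QPL for data
 * buffers, and the queue resources shared with the device.
 */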
int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	struct page_pool *pool;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->packet_buffer_size = cfg->packet_buffer_size;

	if (cfg->xdp) {
		rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
		rx->rx_headroom = XDP_PACKET_HEADROOM;
	} else {
		rx->packet_buffer_truesize = rx->packet_buffer_size;
		rx->rx_headroom = 0;
	}

	rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (cfg->raw_addressing) {
		pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
		if (IS_ERR(pool))
			goto err;

		rx->dqo.page_pool = pool;
	} else {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

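/* Tell the device about newly posted buffers by writing the buffer queue tail
 * to the ring's doorbell.
 */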
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

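/* Allocate an array of RX rings sized for the maximum queue count and set up
 * the first num_queues of them.
 */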
int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

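/* Post as many buffers as both the buffer queue and the completion queue have
 * room for, ringing the doorbell whenever the tail crosses a
 * GVE_RX_BUF_THRESH_DQO boundary.
 */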
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

		if (unlikely(gve_alloc_buffer(rx, desc))) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_buf_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			break;
		}

		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

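/* Set skb->ip_summed from the completion descriptor's checksum bits according
 * to the parsed packet type.
 */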
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

/* Expand the hardware timestamp to its full 64-bit width and add it to the
 * skb.
 *
 * This algorithm works by using the passed hardware timestamp to generate a
 * diff relative to the last read of the nic clock. This diff can be positive
 * or negative, as it is possible that we have read the clock more recently
 * than the hardware has received this packet. To detect this, we use the high
 * bit of the diff, and assume that the read is more recent if the high bit is
 * set. In this case we invert the process.
 *
 * Note that this means if the time delta between packet reception and the
 * last clock read is greater than ~2 seconds, this will provide invalid
 * results.
 */
static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts)
{
	u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter);
	struct sk_buff *skb = rx->ctx.skb_head;
	u32 low = (u32)last_read;
	s32 diff = hwts - low;

	skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff);
}

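/* Drop a partially built SKB chain after an error, detaching it from NAPI if
 * it came from napi_get_frags().
 */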
static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

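/* In QPL mode, decide whether to fall back to copying into freshly allocated
 * pages because nearly all buffer states are in flight.
 */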
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
		     (rx->dqo.num_buf_states -
		     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

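/* Copy the received fragment into a newly allocated page so the QPL buffer
 * can be returned for reuse right away.
 */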
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

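/* Attach the received buffer to the SKB as a page fragment, going through
 * netmem when a page pool is in use.
 */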
static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				int num_frags, u16 buf_len)
{
	if (rx->dqo.page_pool) {
		skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
				       buf_state->page_info.netmem,
				       buf_state->page_info.page_offset +
				       buf_state->page_info.pad, buf_len,
				       buf_state->page_info.buf_size);
	} else {
		skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
				buf_state->page_info.page,
				buf_state->page_info.page_offset +
				buf_state->page_info.pad, buf_len,
				buf_state->page_info.buf_size);
	}
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->dqo.page_pool)
			skb_mark_for_recycle(skb);

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
	}

	/* Trigger on-demand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
	gve_reuse_buffer(rx, buf_state);
	return 0;
}

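/* Account for a non-XDP_PASS verdict and release the buffer. XDP_TX and
 * XDP_REDIRECT are recorded as errors here rather than carried out.
 */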
static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			     struct xdp_buff *xdp, struct bpf_prog *xprog,
			     int xdp_act,
			     struct gve_rx_buf_state_dqo *buf_state)
{
	u64_stats_update_begin(&rx->statss);
	switch (xdp_act) {
	case XDP_ABORTED:
	case XDP_DROP:
	default:
		rx->xdp_actions[xdp_act]++;
		break;
	case XDP_TX:
		rx->xdp_tx_errors++;
		break;
	case XDP_REDIRECT:
		rx->xdp_redirect_errors++;
		break;
	}
	u64_stats_update_end(&rx->statss);
	gve_free_buffer(rx, buf_state);
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	struct bpf_prog *xprog;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_free_buffer(rx, buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	if (rx->dqo.page_pool) {
		if (!netmem_is_net_iov(buf_state->page_info.netmem))
			prefetch(netmem_to_page(buf_state->page_info.netmem));
	} else {
		prefetch(buf_state->page_info.page);
	}

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;

			if (rx->dqo.page_pool)
				skb_mark_for_recycle(rx->ctx.skb_head);
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of the DMA buffer for the CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset +
				      buf_state->page_info.pad,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	xprog = READ_ONCE(priv->xdp_prog);
	if (xprog) {
		struct xdp_buff xdp;
		void *old_data;
		int xdp_act;

		xdp_init_buff(&xdp, buf_state->page_info.buf_size,
			      &rx->xdp_rxq);
		xdp_prepare_buff(&xdp,
				 buf_state->page_info.page_address +
				 buf_state->page_info.page_offset,
				 buf_state->page_info.pad,
				 buf_len, false);
		old_data = xdp.data;
		xdp_act = bpf_prog_run_xdp(xprog, &xdp);
		buf_state->page_info.pad += xdp.data - old_data;
		buf_len = xdp.data_end - xdp.data;
		if (xdp_act != XDP_PASS) {
			gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
					 buf_state);
			return 0;
		}

		u64_stats_update_begin(&rx->statss);
		rx->xdp_actions[XDP_PASS]++;
		u64_stats_update_end(&rx->statss);
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_free_buffer(rx, buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	if (rx->dqo.page_pool)
		skb_mark_for_recycle(rx->ctx.skb_head);

	gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
	gve_reuse_buffer(rx, buf_state);
	return 0;

error:
	gve_free_buffer(rx, buf_state);
	return -ENOMEM;
}

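/* Fill in GSO metadata (gso_type and gso_size) for an RSC-coalesced TCP
 * packet.
 */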
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, a negative error code otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)
		gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts));

	/* RSC packets must set gso_size, otherwise the TCP stack will complain
	 * that packets are larger than the MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

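/* NAPI poll for an RX ring: process completion descriptors until up to budget
 * packets have been handed to GRO, then repost buffers and update stats.
 */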
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}
907