xref: /linux/drivers/net/ethernet/google/gve/gve_rx_dqo.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

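/* Free the coherent DMA region backing the per-ring header-split buffers,
 * if it was allocated.
 */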
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

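/* Reset the buffer and completion queue indices, drop any in-progress skb
 * context, and rebuild the free list linking all buffer states together.
 */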
static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

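/* Zero the descriptor rings and queue resources, release any pages still
 * attached to buffer states, and return the ring to its initial state.
 */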
static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (rx->dqo.page_pool)
				gve_free_to_page_pool(rx, bs, false);
			else
				gve_free_qpl_page_dqo(bs);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

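/* Detach the ring from its notify block, delete its NAPI instance and reset
 * the ring state. Does nothing if the ring was never added to a block.
 */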
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

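/* Release every resource owned by the ring: queue resources, buffer pages,
 * the queue page list or page pool, both descriptor rings and the header
 * buffers. Counterpart of gve_rx_alloc_ring_dqo().
 */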
void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (rx->dqo.page_pool)
			gve_free_to_page_pool(rx, bs, false);
		else
			gve_free_qpl_page_dqo(bs);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	if (rx->dqo.page_pool) {
		page_pool_destroy(rx->dqo.page_pool);
		rx->dqo.page_pool = NULL;
	}

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

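/* Allocate one coherent DMA region large enough to hold a header buffer for
 * every slot in the buffer queue, used when header split is enabled.
 */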
static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

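/* Attach the ring to its notify block and register the DQO NAPI poll handler. */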
void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

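/* Allocate all memory for a single DQO RX ring: buffer states, optional
 * header buffers, the completion and buffer descriptor rings, a page pool
 * (raw addressing) or queue page list (QPL mode), and the queue resources.
 * On any failure everything allocated so far is freed.
 */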
int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	struct page_pool *pool;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;

	rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (cfg->raw_addressing) {
		pool = gve_rx_create_page_pool(priv, rx);
		if (IS_ERR(pool))
			goto err;

		rx->dqo.page_pool = pool;
	} else {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

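/* Tell the device how far the buffer queue tail has advanced by writing it
 * to the ring's doorbell in BAR2.
 */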
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

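/* Allocate the ring array (sized for the maximum queue count) and set up the
 * first num_queues rings, unwinding everything on failure.
 */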
int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

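/* Free each allocated RX ring and the ring array itself. */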
void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

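/* Post as many buffers to the buffer queue as free buffer-queue and
 * completion-queue slots allow, ringing the doorbell every
 * GVE_RX_BUF_THRESH_DQO slots.
 */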
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

		if (unlikely(gve_alloc_buffer(rx, desc))) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_buf_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			break;
		}

		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

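/* Set skb->ip_summed based on the checksum work the device reported in the
 * completion descriptor and on the parsed packet type.
 */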
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

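/* Record the RSS hash from the completion descriptor, choosing the hash type
 * from the parsed L3/L4 packet type.
 */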
static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

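/* Drop a partially built skb chain, also clearing napi->skb if it points at
 * the head being freed.
 */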
static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

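/* In QPL mode, report whether so many buffer states are in use that received
 * data should be copied into freshly allocated pages instead of holding on
 * to more QPL pages.
 */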
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
		     (rx->dqo.num_buf_states -
		     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

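/* Copy the received fragment into a newly allocated page, attach that page
 * to the tail skb and recycle the original buffer state.
 */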
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chain multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->dqo.page_pool)
			skb_mark_for_recycle(skb);

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, buf_state->page_info.buf_size);
	gve_reuse_buffer(rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_free_buffer(rx, buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* The page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;

			if (rx->dqo.page_pool)
				skb_mark_for_recycle(rx->ctx.skb_head);
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_free_buffer(rx, buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	if (rx->dqo.page_pool)
		skb_mark_for_recycle(rx->ctx.skb_head);

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			buf_state->page_info.buf_size);
	gve_reuse_buffer(rx, buf_state);
	return 0;

error:
	gve_free_buffer(rx, buf_state);
	return -ENOMEM;
}

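/* Populate the GSO fields needed for a hardware-coalesced (RSC) TCP packet;
 * non-TCP or unknown L3 types are rejected with -EINVAL.
 */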
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

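/* NAPI poll loop for a DQO RX ring: consume completion descriptors, build
 * skbs, hand completed packets to GRO and repost buffers. Returns the number
 * of packets processed, up to budget.
 */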
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}
797