// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

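/* Number of references on the buffer's page that are held outside the
 * driver (i.e. by SKBs handed to the stack).  gve_alloc_page_dqo() seeds
 * the page refcount with a large bias and gve_dec_pagecnt_bias() consumes
 * one unit of it per fragment given out, so page_count() minus the
 * remaining bias yields the outstanding reference count.
 */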
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

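/* Buffer states live in the rx->dqo.buf_states array and are chained into
 * singly linked lists through their ->next indices, with -1 terminating a
 * list.  A state whose ->next equals its own index is marked as allocated
 * (see gve_buf_state_is_allocated()).
 */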
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

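/* Return a buffer state that can be posted to the NIC again, or NULL if
 * none is available.  Preference order: the recycled list (immediately
 * usable), then a bounded scan of the used list for entries whose pages
 * have no outstanding references, and finally, for raw addressing only,
 * discarding a used entry so a fresh page can be allocated in its place.
 */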
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states, discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

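/* Attach a data page to @buf_state.  With raw DMA addressing a new page is
 * allocated and mapped; in QPL mode the next unused page of the
 * pre-registered queue page list is used instead.
 */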
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= priv->rx_pages_per_qpl) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
				 struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

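/* Allocate one DQO RX ring: the buffer state table, the buffer and
 * completion descriptor rings, the QPL assignment (when not using raw
 * addressing) and the queue resources block.  Queue sizes are used as
 * masks below and are therefore expected to be powers of two.
 */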
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_rx_alloc_rings_cfg *cfg,
				 struct gve_rx_ring *rx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots = cfg->raw_addressing ?
		priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		priv->rx_pages_per_qpl;
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (!cfg->raw_addressing && !cfg->qpls) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc QPL ring before allocing QPLs\n");
		return -EINVAL;
	}

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

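/* Refill the buffer queue with as many buffers as both the buffer queue
 * and the completion queue can currently accept.  The doorbell is written
 * each time the queue tail crosses a GVE_RX_BUF_THRESH_DQO boundary rather
 * than once per posted buffer.
 */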
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

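/* Decide what to do with a buffer whose page the stack may still hold
 * references to: either advance to the next buffer-sized chunk of the page
 * and put the state back on the recycled list, or, when reuse cannot be
 * proven safe, park it on the used list until all references are dropped.
 */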
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const int data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

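/* QPL-only heuristic: when nearly all buffer states are parked on the used
 * list (within GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD of the total), copy
 * incoming data into freshly allocated pages instead of waiting for QPL
 * buffers to be returned (see gve_rx_copy_ondemand()).
 */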
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
		     (rx->dqo.num_buf_states -
		     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

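/* Process a single RX completion descriptor: validate the buffer it refers
 * to, sync the DMA buffer for the CPU, then either copy small packets
 * (up to rx_copybreak) into a fresh skb or attach the page as a fragment,
 * recycling the buffer state accordingly.
 */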
/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool eop = compl_desc->end_of_packet != 0;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;

	/* The page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv) != 0)) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

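/* Fill in the GSO metadata for an RSC (hardware-coalesced) packet so the
 * stack can resegment it.  Only TCP over IPv4/IPv6 is supported.
 */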
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

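/* NAPI poll for a DQO RX ring: consume completion descriptors until the
 * generation bit shows the ring is empty or @budget packets have been
 * completed, hand finished skbs to GRO, then repost buffers and update
 * stats.  Returns the number of packets processed.
 */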
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}
889