xref: /linux/drivers/net/ethernet/google/gve/gve_rx_dqo.c (revision 1e15510b71c99c6e49134d756df91069f7d18141)
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

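/* Free the DMA-coherent header buffer region used for header-split, if one
 * was allocated for this ring.
 */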
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

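/* Reset the buffer and completion queue indices, clear the in-progress skb
 * context, and rebuild the free list of buffer states.
 */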
static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

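/* Return the ring to its post-allocation state: zero the descriptor rings and
 * queue resources, release any pages still held by buffer states, and
 * reinitialize the ring state.
 */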
static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (rx->dqo.page_pool)
				gve_free_to_page_pool(rx, bs, false);
			else
				gve_free_qpl_page_dqo(bs);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

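/* Quiesce an RX ring: detach it from its notify block and NAPI instance and
 * reset it so it can be restarted or freed.
 */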
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	/* Only raw-addressing rings have a page pool; QPL rings leave it NULL. */
	if (rx->dqo.page_pool)
		page_pool_disable_direct_recycling(rx->dqo.page_pool);
	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

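/* Free everything gve_rx_alloc_ring_dqo() set up for this ring: queue
 * resources, buffer pages, the QPL or page pool, both descriptor rings,
 * the buffer state array, and any header buffers.
 */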
void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (rx->dqo.page_pool)
			gve_free_to_page_pool(rx, bs, false);
		else
			gve_free_qpl_page_dqo(bs);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	if (rx->dqo.page_pool) {
		page_pool_destroy(rx->dqo.page_pool);
		rx->dqo.page_pool = NULL;
	}

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

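/* Allocate one DMA-coherent region large enough to hold a header buffer for
 * every buffer queue slot; used when header-split is enabled.
 */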
static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

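/* Attach an RX ring to its notify block and register its NAPI poll handler. */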
void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

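/* Allocate all resources for one DQO RX ring: the buffer state array,
 * optional header buffers, the completion and buffer descriptor rings, a
 * page pool (raw addressing) or queue page list (QPL mode), and the
 * DMA-visible queue resources.
 */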
int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	struct page_pool *pool;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;

	rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (cfg->raw_addressing) {
		pool = gve_rx_create_page_pool(priv, rx);
		if (IS_ERR(pool))
			goto err;

		rx->dqo.page_pool = pool;
	} else {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

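/* Tell the device how far the buffer queue tail has advanced. */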
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

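/* Allocate and initialize every RX ring described by @cfg, tearing down any
 * partially constructed rings on failure.
 */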
int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

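/* Free all RX rings previously allocated by gve_rx_alloc_rings_dqo(). */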
void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

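/* Post as many buffers as the buffer and completion queues have room for,
 * ringing the doorbell every GVE_RX_BUF_THRESH_DQO descriptors.
 */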
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

		if (unlikely(gve_alloc_buffer(rx, desc))) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_buf_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			break;
		}

		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

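/* Set skb->ip_summed based on the checksum results the device reported in
 * the completion descriptor for the parsed packet type.
 */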
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

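/* Record the RSS hash from the completion descriptor, deriving the hash type
 * from the parsed packet type.
 */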
static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

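/* Drop the partially built skb chain after an error. */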
static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

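/* In QPL mode, report whether buffer states are running low enough that the
 * payload should be copied into a freshly allocated page rather than holding
 * on to a QPL page.
 */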
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

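/* Copy the received fragment into a newly allocated page so the QPL buffer
 * backing it can be returned to the recycle list immediately.
 */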
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->dqo.page_pool)
			skb_mark_for_recycle(skb);

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, buf_state->page_info.buf_size);
	gve_reuse_buffer(rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_free_buffer(rx, buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;

			if (rx->dqo.page_pool)
				skb_mark_for_recycle(rx->ctx.skb_head);
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_free_buffer(rx, buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	if (rx->dqo.page_pool)
		skb_mark_for_recycle(rx->ctx.skb_head);

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			buf_state->page_info.buf_size);
	gve_reuse_buffer(rx, buf_state);
	return 0;

error:
	gve_free_buffer(rx, buf_state);
	return -ENOMEM;
}

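/* Fill in GSO metadata for a hardware-coalesced (RSC) TCP packet. */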
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if the skb is completed successfully, a negative error code
 * otherwise.
 */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

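/* NAPI poll handler: process up to @budget completed packets from the
 * completion queue, then repost buffers to the buffer queue.
 */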
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}