// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

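/* Free the DMA-coherent block that backs the per-descriptor header buffers
 * used for header-split.
 */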
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

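/* Reset the buffer queue, completion queue and buffer state bookkeeping to
 * their post-allocation defaults.
 */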
static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

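/* Zero the descriptor rings and q_resources, release all posted buffers and
 * re-initialize the ring state so the ring can be brought up again.
 */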
static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (rx->dqo.page_pool)
				gve_free_to_page_pool(rx, bs, false);
			else
				gve_free_qpl_page_dqo(bs);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

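/* Quiesce an RX ring: detach it from its NAPI/notify block and reset its
 * state. A ring that was never added to a block is left untouched.
 */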
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	/* Only raw-addressing rings have a page pool; skip it in QPL mode. */
	if (rx->dqo.page_pool)
		page_pool_disable_direct_recycling(rx->dqo.page_pool);
	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

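/* Release everything gve_rx_alloc_ring_dqo() set up: q_resources, posted
 * buffers, the QPL or page pool, both descriptor rings and the header buffers.
 */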
void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (rx->dqo.page_pool)
			gve_free_to_page_pool(rx, bs, false);
		else
			gve_free_qpl_page_dqo(bs);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	if (rx->dqo.page_pool) {
		page_pool_destroy(rx->dqo.page_pool);
		rx->dqo.page_pool = NULL;
	}

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

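/* Allocate one DMA-coherent block holding a header buffer per buffer queue
 * slot, used when header-split is enabled.
 */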
static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

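/* Allocate all resources for a single DQO RX ring. On failure, whatever was
 * already allocated is torn down via gve_rx_free_ring_dqo().
 */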
int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	struct page_pool *pool;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;

	rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (cfg->raw_addressing) {
		pool = gve_rx_create_page_pool(priv, rx);
		if (IS_ERR(pool))
			goto err;

		rx->dqo.page_pool = pool;
	} else {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

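/* Tell the device how far the buffer queue tail has advanced. */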
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

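/* Post as many RX buffers as both the buffer queue and the completion queue
 * have room for, ringing the doorbell every GVE_RX_BUF_THRESH_DQO slots.
 */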
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

		if (unlikely(gve_alloc_buffer(rx, desc))) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_buf_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			break;
		}

		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

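/* Set skb->ip_summed from the ptype and the checksum error bits in the
 * completion descriptor.
 */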
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

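/* Record the RSS hash from the completion descriptor, using the strongest
 * hash type the parsed ptype allows (L4, then L3, then L2).
 */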
static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

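/* In QPL mode, decide whether to start copying into freshly allocated pages:
 * true once GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD or fewer buffer states
 * remain unused.
 */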
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

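/* Copy the received data into a newly allocated page and attach that page to
 * the skb, so the QPL buffer can be returned for reuse immediately.
 */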
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->dqo.page_pool)
			skb_mark_for_recycle(skb);

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, buf_state->page_info.buf_size);
	gve_reuse_buffer(rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_free_buffer(rx, buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;

			if (rx->dqo.page_pool)
				skb_mark_for_recycle(rx->ctx.skb_head);
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_free_buffer(rx, buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	if (rx->dqo.page_pool)
		skb_mark_for_recycle(rx->ctx.skb_head);

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			buf_state->page_info.buf_size);
	gve_reuse_buffer(rx, buf_state);
	return 0;

error:
	gve_free_buffer(rx, buf_state);
	return -ENOMEM;
}

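/* Fill in GSO metadata for an RSC (HW-coalesced) packet. Only TCP over IPv4
 * or IPv6 is supported.
 */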
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if the skb is completed successfully, a negative error code
 * otherwise.
 */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

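/* NAPI poll handler for a DQO RX ring: process up to @budget packets from the
 * completion queue, then repost buffers on the buffer queue.
 */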
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}