// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

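/* Free the DMA-coherent region backing the per-ring header buffers,
 * one priv->header_buf_size slot per buffer queue entry.
 */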
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

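/* Zero the descriptor rings and q_resources, release any pages still held by
 * the buffer states, then reinitialize the ring's bookkeeping so the queue
 * can be brought back up without reallocating memory.
 */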
static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (rx->dqo.page_pool)
				gve_free_to_page_pool(rx, bs, false);
			else
				gve_free_qpl_page_dqo(bs);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

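/* Quiesce an RX ring: disable page_pool direct recycling, tear down NAPI,
 * detach the ring from its notify block, and reset its state.
 */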
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	if (rx->dqo.page_pool)
		page_pool_disable_direct_recycling(rx->dqo.page_pool);
	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

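/* Release everything gve_rx_alloc_ring_dqo() set up: q_resources, pages held
 * by the buffer states, the QPL or page_pool, both descriptor rings, the
 * buffer state array, and any header-split buffers.
 */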
void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (rx->dqo.page_pool)
			gve_free_to_page_pool(rx, bs, false);
		else
			gve_free_qpl_page_dqo(bs);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	if (rx->dqo.page_pool) {
		page_pool_destroy(rx->dqo.page_pool);
		rx->dqo.page_pool = NULL;
	}

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

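/* Allocate one DMA-coherent header buffer of priv->header_buf_size bytes per
 * buffer queue slot, used when header-split is enabled.
 */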
static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

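/* Attach the ring to its notify block and register the DQO NAPI poller. */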
void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

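/* Allocate a single DQO RX ring: the buffer state array, optional
 * header-split buffers, completion and buffer descriptor rings, a page_pool
 * (raw addressing) or queue page list (QPL mode), and the queue resources.
 */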
int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	struct page_pool *pool;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->packet_buffer_size = cfg->packet_buffer_size;

	if (cfg->xdp) {
		rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
		rx->rx_headroom = XDP_PACKET_HEADROOM;
	} else {
		rx->packet_buffer_truesize = rx->packet_buffer_size;
		rx->rx_headroom = 0;
	}

	rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (cfg->raw_addressing) {
		pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
		if (IS_ERR(pool))
			goto err;

		rx->dqo.page_pool = pool;
	} else {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

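/* Ring the buffer queue doorbell so the device sees the new bufq tail. */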
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

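/* Post as many RX buffers as both the buffer queue and the completion queue
 * can accept, ringing the doorbell every GVE_RX_BUF_THRESH_DQO descriptors.
 */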
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

		if (unlikely(gve_alloc_buffer(rx, desc))) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_buf_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			break;
		}

		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

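/* Set skb->ip_summed from the completion descriptor's checksum bits and the
 * parsed packet type; leave CHECKSUM_NONE unless the device validated the
 * L3/L4 headers without error.
 */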
static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

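/* Record the RSS hash, choosing L4/L3/L2 hash type from the parsed ptype. */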
static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

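/* In QPL mode, decide whether to copy into freshly allocated pages because
 * nearly all buffer states are already in use.
 */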
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

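/* Copy the received fragment into a newly allocated page, attach that page
 * to the skb, and recycle the original buffer state instead of keeping its
 * page referenced by the skb.
 */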
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

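/* Attach the received buffer to the tail skb as a frag, via netmem when a
 * page_pool is in use and via the page otherwise.
 */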
static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				int num_frags, u16 buf_len)
{
	if (rx->dqo.page_pool) {
		skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
				       buf_state->page_info.netmem,
				       buf_state->page_info.page_offset +
				       buf_state->page_info.pad, buf_len,
				       buf_state->page_info.buf_size);
	} else {
		skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
				buf_state->page_info.page,
				buf_state->page_info.page_offset +
				buf_state->page_info.pad, buf_len,
				buf_state->page_info.buf_size);
	}
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, a negative value otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->dqo.page_pool)
			skb_mark_for_recycle(skb);

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
	gve_reuse_buffer(rx, buf_state);
	return 0;
}

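/* Account for a non-XDP_PASS verdict (XDP_TX and XDP_REDIRECT are counted as
 * errors in this path) and release the buffer.
 */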
static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			     struct xdp_buff *xdp, struct bpf_prog *xprog,
			     int xdp_act,
			     struct gve_rx_buf_state_dqo *buf_state)
{
	u64_stats_update_begin(&rx->statss);
	switch (xdp_act) {
	case XDP_ABORTED:
	case XDP_DROP:
	default:
		rx->xdp_actions[xdp_act]++;
		break;
	case XDP_TX:
		rx->xdp_tx_errors++;
		break;
	case XDP_REDIRECT:
		rx->xdp_redirect_errors++;
		break;
	}
	u64_stats_update_end(&rx->statss);
	gve_free_buffer(rx, buf_state);
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	struct bpf_prog *xprog;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_free_buffer(rx, buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	if (rx->dqo.page_pool) {
		if (!netmem_is_net_iov(buf_state->page_info.netmem))
			prefetch(netmem_to_page(buf_state->page_info.netmem));
	} else {
		prefetch(buf_state->page_info.page);
	}

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;

			if (rx->dqo.page_pool)
				skb_mark_for_recycle(rx->ctx.skb_head);
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset +
				      buf_state->page_info.pad,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv) != 0)) {
			goto error;
		}
		return 0;
	}

	xprog = READ_ONCE(priv->xdp_prog);
	if (xprog) {
		struct xdp_buff xdp;
		void *old_data;
		int xdp_act;

		xdp_init_buff(&xdp, buf_state->page_info.buf_size,
			      &rx->xdp_rxq);
		xdp_prepare_buff(&xdp,
				 buf_state->page_info.page_address +
				 buf_state->page_info.page_offset,
				 buf_state->page_info.pad,
				 buf_len, false);
		old_data = xdp.data;
		xdp_act = bpf_prog_run_xdp(xprog, &xdp);
		buf_state->page_info.pad += xdp.data - old_data;
		buf_len = xdp.data_end - xdp.data;
		if (xdp_act != XDP_PASS) {
			gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
					 buf_state);
			return 0;
		}

		u64_stats_update_begin(&rx->statss);
		rx->xdp_actions[XDP_PASS]++;
		u64_stats_update_end(&rx->statss);
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_free_buffer(rx, buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	if (rx->dqo.page_pool)
		skb_mark_for_recycle(rx->ctx.skb_head);

	gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
	gve_reuse_buffer(rx, buf_state);
	return 0;

error:
	gve_free_buffer(rx, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, a negative error code otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

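/* NAPI poll: drain up to @budget completed packets from the completion
 * queue, hand them to GRO, then repost buffers to the buffer queue.
 */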
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}