// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        struct device *hdev = &priv->pdev->dev;
        int buf_count = rx->dqo.bufq.mask + 1;

        if (rx->dqo.hdr_bufs.data) {
                dma_free_coherent(hdev, priv->header_buf_size * buf_count,
                                  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
                rx->dqo.hdr_bufs.data = NULL;
        }
}

static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
                                       const u32 buffer_queue_slots,
                                       const u32 completion_queue_slots)
{
        int i;

        /* Set buffer queue state */
        rx->dqo.bufq.mask = buffer_queue_slots - 1;
        rx->dqo.bufq.head = 0;
        rx->dqo.bufq.tail = 0;

        /* Set completion queue state */
        rx->dqo.complq.num_free_slots = completion_queue_slots;
        rx->dqo.complq.mask = completion_queue_slots - 1;
        rx->dqo.complq.cur_gen_bit = 0;
        rx->dqo.complq.head = 0;

        /* Set RX SKB context */
        rx->ctx.skb_head = NULL;
        rx->ctx.skb_tail = NULL;

        /* Set up linked list of buffer IDs */
        if (rx->dqo.buf_states) {
                for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
                        rx->dqo.buf_states[i].next = i + 1;
                rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
        }

        rx->dqo.free_buf_states = 0;
        rx->dqo.recycled_buf_states.head = -1;
        rx->dqo.recycled_buf_states.tail = -1;
        rx->dqo.used_buf_states.head = -1;
        rx->dqo.used_buf_states.tail = -1;
}

static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        size_t size;
        int i;

        const u32 buffer_queue_slots = priv->rx_desc_cnt;
        const u32 completion_queue_slots = priv->rx_desc_cnt;

        /* Reset buffer queue */
        if (rx->dqo.bufq.desc_ring) {
                size = sizeof(rx->dqo.bufq.desc_ring[0]) *
                        buffer_queue_slots;
                memset(rx->dqo.bufq.desc_ring, 0, size);
        }

        /* Reset completion queue */
        if (rx->dqo.complq.desc_ring) {
                size = sizeof(rx->dqo.complq.desc_ring[0]) *
                        completion_queue_slots;
                memset(rx->dqo.complq.desc_ring, 0, size);
        }

        /* Reset q_resources */
        if (rx->q_resources)
                memset(rx->q_resources, 0, sizeof(*rx->q_resources));

        /* Reset buf states */
        if (rx->dqo.buf_states) {
                for (i = 0; i < rx->dqo.num_buf_states; i++) {
                        struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

                        if (rx->dqo.page_pool)
                                gve_free_to_page_pool(rx, bs, false);
                        else
                                gve_free_qpl_page_dqo(bs);
                }
        }

        gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
                                   completion_queue_slots);
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
        int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
        struct gve_rx_ring *rx = &priv->rx[idx];

        if (!gve_rx_was_added_to_block(priv, idx))
                return;

        if (rx->dqo.page_pool)
                page_pool_disable_direct_recycling(rx->dqo.page_pool);
        gve_remove_napi(priv, ntfy_idx);
        gve_rx_remove_from_block(priv, idx);
        gve_rx_reset_ring_dqo(priv, idx);
}

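/* Free all resources owned by one RX ring: q_resources, the buffer states
 * (returned to the page pool or released as QPL pages), the queue page
 * list, both descriptor rings, the page pool itself, and any header-split
 * buffers.
 */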
void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
                          struct gve_rx_alloc_rings_cfg *cfg)
{
        struct device *hdev = &priv->pdev->dev;
        size_t completion_queue_slots;
        size_t buffer_queue_slots;
        int idx = rx->q_num;
        size_t size;
        u32 qpl_id;
        int i;

        completion_queue_slots = rx->dqo.complq.mask + 1;
        buffer_queue_slots = rx->dqo.bufq.mask + 1;

        if (rx->q_resources) {
                dma_free_coherent(hdev, sizeof(*rx->q_resources),
                                  rx->q_resources, rx->q_resources_bus);
                rx->q_resources = NULL;
        }

        for (i = 0; i < rx->dqo.num_buf_states; i++) {
                struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

                if (rx->dqo.page_pool)
                        gve_free_to_page_pool(rx, bs, false);
                else
                        gve_free_qpl_page_dqo(bs);
        }

        if (rx->dqo.qpl) {
                qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
                gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
                rx->dqo.qpl = NULL;
        }

        if (rx->dqo.bufq.desc_ring) {
                size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
                dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
                                  rx->dqo.bufq.bus);
                rx->dqo.bufq.desc_ring = NULL;
        }

        if (rx->dqo.complq.desc_ring) {
                size = sizeof(rx->dqo.complq.desc_ring[0]) *
                        completion_queue_slots;
                dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
                                  rx->dqo.complq.bus);
                rx->dqo.complq.desc_ring = NULL;
        }

        kvfree(rx->dqo.buf_states);
        rx->dqo.buf_states = NULL;

        if (rx->dqo.page_pool) {
                page_pool_destroy(rx->dqo.page_pool);
                rx->dqo.page_pool = NULL;
        }

        gve_rx_free_hdr_bufs(priv, rx);

        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
                                 const u32 buf_count)
{
        struct device *hdev = &priv->pdev->dev;

        rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
                                                   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
        if (!rx->dqo.hdr_bufs.data)
                return -ENOMEM;

        return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
        int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

        gve_rx_add_to_block(priv, idx);
        gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

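/* Allocate one RX ring: the buffer state array, optional header-split
 * buffers, the completion and buffer descriptor rings, either a page pool
 * (raw addressing) or a queue page list, and q_resources. On any failure
 * everything allocated so far is torn down and -ENOMEM is returned.
 */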
int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
                          struct gve_rx_alloc_rings_cfg *cfg,
                          struct gve_rx_ring *rx,
                          int idx)
{
        struct device *hdev = &priv->pdev->dev;
        struct page_pool *pool;
        int qpl_page_cnt;
        size_t size;
        u32 qpl_id;

        const u32 buffer_queue_slots = cfg->ring_size;
        const u32 completion_queue_slots = cfg->ring_size;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

        memset(rx, 0, sizeof(*rx));
        rx->gve = priv;
        rx->q_num = idx;
        rx->packet_buffer_size = cfg->packet_buffer_size;

        if (cfg->xdp) {
                rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
                rx->rx_headroom = XDP_PACKET_HEADROOM;
        } else {
                rx->packet_buffer_truesize = rx->packet_buffer_size;
                rx->rx_headroom = 0;
        }

        rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
                gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
        rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
                                      sizeof(rx->dqo.buf_states[0]),
                                      GFP_KERNEL);
        if (!rx->dqo.buf_states)
                return -ENOMEM;

        /* Allocate header buffers for header-split */
        if (cfg->enable_header_split)
                if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
                        goto err;

        /* Allocate RX completion queue */
        size = sizeof(rx->dqo.complq.desc_ring[0]) *
                completion_queue_slots;
        rx->dqo.complq.desc_ring =
                dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
        if (!rx->dqo.complq.desc_ring)
                goto err;

        /* Allocate RX buffer queue */
        size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
        rx->dqo.bufq.desc_ring =
                dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
        if (!rx->dqo.bufq.desc_ring)
                goto err;

        if (cfg->raw_addressing) {
                pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
                if (IS_ERR(pool))
                        goto err;

                rx->dqo.page_pool = pool;
        } else {
                qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
                qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

                rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
                                                        qpl_page_cnt);
                if (!rx->dqo.qpl)
                        goto err;
                rx->dqo.next_qpl_page_idx = 0;
        }

        rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
                                             &rx->q_resources_bus, GFP_KERNEL);
        if (!rx->q_resources)
                goto err;

        gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
                                   completion_queue_slots);

        return 0;

err:
        gve_rx_free_ring_dqo(priv, rx, cfg);
        return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
        const struct gve_rx_ring *rx = &priv->rx[queue_idx];
        u64 index = be32_to_cpu(rx->q_resources->db_index);

        iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
                           struct gve_rx_alloc_rings_cfg *cfg)
{
        struct gve_rx_ring *rx;
        int err;
        int i;

        rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
                      GFP_KERNEL);
        if (!rx)
                return -ENOMEM;

        for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
                err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        goto err;
                }
        }

        cfg->rx = rx;
        return 0;

err:
        for (i--; i >= 0; i--)
                gve_rx_free_ring_dqo(priv, &rx[i], cfg);
        kvfree(rx);
        return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
                           struct gve_rx_alloc_rings_cfg *cfg)
{
        struct gve_rx_ring *rx = cfg->rx;
        int i;

        if (!rx)
                return;

        for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
                gve_rx_free_ring_dqo(priv, &rx[i], cfg);

        kvfree(rx);
        cfg->rx = NULL;
}

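/* Post fresh buffers to the buffer queue, bounded by the free space in
 * both the buffer queue and the completion queue. The doorbell is rung
 * once every GVE_RX_BUF_THRESH_DQO slots rather than per descriptor.
 */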
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
        struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
        struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
        struct gve_priv *priv = rx->gve;
        u32 num_avail_slots;
        u32 num_full_slots;
        u32 num_posted = 0;

        num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
        num_avail_slots = bufq->mask - num_full_slots;

        num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
        while (num_posted < num_avail_slots) {
                struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

                if (unlikely(gve_alloc_buffer(rx, desc))) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_buf_alloc_fail++;
                        u64_stats_update_end(&rx->statss);
                        break;
                }

                if (rx->dqo.hdr_bufs.data)
                        desc->header_buf_addr =
                                cpu_to_le64(rx->dqo.hdr_bufs.addr +
                                            priv->header_buf_size * bufq->tail);

                bufq->tail = (bufq->tail + 1) & bufq->mask;
                complq->num_free_slots--;
                num_posted++;

                if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
                        gve_rx_write_doorbell_dqo(priv, rx->q_num);
        }

        rx->fill_cnt += num_posted;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
                            const struct gve_rx_compl_desc_dqo *desc,
                            struct gve_ptype ptype)
{
        skb->ip_summed = CHECKSUM_NONE;

        /* HW did not identify and process L3 and L4 headers. */
        if (unlikely(!desc->l3_l4_processed))
                return;

        if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
                if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
                        return;
        } else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
                /* Checksum should be skipped if this flag is set. */
                if (unlikely(desc->ipv6_ex_add))
                        return;
        }

        if (unlikely(desc->csum_l4_err))
                return;

        switch (ptype.l4_type) {
        case GVE_L4_TYPE_TCP:
        case GVE_L4_TYPE_UDP:
        case GVE_L4_TYPE_ICMP:
        case GVE_L4_TYPE_SCTP:
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                break;
        default:
                break;
        }
}

static void gve_rx_skb_hash(struct sk_buff *skb,
                            const struct gve_rx_compl_desc_dqo *compl_desc,
                            struct gve_ptype ptype)
{
        enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

        if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
                hash_type = PKT_HASH_TYPE_L4;
        else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
                hash_type = PKT_HASH_TYPE_L3;

        skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
        if (!rx->ctx.skb_head)
                return;

        if (rx->ctx.skb_head == napi->skb)
                napi->skb = NULL;
        dev_kfree_skb_any(rx->ctx.skb_head);
        rx->ctx.skb_head = NULL;
        rx->ctx.skb_tail = NULL;
}

static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
        if (!rx->dqo.qpl)
                return false;
        if (rx->dqo.used_buf_states_cnt <
            (rx->dqo.num_buf_states -
             GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
                return false;
        return true;
}

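/* Copy the fragment into a newly allocated page and attach it to the tail
 * skb so the original QPL buffer can be recycled immediately. Used when the
 * ring is running low on free buffer states.
 */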
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
                                struct gve_rx_buf_state_dqo *buf_state,
                                u16 buf_len)
{
        struct page *page = alloc_page(GFP_ATOMIC);
        int num_frags;

        if (!page)
                return -ENOMEM;

        memcpy(page_address(page),
               buf_state->page_info.page_address +
               buf_state->page_info.page_offset,
               buf_len);
        num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
        skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
                        0, buf_len, PAGE_SIZE);

        u64_stats_update_begin(&rx->statss);
        rx->rx_frag_alloc_cnt++;
        u64_stats_update_end(&rx->statss);
        /* Return the unused buffer. */
        gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
        return 0;
}

static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
                                struct gve_rx_buf_state_dqo *buf_state,
                                int num_frags, u16 buf_len)
{
        if (rx->dqo.page_pool) {
                skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
                                       buf_state->page_info.netmem,
                                       buf_state->page_info.page_offset +
                                       buf_state->page_info.pad, buf_len,
                                       buf_state->page_info.buf_size);
        } else {
                skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
                                buf_state->page_info.page,
                                buf_state->page_info.page_offset +
                                buf_state->page_info.pad, buf_len,
                                buf_state->page_info.buf_size);
        }
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
                               struct gve_rx_buf_state_dqo *buf_state,
                               u16 buf_len, struct gve_rx_ring *rx,
                               struct gve_priv *priv)
{
        int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

        if (unlikely(num_frags == MAX_SKB_FRAGS)) {
                struct sk_buff *skb;

                skb = napi_alloc_skb(napi, 0);
                if (!skb)
                        return -1;

                if (rx->dqo.page_pool)
                        skb_mark_for_recycle(skb);

                if (rx->ctx.skb_tail == rx->ctx.skb_head)
                        skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
                else
                        rx->ctx.skb_tail->next = skb;
                rx->ctx.skb_tail = skb;
                num_frags = 0;
        }
        if (rx->ctx.skb_tail != rx->ctx.skb_head) {
                rx->ctx.skb_head->len += buf_len;
                rx->ctx.skb_head->data_len += buf_len;
                rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
        }

        /* Trigger on-demand page allocation if we are running low on buffers */
        if (gve_rx_should_trigger_copy_ondemand(rx))
                return gve_rx_copy_ondemand(rx, buf_state, buf_len);

        gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
        gve_reuse_buffer(rx, buf_state);
        return 0;
}

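/* Account for a non-PASS XDP verdict and release the buffer. In this path
 * XDP_TX and XDP_REDIRECT are not forwarded; they are counted as errors and
 * the packet is dropped.
 */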
static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
                             struct xdp_buff *xdp, struct bpf_prog *xprog,
                             int xdp_act,
                             struct gve_rx_buf_state_dqo *buf_state)
{
        u64_stats_update_begin(&rx->statss);
        switch (xdp_act) {
        case XDP_ABORTED:
        case XDP_DROP:
        default:
                rx->xdp_actions[xdp_act]++;
                break;
        case XDP_TX:
                rx->xdp_tx_errors++;
                break;
        case XDP_REDIRECT:
                rx->xdp_redirect_errors++;
                break;
        }
        u64_stats_update_end(&rx->statss);
        gve_free_buffer(rx, buf_state);
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
                      const struct gve_rx_compl_desc_dqo *compl_desc,
                      u32 desc_idx, int queue_idx)
{
        const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
        const bool hbo = compl_desc->header_buffer_overflow;
        const bool eop = compl_desc->end_of_packet != 0;
        const bool hsplit = compl_desc->split_header;
        struct gve_rx_buf_state_dqo *buf_state;
        struct gve_priv *priv = rx->gve;
        struct bpf_prog *xprog;
        u16 buf_len;
        u16 hdr_len;

        if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
                net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
                                    priv->dev->name, buffer_id);
                return -EINVAL;
        }
        buf_state = &rx->dqo.buf_states[buffer_id];
        if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
                net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
                                    priv->dev->name, buffer_id);
                return -EINVAL;
        }

        if (unlikely(compl_desc->rx_error)) {
                gve_free_buffer(rx, buf_state);
                return -EINVAL;
        }

        buf_len = compl_desc->packet_len;
        hdr_len = compl_desc->header_len;

        /* The page might not have been used for a while and was likely last
         * written by a different thread.
         */
        if (rx->dqo.page_pool) {
                if (!netmem_is_net_iov(buf_state->page_info.netmem))
                        prefetch(netmem_to_page(buf_state->page_info.netmem));
        } else {
                prefetch(buf_state->page_info.page);
        }

        /* Copy the header into the skb in the case of header split */
        if (hsplit) {
                int unsplit = 0;

                if (hdr_len && !hbo) {
                        rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
                                                            rx->dqo.hdr_bufs.data +
                                                            desc_idx * priv->header_buf_size,
                                                            hdr_len);
                        if (unlikely(!rx->ctx.skb_head))
                                goto error;
                        rx->ctx.skb_tail = rx->ctx.skb_head;

                        if (rx->dqo.page_pool)
                                skb_mark_for_recycle(rx->ctx.skb_head);
                } else {
                        unsplit = 1;
                }
                u64_stats_update_begin(&rx->statss);
                rx->rx_hsplit_pkt++;
                rx->rx_hsplit_unsplit_pkt += unsplit;
                rx->rx_hsplit_bytes += hdr_len;
                u64_stats_update_end(&rx->statss);
        }

        /* Sync the portion of the DMA buffer for the CPU to read. */
        dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
                                      buf_state->page_info.page_offset +
                                      buf_state->page_info.pad,
                                      buf_len, DMA_FROM_DEVICE);

        /* Append to current skb if one exists. */
        if (rx->ctx.skb_head) {
                if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
                                                 priv)) != 0) {
                        goto error;
                }
                return 0;
        }

        xprog = READ_ONCE(priv->xdp_prog);
        if (xprog) {
                struct xdp_buff xdp;
                void *old_data;
                int xdp_act;

                xdp_init_buff(&xdp, buf_state->page_info.buf_size,
                              &rx->xdp_rxq);
                xdp_prepare_buff(&xdp,
                                 buf_state->page_info.page_address +
                                 buf_state->page_info.page_offset,
                                 buf_state->page_info.pad,
                                 buf_len, false);
                old_data = xdp.data;
                xdp_act = bpf_prog_run_xdp(xprog, &xdp);
                buf_state->page_info.pad += xdp.data - old_data;
                buf_len = xdp.data_end - xdp.data;
                if (xdp_act != XDP_PASS) {
                        gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
                                         buf_state);
                        return 0;
                }

                u64_stats_update_begin(&rx->statss);
                rx->xdp_actions[XDP_PASS]++;
                u64_stats_update_end(&rx->statss);
        }

        if (eop && buf_len <= priv->rx_copybreak) {
                rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
                                               &buf_state->page_info, buf_len);
                if (unlikely(!rx->ctx.skb_head))
                        goto error;
                rx->ctx.skb_tail = rx->ctx.skb_head;

                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                rx->rx_copybreak_pkt++;
                u64_stats_update_end(&rx->statss);

                gve_free_buffer(rx, buf_state);
                return 0;
        }

        rx->ctx.skb_head = napi_get_frags(napi);
        if (unlikely(!rx->ctx.skb_head))
                goto error;
        rx->ctx.skb_tail = rx->ctx.skb_head;

        if (gve_rx_should_trigger_copy_ondemand(rx)) {
                if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
                        goto error;
                return 0;
        }

        if (rx->dqo.page_pool)
                skb_mark_for_recycle(rx->ctx.skb_head);

        gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
        gve_reuse_buffer(rx, buf_state);
        return 0;

error:
        gve_free_buffer(rx, buf_state);
        return -ENOMEM;
}

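/* Fill in the GSO metadata for an RSC (hardware-coalesced) completion so the
 * stack can resegment it. Only TCP over IPv4 or IPv6 is supported; anything
 * else is rejected with -EINVAL.
 */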
static int gve_rx_complete_rsc(struct sk_buff *skb,
                               const struct gve_rx_compl_desc_dqo *desc,
                               struct gve_ptype ptype)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);

        /* Only TCP is supported right now. */
        if (ptype.l4_type != GVE_L4_TYPE_TCP)
                return -EINVAL;

        switch (ptype.l3_type) {
        case GVE_L3_TYPE_IPV4:
                shinfo->gso_type = SKB_GSO_TCPV4;
                break;
        case GVE_L3_TYPE_IPV6:
                shinfo->gso_type = SKB_GSO_TCPV6;
                break;
        default:
                return -EINVAL;
        }

        shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
        return 0;
}

/* Returns 0 if the skb is completed successfully, a negative error code
 * otherwise.
 */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
                               const struct gve_rx_compl_desc_dqo *desc,
                               netdev_features_t feat)
{
        struct gve_ptype ptype =
                rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
        int err;

        skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

        if (feat & NETIF_F_RXHASH)
                gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

        if (feat & NETIF_F_RXCSUM)
                gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

        /* RSC packets must set gso_size, otherwise the TCP stack will complain
         * that packets are larger than the MTU.
         */
        if (desc->rsc) {
                err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
                if (err < 0)
                        return err;
        }

        if (skb_headlen(rx->ctx.skb_head) == 0)
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, rx->ctx.skb_head);

        return 0;
}

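/* NAPI poll handler: process up to budget packet completions from the
 * completion queue, refill the buffer queue, and update ring statistics.
 * Returns the number of packets handed to the stack.
 */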
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
        struct napi_struct *napi = &block->napi;
        netdev_features_t feat = napi->dev->features;

        struct gve_rx_ring *rx = block->rx;
        struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

        u32 work_done = 0;
        u64 bytes = 0;
        int err;

        while (work_done < budget) {
                struct gve_rx_compl_desc_dqo *compl_desc =
                        &complq->desc_ring[complq->head];
                u32 pkt_bytes;

                /* No more new packets */
                if (compl_desc->generation == complq->cur_gen_bit)
                        break;

                /* Prefetch the next two descriptors. */
                prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
                prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

                /* Do not read data until we own the descriptor */
                dma_rmb();

                err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
                if (err < 0) {
                        gve_rx_free_skb(napi, rx);
                        u64_stats_update_begin(&rx->statss);
                        if (err == -ENOMEM)
                                rx->rx_skb_alloc_fail++;
                        else if (err == -EINVAL)
                                rx->rx_desc_err_dropped_pkt++;
                        u64_stats_update_end(&rx->statss);
                }

                complq->head = (complq->head + 1) & complq->mask;
                complq->num_free_slots++;

                /* When the ring wraps, the generation bit is flipped. */
                complq->cur_gen_bit ^= (complq->head == 0);

                /* Receiving a completion means we have space to post another
                 * buffer on the buffer queue.
                 */
                {
                        struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

                        bufq->head = (bufq->head + 1) & bufq->mask;
                }

                /* Free running counter of completed descriptors */
                rx->cnt++;

                if (!rx->ctx.skb_head)
                        continue;

                if (!compl_desc->end_of_packet)
                        continue;

                work_done++;
                pkt_bytes = rx->ctx.skb_head->len;
                /* The ethernet header (first ETH_HLEN bytes) is snipped off
                 * by eth_type_trans.
                 */
                if (skb_headlen(rx->ctx.skb_head))
                        pkt_bytes += ETH_HLEN;

                /* gve_rx_complete_skb() will consume skb if successful */
                if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
                        gve_rx_free_skb(napi, rx);
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_desc_err_dropped_pkt++;
                        u64_stats_update_end(&rx->statss);
                        continue;
                }

                bytes += pkt_bytes;
                rx->ctx.skb_head = NULL;
                rx->ctx.skb_tail = NULL;
        }

        gve_rx_post_buffers_dqo(rx);

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += work_done;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);

        return work_done;
}