// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

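/* Allocate (RDA) or take the next QPL page for @buf_state and give the driver
 * a reference bias of INT_MAX on it. gve_buf_ref_cnt() compares page_count()
 * against this bias to tell whether any SKB fragment still references the
 * page; per-fragment accounting adjusts the bias (gve_dec_pagecnt_bias())
 * rather than taking a page reference for every fragment handed to the stack.
 */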
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
				 struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}
	if (rx->dqo.qpl) {
		gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_rx_alloc_rings_cfg *cfg,
				 struct gve_rx_ring *rx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	size_t size;
	int i;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx))
			goto err;

	/* Set up linked list of buffer IDs */
	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
		rx->dqo.buf_states[i].next = i + 1;

	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (!cfg->raw_addressing && !cfg->qpls) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc QPL ring before allocing QPLs\n");
		return -EINVAL;
	}

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);
		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

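/* Try to reuse the page backing @buf_state: advance to the next buffer-sized
 * chunk and put the buffer on the recycled list when no SKB can still hold a
 * reference to that chunk, otherwise park it on the used list until the
 * remaining references are dropped.
 */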
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const u16 data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multi skbs for single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might have not been used for awhile and was likely last written
	 * by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

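/* NAPI poll handler: process up to @budget completed packets from the
 * completion queue, handing finished SKBs to the stack via GRO, then repost
 * RX buffers before returning the number of packets completed.
 */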
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}