// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

static void gve_free_page_dqo(struct gve_priv *priv,
			      struct gve_rx_buf_state_dqo *bs,
			      bool free_page)
{
	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
	if (free_page)
		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
			      DMA_FROM_DEVICE);
	bs->page_info.page = NULL;
}

static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = rx->dqo.free_buf_states;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from free list */
	rx->dqo.free_buf_states = buf_state->next;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
				       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
			       struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = rx->dqo.free_buf_states;
	rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
	struct gve_rx_buf_state_dqo *buf_state;
	s16 buffer_id;

	buffer_id = list->head;
	if (unlikely(buffer_id == -1))
		return NULL;

	buf_state = &rx->dqo.buf_states[buffer_id];

	/* Remove buf_state from list */
	list->head = buf_state->next;
	if (buf_state->next == -1)
		list->tail = -1;

	/* Point buf_state to itself to mark it as allocated */
	buf_state->next = buffer_id;

	return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
				  struct gve_index_list *list,
				  struct gve_rx_buf_state_dqo *buf_state)
{
	s16 buffer_id = buf_state - rx->dqo.buf_states;

	buf_state->next = -1;

	if (list->head == -1) {
		list->head = buffer_id;
		list->tail = buffer_id;
	} else {
		int tail = list->tail;

		rx->dqo.buf_states[tail].next = buffer_id;
		list->tail = buffer_id;
	}
}

static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_state_dqo *buf_state;
	int i;

	/* Recycled buf states are immediately usable. */
	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
	if (likely(buf_state))
		return buf_state;

	if (unlikely(rx->dqo.used_buf_states.head == -1))
		return NULL;

	/* Used buf states are only usable when ref count reaches 0, which means
	 * no SKBs refer to them.
	 *
	 * Search a limited number before giving up.
	 */
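	/* Note: the scan below is bounded rather than exhaustive; buffers that
	 * are still referenced simply go back on the tail of used_buf_states.
	 */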
	for (i = 0; i < 5; i++) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0) {
			rx->dqo.used_buf_states_cnt--;
			return buf_state;
		}

		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	}

	/* For QPL, we cannot allocate any new buffers and must
	 * wait for the existing ones to be available.
	 */
	if (rx->dqo.qpl)
		return NULL;

	/* If there are no free buf states discard an entry from
	 * `used_buf_states` so it can be used.
	 */
	if (unlikely(rx->dqo.free_buf_states == -1)) {
		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
		if (gve_buf_ref_cnt(buf_state) == 0)
			return buf_state;

		gve_free_page_dqo(rx->gve, buf_state, true);
		gve_free_buf_state(rx, buf_state);
	}

	return NULL;
}

static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
			      struct gve_rx_buf_state_dqo *buf_state)
{
	struct gve_priv *priv = rx->gve;
	u32 idx;

	if (!rx->dqo.qpl) {
		int err;

		err = gve_alloc_page(priv, &priv->pdev->dev,
				     &buf_state->page_info.page,
				     &buf_state->addr,
				     DMA_FROM_DEVICE, GFP_ATOMIC);
		if (err)
			return err;
	} else {
		idx = rx->dqo.next_qpl_page_idx;
		if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) {
			net_err_ratelimited("%s: Out of QPL pages\n",
					    priv->dev->name);
			return -ENOMEM;
		}
		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
		buf_state->addr = rx->dqo.qpl->page_buses[idx];
		rx->dqo.next_qpl_page_idx++;
	}
	buf_state->page_info.page_offset = 0;
	buf_state->page_info.page_address =
		page_address(buf_state->page_info.page);
	buf_state->last_single_ref_offset = 0;

	/* The page already has 1 ref. */
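	/* Pre-charging the refcount to INT_MAX and tracking the driver's share
	 * in pagecnt_bias lets fragments be handed to the stack with a plain
	 * bias decrement instead of a page_ref operation per fragment;
	 * gve_buf_ref_cnt() then reports how many references the stack still
	 * holds.
	 */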
	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
	buf_state->page_info.pagecnt_bias = INT_MAX;

	return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (bs->page_info.page)
				gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
		/* Only free page for RDA. QPL pages are freed in gve_main. */
		if (bs->page_info.page)
			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;

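	/* Buffer-state sizing: with raw addressing the pool is oversized
	 * (up to 4x the buffer queue, capped at S16_MAX since buffer IDs are
	 * s16) so buffers still held by the stack are less likely to stall
	 * posting; with QPL there is exactly one buf_state per QPL page.
	 */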
	rx->dqo.num_buf_states = cfg->raw_addressing ?
		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (!cfg->raw_addressing) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}

void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
		struct gve_rx_buf_state_dqo *buf_state;

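		/* Prefer a recycled buffer; only when none is ready do we take
		 * a free buf_state and back it with a freshly allocated page
		 * (raw addressing) or the next unused QPL page.
		 */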
		buf_state = gve_get_recycled_buf_state(rx);
		if (unlikely(!buf_state)) {
			buf_state = gve_alloc_buf_state(rx);
			if (unlikely(!buf_state))
				break;

			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_buf_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				gve_free_buf_state(rx, buf_state);
				break;
			}
		}

		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
		desc->buf_addr = cpu_to_le64(buf_state->addr +
					     buf_state->page_info.page_offset);
		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state)
{
	const u16 data_buffer_size = priv->data_buffer_size_dqo;
	int pagecount;

	/* Can't reuse if we only fit one buffer per page */
	if (data_buffer_size * 2 > PAGE_SIZE)
		goto mark_used;

	pagecount = gve_buf_ref_cnt(buf_state);

	/* Record the offset when we have a single remaining reference.
	 *
	 * When this happens, we know all of the other offsets of the page are
	 * usable.
	 */
	if (pagecount == 1) {
		buf_state->last_single_ref_offset =
			buf_state->page_info.page_offset;
	}

	/* Use the next buffer sized chunk in the page. */
	buf_state->page_info.page_offset += data_buffer_size;
	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

	/* If we wrap around to the same offset without ever dropping to 1
	 * reference, then we don't know if this offset was ever freed.
	 */
	if (buf_state->page_info.page_offset ==
	    buf_state->last_single_ref_offset) {
		goto mark_used;
	}

	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return;

mark_used:
	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
	rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}

static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
			buf_state->page_info.page,
			buf_state->page_info.page_offset,
			buf_len, priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	/* Advances buffer page-offset if page is partially used.
	 * Marks buffer as used if page is full.
	 */
	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	prefetch(buf_state->page_info.page);

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
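	/* A non-NULL skb_head means an earlier descriptor of this packet has
	 * already been processed; buffers keep being appended until the
	 * descriptor with end_of_packet set arrives.
	 */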
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
				      buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
			buf_state->page_info.page_offset, buf_len,
			priv->data_buffer_size_dqo);
	gve_dec_pagecnt_bias(&buf_state->page_info);

	gve_try_recycle_buf(priv, rx, buf_state);
	return 0;

error:
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, -1 otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}

int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct napi_struct *napi = &block->napi;
	netdev_features_t feat = napi->dev->features;

	struct gve_rx_ring *rx = block->rx;
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

	u32 work_done = 0;
	u64 bytes = 0;
	int err;

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}