// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	struct device *hdev = &priv->pdev->dev;
	int buf_count = rx->dqo.bufq.mask + 1;

	if (rx->dqo.hdr_bufs.data) {
		dma_free_coherent(hdev, priv->header_buf_size * buf_count,
				  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
		rx->dqo.hdr_bufs.data = NULL;
	}
}

static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
				       const u32 buffer_queue_slots,
				       const u32 completion_queue_slots)
{
	int i;

	/* Set buffer queue state */
	rx->dqo.bufq.mask = buffer_queue_slots - 1;
	rx->dqo.bufq.head = 0;
	rx->dqo.bufq.tail = 0;

	/* Set completion queue state */
	rx->dqo.complq.num_free_slots = completion_queue_slots;
	rx->dqo.complq.mask = completion_queue_slots - 1;
	rx->dqo.complq.cur_gen_bit = 0;
	rx->dqo.complq.head = 0;

	/* Set RX SKB context */
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;

	/* Set up linked list of buffer IDs */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
			rx->dqo.buf_states[i].next = i + 1;
		rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
	}

	rx->dqo.free_buf_states = 0;
	rx->dqo.recycled_buf_states.head = -1;
	rx->dqo.recycled_buf_states.tail = -1;
	rx->dqo.used_buf_states.head = -1;
	rx->dqo.used_buf_states.tail = -1;
}

static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	size_t size;
	int i;

	const u32 buffer_queue_slots = priv->rx_desc_cnt;
	const u32 completion_queue_slots = priv->rx_desc_cnt;

	/* Reset buffer queue */
	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) *
			buffer_queue_slots;
		memset(rx->dqo.bufq.desc_ring, 0, size);
	}

	/* Reset completion queue */
	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		memset(rx->dqo.complq.desc_ring, 0, size);
	}

	/* Reset q_resources */
	if (rx->q_resources)
		memset(rx->q_resources, 0, sizeof(*rx->q_resources));

	/* Reset buf states */
	if (rx->dqo.buf_states) {
		for (i = 0; i < rx->dqo.num_buf_states; i++) {
			struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

			if (rx->dqo.page_pool)
				gve_free_to_page_pool(rx, bs, false);
			else
				gve_free_qpl_page_dqo(bs);
		}
	}

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
	struct gve_rx_ring *rx = &priv->rx[idx];

	if (!gve_rx_was_added_to_block(priv, idx))
		return;

	if (rx->dqo.page_pool)
		page_pool_disable_direct_recycling(rx->dqo.page_pool);
	gve_remove_napi(priv, ntfy_idx);
	gve_rx_remove_from_block(priv, idx);
	gve_rx_reset_ring_dqo(priv, idx);
}

void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct gve_rx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	size_t completion_queue_slots;
	size_t buffer_queue_slots;
	int idx = rx->q_num;
	size_t size;
	u32 qpl_id;
	int i;

	completion_queue_slots = rx->dqo.complq.mask + 1;
	buffer_queue_slots = rx->dqo.bufq.mask + 1;

	if (rx->q_resources) {
		dma_free_coherent(hdev, sizeof(*rx->q_resources),
				  rx->q_resources, rx->q_resources_bus);
		rx->q_resources = NULL;
	}

	for (i = 0; i < rx->dqo.num_buf_states; i++) {
		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];

		if (rx->dqo.page_pool)
			gve_free_to_page_pool(rx, bs, false);
		else
			gve_free_qpl_page_dqo(bs);
	}

	if (rx->dqo.qpl) {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
		rx->dqo.qpl = NULL;
	}

	if (rx->dqo.bufq.desc_ring) {
		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
				  rx->dqo.bufq.bus);
		rx->dqo.bufq.desc_ring = NULL;
	}

	if (rx->dqo.complq.desc_ring) {
		size = sizeof(rx->dqo.complq.desc_ring[0]) *
			completion_queue_slots;
		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
				  rx->dqo.complq.bus);
		rx->dqo.complq.desc_ring = NULL;
	}

	kvfree(rx->dqo.buf_states);
	rx->dqo.buf_states = NULL;

	if (rx->dqo.page_pool) {
		page_pool_destroy(rx->dqo.page_pool);
		rx->dqo.page_pool = NULL;
	}

	gve_rx_free_hdr_bufs(priv, rx);

	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
				 const u32 buf_count)
{
	struct device *hdev = &priv->pdev->dev;

	rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
						   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
	if (!rx->dqo.hdr_bufs.data)
		return -ENOMEM;

	return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

	gve_rx_add_to_block(priv, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
			  struct gve_rx_alloc_rings_cfg *cfg,
			  struct gve_rx_ring *rx,
			  int idx)
{
	struct device *hdev = &priv->pdev->dev;
	struct page_pool *pool;
	int qpl_page_cnt;
	size_t size;
	u32 qpl_id;

	const u32 buffer_queue_slots = cfg->ring_size;
	const u32 completion_queue_slots = cfg->ring_size;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

	memset(rx, 0, sizeof(*rx));
	rx->gve = priv;
	rx->q_num = idx;
	rx->packet_buffer_size = cfg->packet_buffer_size;

	if (cfg->xdp) {
		rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
		rx->rx_headroom = XDP_PACKET_HEADROOM;
	} else {
		rx->packet_buffer_truesize = rx->packet_buffer_size;
		rx->rx_headroom = 0;
	}

	rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
		gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
				      sizeof(rx->dqo.buf_states[0]),
				      GFP_KERNEL);
	if (!rx->dqo.buf_states)
		return -ENOMEM;

	/* Allocate header buffers for header-split */
	if (cfg->enable_header_split)
		if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
			goto err;

	/* Allocate RX completion queue */
	size = sizeof(rx->dqo.complq.desc_ring[0]) *
		completion_queue_slots;
	rx->dqo.complq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
	if (!rx->dqo.complq.desc_ring)
		goto err;

	/* Allocate RX buffer queue */
	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
	rx->dqo.bufq.desc_ring =
		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
	if (!rx->dqo.bufq.desc_ring)
		goto err;

	if (cfg->raw_addressing) {
		pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
		if (IS_ERR(pool))
			goto err;

		rx->dqo.page_pool = pool;
	} else {
		qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
		qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);

		rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!rx->dqo.qpl)
			goto err;
		rx->dqo.next_qpl_page_idx = 0;
	}

	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
					     &rx->q_resources_bus, GFP_KERNEL);
	if (!rx->q_resources)
		goto err;

	gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
				   completion_queue_slots);

	return 0;

err:
	gve_rx_free_ring_dqo(priv, rx, cfg);
	return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
	u64 index = be32_to_cpu(rx->q_resources->db_index);

	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx;
	int err;
	int i;

	rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
		      GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
		err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->rx = rx;
	return 0;

err:
	for (i--; i >= 0; i--)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);
	kvfree(rx);
	return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_rx_alloc_rings_cfg *cfg)
{
	struct gve_rx_ring *rx = cfg->rx;
	int i;

	if (!rx)
		return;

	for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
		gve_rx_free_ring_dqo(priv, &rx[i], cfg);

	kvfree(rx);
	cfg->rx = NULL;
}
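
/* Refill the buffer queue with as many buffers as both the buffer queue and
 * the completion queue can accommodate. The ring sizes are powers of two, so
 * (tail - head) & mask is the number of buffers currently posted and not yet
 * completed; at most `mask` buffers (one less than the ring size) are kept
 * outstanding, leaving one slot unused. The doorbell is only written when
 * tail crosses a multiple of GVE_RX_BUF_THRESH_DQO, batching MMIO writes.
 */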
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
	struct gve_priv *priv = rx->gve;
	u32 num_avail_slots;
	u32 num_full_slots;
	u32 num_posted = 0;

	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
	num_avail_slots = bufq->mask - num_full_slots;

	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
	while (num_posted < num_avail_slots) {
		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];

		if (unlikely(gve_alloc_buffer(rx, desc))) {
			u64_stats_update_begin(&rx->statss);
			rx->rx_buf_alloc_fail++;
			u64_stats_update_end(&rx->statss);
			break;
		}

		if (rx->dqo.hdr_bufs.data)
			desc->header_buf_addr =
				cpu_to_le64(rx->dqo.hdr_bufs.addr +
					    priv->header_buf_size * bufq->tail);

		bufq->tail = (bufq->tail + 1) & bufq->mask;
		complq->num_free_slots--;
		num_posted++;

		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
			gve_rx_write_doorbell_dqo(priv, rx->q_num);
	}

	rx->fill_cnt += num_posted;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *desc,
			    struct gve_ptype ptype)
{
	skb->ip_summed = CHECKSUM_NONE;

	/* HW did not identify and process L3 and L4 headers. */
	if (unlikely(!desc->l3_l4_processed))
		return;

	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
			return;
	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (unlikely(desc->ipv6_ex_add))
			return;
	}

	if (unlikely(desc->csum_l4_err))
		return;

	switch (ptype.l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		break;
	default:
		break;
	}
}

static void gve_rx_skb_hash(struct sk_buff *skb,
			    const struct gve_rx_compl_desc_dqo *compl_desc,
			    struct gve_ptype ptype)
{
	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L4;
	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
		hash_type = PKT_HASH_TYPE_L3;

	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

/* Expand the hardware timestamp to the full 64 bits of width, and add it to the
 * skb.
 *
 * This algorithm works by using the passed hardware timestamp to generate a
 * diff relative to the last read of the nic clock. This diff can be positive or
 * negative, as it is possible that we have read the clock more recently than
 * the hardware has received this packet. To detect this, we use the high bit of
 * the diff, and assume that the read is more recent if the high bit is set. In
 * this case we invert the process.
 *
 * Note that this means if the time delta between packet reception and the last
 * clock read is greater than ~2 seconds, this will provide invalid results.
 */
static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts)
{
	u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter);
	struct sk_buff *skb = rx->ctx.skb_head;
	u32 low = (u32)last_read;
	s32 diff = hwts - low;

	skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff);
}
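
/* Worked example of the expansion above, with made-up values: if
 * last_sync_nic_counter is 0x100000010 ns, then low is 0x10. A hardware
 * timestamp of 0x08 gives diff = -8, so the expanded timestamp is
 * 0x100000008 ns, i.e. just before the last clock read; a timestamp of 0x20
 * gives diff = +0x10 and a result of 0x100000020 ns.
 */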
static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
	if (!rx->ctx.skb_head)
		return;

	if (rx->ctx.skb_head == napi->skb)
		napi->skb = NULL;
	dev_kfree_skb_any(rx->ctx.skb_head);
	rx->ctx.skb_head = NULL;
	rx->ctx.skb_tail = NULL;
}
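
/* QPL mode works from a fixed, pre-registered set of buffer pages. When
 * nearly all buffer states are outstanding (within
 * GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD of the total), gve_rx_copy_ondemand()
 * copies the payload into a newly allocated page and recycles the QPL buffer
 * right away instead of waiting for the stack to release it.
 */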
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
	if (!rx->dqo.qpl)
		return false;
	if (rx->dqo.used_buf_states_cnt <
	    (rx->dqo.num_buf_states -
	     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
		return false;
	return true;
}

static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				u16 buf_len)
{
	struct page *page = alloc_page(GFP_ATOMIC);
	int num_frags;

	if (!page)
		return -ENOMEM;

	memcpy(page_address(page),
	       buf_state->page_info.page_address +
	       buf_state->page_info.page_offset,
	       buf_len);
	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
			0, buf_len, PAGE_SIZE);

	u64_stats_update_begin(&rx->statss);
	rx->rx_frag_alloc_cnt++;
	u64_stats_update_end(&rx->statss);
	/* Return unused buffer. */
	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
	return 0;
}

static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
				struct gve_rx_buf_state_dqo *buf_state,
				int num_frags, u16 buf_len)
{
	if (rx->dqo.page_pool) {
		skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
				       buf_state->page_info.netmem,
				       buf_state->page_info.page_offset +
				       buf_state->page_info.pad, buf_len,
				       buf_state->page_info.buf_size);
	} else {
		skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
				buf_state->page_info.page,
				buf_state->page_info.page_offset +
				buf_state->page_info.pad, buf_len,
				buf_state->page_info.buf_size);
	}
}

/* Chains multi skbs for single rx packet.
 * Returns 0 if buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
			       struct gve_rx_buf_state_dqo *buf_state,
			       u16 buf_len, struct gve_rx_ring *rx,
			       struct gve_priv *priv)
{
	int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, 0);
		if (!skb)
			return -1;

		if (rx->dqo.page_pool)
			skb_mark_for_recycle(skb);

		if (rx->ctx.skb_tail == rx->ctx.skb_head)
			skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
		else
			rx->ctx.skb_tail->next = skb;
		rx->ctx.skb_tail = skb;
		num_frags = 0;
	}
	if (rx->ctx.skb_tail != rx->ctx.skb_head) {
		rx->ctx.skb_head->len += buf_len;
		rx->ctx.skb_head->data_len += buf_len;
		rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
	}

	/* Trigger ondemand page allocation if we are running low on buffers */
	if (gve_rx_should_trigger_copy_ondemand(rx))
		return gve_rx_copy_ondemand(rx, buf_state, buf_len);

	gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
	gve_reuse_buffer(rx, buf_state);
	return 0;
}

static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			  struct xdp_buff *xdp)
{
	struct gve_tx_ring *tx;
	struct xdp_frame *xdpf;
	u32 tx_qid;
	int err;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return -ENOSPC;

	tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
	tx = &priv->tx[tx_qid];
	spin_lock(&tx->dqo_tx.xdp_lock);
	err = gve_xdp_xmit_one_dqo(priv, tx, xdpf);
	spin_unlock(&tx->dqo_tx.xdp_lock);

	return err;
}

static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
			     struct xdp_buff *xdp, struct bpf_prog *xprog,
			     int xdp_act,
			     struct gve_rx_buf_state_dqo *buf_state)
{
	int err;

	switch (xdp_act) {
	case XDP_ABORTED:
	case XDP_DROP:
	default:
		gve_free_buffer(rx, buf_state);
		break;
	case XDP_TX:
		err = gve_xdp_tx_dqo(priv, rx, xdp);
		if (unlikely(err))
			goto err;
		gve_reuse_buffer(rx, buf_state);
		break;
	case XDP_REDIRECT:
		err = xdp_do_redirect(priv->dev, xdp, xprog);
		if (unlikely(err))
			goto err;
		gve_reuse_buffer(rx, buf_state);
		break;
	}
	u64_stats_update_begin(&rx->statss);
	if ((u32)xdp_act < GVE_XDP_ACTIONS)
		rx->xdp_actions[xdp_act]++;
	u64_stats_update_end(&rx->statss);
	return;
err:
	u64_stats_update_begin(&rx->statss);
	if (xdp_act == XDP_TX)
		rx->xdp_tx_errors++;
	else if (xdp_act == XDP_REDIRECT)
		rx->xdp_redirect_errors++;
	u64_stats_update_end(&rx->statss);
	gve_free_buffer(rx, buf_state);
}
/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
		      const struct gve_rx_compl_desc_dqo *compl_desc,
		      u32 desc_idx, int queue_idx)
{
	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
	const bool hbo = compl_desc->header_buffer_overflow;
	const bool eop = compl_desc->end_of_packet != 0;
	const bool hsplit = compl_desc->split_header;
	struct gve_rx_buf_state_dqo *buf_state;
	struct gve_priv *priv = rx->gve;
	struct bpf_prog *xprog;
	u16 buf_len;
	u16 hdr_len;

	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}
	buf_state = &rx->dqo.buf_states[buffer_id];
	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
				    priv->dev->name, buffer_id);
		return -EINVAL;
	}

	if (unlikely(compl_desc->rx_error)) {
		gve_free_buffer(rx, buf_state);
		return -EINVAL;
	}

	buf_len = compl_desc->packet_len;
	hdr_len = compl_desc->header_len;

	/* Page might not have been used for a while and was likely last
	 * written by a different thread.
	 */
	if (rx->dqo.page_pool) {
		if (!netmem_is_net_iov(buf_state->page_info.netmem))
			prefetch(netmem_to_page(buf_state->page_info.netmem));
	} else {
		prefetch(buf_state->page_info.page);
	}

	/* Copy the header into the skb in the case of header split */
	if (hsplit) {
		int unsplit = 0;

		if (hdr_len && !hbo) {
			rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
							    rx->dqo.hdr_bufs.data +
							    desc_idx * priv->header_buf_size,
							    hdr_len);
			if (unlikely(!rx->ctx.skb_head))
				goto error;
			rx->ctx.skb_tail = rx->ctx.skb_head;

			if (rx->dqo.page_pool)
				skb_mark_for_recycle(rx->ctx.skb_head);
		} else {
			unsplit = 1;
		}
		u64_stats_update_begin(&rx->statss);
		rx->rx_hsplit_pkt++;
		rx->rx_hsplit_unsplit_pkt += unsplit;
		rx->rx_hsplit_bytes += hdr_len;
		u64_stats_update_end(&rx->statss);
	}

	/* Sync the portion of dma buffer for CPU to read. */
	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
				      buf_state->page_info.page_offset +
				      buf_state->page_info.pad,
				      buf_len, DMA_FROM_DEVICE);

	/* Append to current skb if one exists. */
	if (rx->ctx.skb_head) {
		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
						 priv)) != 0) {
			goto error;
		}
		return 0;
	}

	xprog = READ_ONCE(priv->xdp_prog);
	if (xprog) {
		struct xdp_buff xdp;
		void *old_data;
		int xdp_act;

		xdp_init_buff(&xdp, buf_state->page_info.buf_size,
			      &rx->xdp_rxq);
		xdp_prepare_buff(&xdp,
				 buf_state->page_info.page_address +
				 buf_state->page_info.page_offset,
				 buf_state->page_info.pad,
				 buf_len, false);
		old_data = xdp.data;
		xdp_act = bpf_prog_run_xdp(xprog, &xdp);
		buf_state->page_info.pad += xdp.data - old_data;
		buf_len = xdp.data_end - xdp.data;
		if (xdp_act != XDP_PASS) {
			gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
					 buf_state);
			return 0;
		}

		u64_stats_update_begin(&rx->statss);
		rx->xdp_actions[XDP_PASS]++;
		u64_stats_update_end(&rx->statss);
	}

	if (eop && buf_len <= priv->rx_copybreak) {
		rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
					       &buf_state->page_info, buf_len);
		if (unlikely(!rx->ctx.skb_head))
			goto error;
		rx->ctx.skb_tail = rx->ctx.skb_head;

		u64_stats_update_begin(&rx->statss);
		rx->rx_copied_pkt++;
		rx->rx_copybreak_pkt++;
		u64_stats_update_end(&rx->statss);

		gve_free_buffer(rx, buf_state);
		return 0;
	}

	rx->ctx.skb_head = napi_get_frags(napi);
	if (unlikely(!rx->ctx.skb_head))
		goto error;
	rx->ctx.skb_tail = rx->ctx.skb_head;

	if (gve_rx_should_trigger_copy_ondemand(rx)) {
		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
			goto error;
		return 0;
	}

	if (rx->dqo.page_pool)
		skb_mark_for_recycle(rx->ctx.skb_head);

	gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
	gve_reuse_buffer(rx, buf_state);
	return 0;

error:
	gve_free_buffer(rx, buf_state);
	return -ENOMEM;
}
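
/* A hardware-coalesced (RSC) completion covers several TCP segments merged
 * into a single skb. gve_rx_complete_rsc() marks the skb with the matching
 * gso_type and uses the segment length reported by the device as gso_size so
 * the rest of the stack treats it like a GRO'd packet.
 */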
static int gve_rx_complete_rsc(struct sk_buff *skb,
			       const struct gve_rx_compl_desc_dqo *desc,
			       struct gve_ptype ptype)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	/* Only TCP is supported right now. */
	if (ptype.l4_type != GVE_L4_TYPE_TCP)
		return -EINVAL;

	switch (ptype.l3_type) {
	case GVE_L3_TYPE_IPV4:
		shinfo->gso_type = SKB_GSO_TCPV4;
		break;
	case GVE_L3_TYPE_IPV6:
		shinfo->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		return -EINVAL;
	}

	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
	return 0;
}

/* Returns 0 if skb is completed successfully, a negative errno otherwise. */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
			       const struct gve_rx_compl_desc_dqo *desc,
			       netdev_features_t feat)
{
	struct gve_ptype ptype =
		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
	int err;

	skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

	if (feat & NETIF_F_RXHASH)
		gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

	if (feat & NETIF_F_RXCSUM)
		gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

	if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)
		gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts));

	/* RSC packets must set gso_size otherwise the TCP stack will complain
	 * that packets are larger than MTU.
	 */
	if (desc->rsc) {
		err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
		if (err < 0)
			return err;
	}

	if (skb_headlen(rx->ctx.skb_head) == 0)
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, rx->ctx.skb_head);

	return 0;
}
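
/* NAPI poll for a DQO RX queue. Completion descriptors carry a generation
 * bit that the NIC flips on each pass over the ring; a descriptor whose
 * generation bit still equals cur_gen_bit has not been written for the
 * current pass, which is how the loop detects that there are no new
 * completions. cur_gen_bit is toggled whenever head wraps back to slot zero.
 */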
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
	struct gve_rx_compl_queue_dqo *complq;
	struct napi_struct *napi;
	netdev_features_t feat;
	struct gve_rx_ring *rx;
	struct gve_priv *priv;
	u64 xdp_redirects;
	u32 work_done = 0;
	u64 bytes = 0;
	u64 xdp_txs;
	int err;

	napi = &block->napi;
	feat = napi->dev->features;

	rx = block->rx;
	priv = rx->gve;
	complq = &rx->dqo.complq;

	xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
	xdp_txs = rx->xdp_actions[XDP_TX];

	while (work_done < budget) {
		struct gve_rx_compl_desc_dqo *compl_desc =
			&complq->desc_ring[complq->head];
		u32 pkt_bytes;

		/* No more new packets */
		if (compl_desc->generation == complq->cur_gen_bit)
			break;

		/* Prefetch the next two descriptors. */
		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();

		err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
		if (err < 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			if (err == -ENOMEM)
				rx->rx_skb_alloc_fail++;
			else if (err == -EINVAL)
				rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
		}

		complq->head = (complq->head + 1) & complq->mask;
		complq->num_free_slots++;

		/* When the ring wraps, the generation bit is flipped. */
		complq->cur_gen_bit ^= (complq->head == 0);

		/* Receiving a completion means we have space to post another
		 * buffer on the buffer queue.
		 */
		{
			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

			bufq->head = (bufq->head + 1) & bufq->mask;
		}

		/* Free running counter of completed descriptors */
		rx->cnt++;

		if (!rx->ctx.skb_head)
			continue;

		if (!compl_desc->end_of_packet)
			continue;

		work_done++;
		pkt_bytes = rx->ctx.skb_head->len;
		/* The ethernet header (first ETH_HLEN bytes) is snipped off
		 * by eth_type_trans.
		 */
		if (skb_headlen(rx->ctx.skb_head))
			pkt_bytes += ETH_HLEN;

		/* gve_rx_complete_skb() will consume skb if successful */
		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
			gve_rx_free_skb(napi, rx);
			u64_stats_update_begin(&rx->statss);
			rx->rx_desc_err_dropped_pkt++;
			u64_stats_update_end(&rx->statss);
			continue;
		}

		bytes += pkt_bytes;
		rx->ctx.skb_head = NULL;
		rx->ctx.skb_tail = NULL;
	}

	if (xdp_txs != rx->xdp_actions[XDP_TX])
		gve_xdp_tx_flush_dqo(priv, rx->q_num);

	if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
		xdp_do_flush();

	gve_rx_post_buffers_dqo(rx);

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);

	return work_done;
}