// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include "gve_dqo.h"
#include <net/ip.h>
#include <linux/bpf.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/skbuff.h>

/* Returns true if tx_bufs are available. */
static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
{
	int num_avail;

	if (!tx->dqo.qpl)
		return true;

	num_avail = tx->dqo.num_tx_qpl_bufs -
		(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
		 tx->dqo_tx.free_tx_qpl_buf_cnt);

	if (count <= num_avail)
		return true;

	/* Update cached value from dqo_compl. */
	tx->dqo_tx.free_tx_qpl_buf_cnt =
		atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_cnt);

	num_avail = tx->dqo.num_tx_qpl_bufs -
		(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
		 tx->dqo_tx.free_tx_qpl_buf_cnt);

	return count <= num_avail;
}

static s16
gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx)
{
	s16 index;

	index = tx->dqo_tx.free_tx_qpl_buf_head;

	/* No TX buffers available, try to steal the list from the
	 * completion handler.
	 */
	if (unlikely(index == -1)) {
		tx->dqo_tx.free_tx_qpl_buf_head =
			atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
		index = tx->dqo_tx.free_tx_qpl_buf_head;

		if (unlikely(index == -1))
			return index;
	}

	/* Remove TX buf from free list */
	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];

	return index;
}

static void
gve_free_tx_qpl_bufs(struct gve_tx_ring *tx,
		     struct gve_tx_pending_packet_dqo *pkt)
{
	s16 index;
	int i;

	if (!pkt->num_bufs)
		return;

	index = pkt->tx_qpl_buf_ids[0];
	/* Create a linked list of buffers to be added to the free list */
	for (i = 1; i < pkt->num_bufs; i++) {
		tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i];
		index = pkt->tx_qpl_buf_ids[i];
	}

	while (true) {
		s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_head);

		tx->dqo.tx_qpl_buf_next[index] = old_head;
		if (atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head,
				   old_head,
				   pkt->tx_qpl_buf_ids[0]) == old_head) {
			break;
		}
	}

	atomic_add(pkt->num_bufs, &tx->dqo_compl.free_tx_qpl_buf_cnt);
	pkt->num_bufs = 0;
}

/* Returns true if a gve_tx_pending_packet_dqo object is available. */
static bool gve_has_pending_packet(struct gve_tx_ring *tx)
{
	/* Check TX path's list. */
	if (tx->dqo_tx.free_pending_packets != -1)
		return true;

	/* Check completion handler's list. */
	if (atomic_read_acquire(&tx->dqo_compl.free_pending_packets) != -1)
		return true;

	return false;
}

void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid)
{
	u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid);
	struct gve_tx_ring *tx = &priv->tx[tx_qid];

	gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
}
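
/* The pending-packet pool uses the same lock-free split free list as the TX
 * QPL buffers above: the TX path consumes from a head it owns exclusively
 * (dqo_tx.free_pending_packets), the completion path pushes freed entries
 * onto an atomic head (dqo_compl.free_pending_packets), and when the TX-owned
 * list runs dry the completion-owned list is stolen wholesale with
 * atomic_xchg().
 */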
static struct gve_tx_pending_packet_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pending_packet;
	s16 index;

	index = tx->dqo_tx.free_pending_packets;

	/* No pending_packets available, try to steal the list from the
	 * completion handler.
	 */
	if (unlikely(index == -1)) {
		tx->dqo_tx.free_pending_packets =
			atomic_xchg(&tx->dqo_compl.free_pending_packets, -1);
		index = tx->dqo_tx.free_pending_packets;

		if (unlikely(index == -1))
			return NULL;
	}

	pending_packet = &tx->dqo.pending_packets[index];

	/* Remove pending_packet from free list */
	tx->dqo_tx.free_pending_packets = pending_packet->next;
	pending_packet->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	return pending_packet;
}

static void
gve_free_pending_packet(struct gve_tx_ring *tx,
			struct gve_tx_pending_packet_dqo *pending_packet)
{
	s16 index = pending_packet - tx->dqo.pending_packets;

	pending_packet->state = GVE_PACKET_STATE_UNALLOCATED;
	while (true) {
		s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_pending_packets);

		pending_packet->next = old_head;
		if (atomic_cmpxchg(&tx->dqo_compl.free_pending_packets,
				   old_head, index) == old_head) {
			break;
		}
	}
}

/* gve_tx_clean_pending_packets - Cleans up all pending tx requests and buffers.
 */
static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
{
	int i;

	for (i = 0; i < tx->dqo.num_pending_packets; i++) {
		struct gve_tx_pending_packet_dqo *cur_state =
			&tx->dqo.pending_packets[i];
		int j;

		for (j = 0; j < cur_state->num_bufs; j++) {
			if (j == 0) {
				dma_unmap_single(tx->dev,
						 dma_unmap_addr(cur_state, dma[j]),
						 dma_unmap_len(cur_state, len[j]),
						 DMA_TO_DEVICE);
			} else {
				dma_unmap_page(tx->dev,
					       dma_unmap_addr(cur_state, dma[j]),
					       dma_unmap_len(cur_state, len[j]),
					       DMA_TO_DEVICE);
			}
		}
		if (cur_state->skb) {
			dev_consume_skb_any(cur_state->skb);
			cur_state->skb = NULL;
		}
	}
}

void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
	struct gve_tx_ring *tx = &priv->tx[idx];

	if (!gve_tx_was_added_to_block(priv, idx))
		return;

	gve_remove_napi(priv, ntfy_idx);
	gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
	if (tx->netdev_txq)
		netdev_tx_reset_queue(tx->netdev_txq);
	gve_tx_clean_pending_packets(tx);
	gve_tx_remove_from_block(priv, idx);
}

static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
				 struct gve_tx_alloc_rings_cfg *cfg)
{
	struct device *hdev = &priv->pdev->dev;
	int idx = tx->q_num;
	size_t bytes;
	u32 qpl_id;

	if (tx->q_resources) {
		dma_free_coherent(hdev, sizeof(*tx->q_resources),
				  tx->q_resources, tx->q_resources_bus);
		tx->q_resources = NULL;
	}

	if (tx->dqo.compl_ring) {
		bytes = sizeof(tx->dqo.compl_ring[0]) *
			(tx->dqo.complq_mask + 1);
		dma_free_coherent(hdev, bytes, tx->dqo.compl_ring,
				  tx->complq_bus_dqo);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.tx_ring) {
		bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1);
		dma_free_coherent(hdev, bytes, tx->dqo.tx_ring, tx->bus);
		tx->dqo.tx_ring = NULL;
	}

	kvfree(tx->dqo.pending_packets);
	tx->dqo.pending_packets = NULL;

	kvfree(tx->dqo.tx_qpl_buf_next);
	tx->dqo.tx_qpl_buf_next = NULL;

	if (tx->dqo.qpl) {
		qpl_id = gve_tx_qpl_id(priv, tx->q_num);
		gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id);
		tx->dqo.qpl = NULL;
	}

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}
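
/* Illustrative sizing, assuming 4K pages and the 2K TX bounce buffers used in
 * QPL mode: GVE_TX_BUFS_PER_PAGE_DQO is then 2, so a QPL of, say, 512
 * registered pages provides 1024 independently allocatable TX buffers.
 */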
static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
{
	int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO *
		tx->dqo.qpl->num_entries;
	int i;

	tx->dqo.tx_qpl_buf_next = kvcalloc(num_tx_qpl_bufs,
					   sizeof(tx->dqo.tx_qpl_buf_next[0]),
					   GFP_KERNEL);
	if (!tx->dqo.tx_qpl_buf_next)
		return -ENOMEM;

	tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs;

	/* Generate free TX buf list */
	for (i = 0; i < num_tx_qpl_bufs - 1; i++)
		tx->dqo.tx_qpl_buf_next[i] = i + 1;
	tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - 1] = -1;

	atomic_set_release(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
	return 0;
}

void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx)
{
	int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
	struct gve_tx_ring *tx = &priv->tx[idx];

	gve_tx_add_to_block(priv, idx);

	if (idx < priv->tx_cfg.num_queues)
		tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
				 struct gve_tx_alloc_rings_cfg *cfg,
				 struct gve_tx_ring *tx,
				 int idx)
{
	struct device *hdev = &priv->pdev->dev;
	int num_pending_packets;
	int qpl_page_cnt;
	size_t bytes;
	u32 qpl_id;
	int i;

	memset(tx, 0, sizeof(*tx));
	tx->q_num = idx;
	tx->dev = hdev;
	spin_lock_init(&tx->dqo_tx.xdp_lock);
	atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);

	/* Queue sizes must be a power of 2 */
	tx->mask = cfg->ring_size - 1;
	tx->dqo.complq_mask = tx->mask;

	/* The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun the
	 * completion queue.
	 */
	num_pending_packets = tx->dqo.complq_mask + 1;

	/* Reserve space for descriptor completions, which will be reported at
	 * most every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_packets -=
		(tx->dqo.complq_mask + 1) / GVE_TX_MIN_RE_INTERVAL;

	/* Each packet may have at most 2 buffer completions if it receives both
	 * a miss and reinjection completion.
	 */
	num_pending_packets /= 2;

	tx->dqo.num_pending_packets = min_t(int, num_pending_packets, S16_MAX);
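
	/* Worked example with illustrative values: a 512-entry completion ring
	 * and a report-event interval of 32 start from 512 entries, reserve
	 * 512 / 32 = 16 of them for descriptor completions, and halve the
	 * remaining 496 to cover miss + reinjection pairs, giving 248 pending
	 * packets (well below S16_MAX).
	 */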
	tx->dqo.pending_packets = kvcalloc(tx->dqo.num_pending_packets,
					   sizeof(tx->dqo.pending_packets[0]),
					   GFP_KERNEL);
	if (!tx->dqo.pending_packets)
		goto err;

	/* Set up linked list of pending packets */
	for (i = 0; i < tx->dqo.num_pending_packets - 1; i++)
		tx->dqo.pending_packets[i].next = i + 1;

	tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1;
	atomic_set_release(&tx->dqo_compl.free_pending_packets, -1);
	tx->dqo_compl.miss_completions.head = -1;
	tx->dqo_compl.miss_completions.tail = -1;
	tx->dqo_compl.timed_out_completions.head = -1;
	tx->dqo_compl.timed_out_completions.tail = -1;

	bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1);
	tx->dqo.tx_ring = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->dqo.tx_ring)
		goto err;

	bytes = sizeof(tx->dqo.compl_ring[0]) * (tx->dqo.complq_mask + 1);
	tx->dqo.compl_ring = dma_alloc_coherent(hdev, bytes,
						&tx->complq_bus_dqo,
						GFP_KERNEL);
	if (!tx->dqo.compl_ring)
		goto err;

	tx->q_resources = dma_alloc_coherent(hdev, sizeof(*tx->q_resources),
					     &tx->q_resources_bus, GFP_KERNEL);
	if (!tx->q_resources)
		goto err;

	if (!cfg->raw_addressing) {
		qpl_id = gve_tx_qpl_id(priv, tx->q_num);
		qpl_page_cnt = priv->tx_pages_per_qpl;

		tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
							qpl_page_cnt);
		if (!tx->dqo.qpl)
			goto err;

		if (gve_tx_qpl_buf_init(tx))
			goto err;
	}

	return 0;

err:
	gve_tx_free_ring_dqo(priv, tx, cfg);
	return -ENOMEM;
}

int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
			   struct gve_tx_alloc_rings_cfg *cfg)
{
	struct gve_tx_ring *tx = cfg->tx;
	int total_queues;
	int err = 0;
	int i, j;

	total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
	if (total_queues > cfg->qcfg->max_queues) {
		netif_err(priv, drv, priv->dev,
			  "Cannot alloc more than the max num of Tx rings\n");
		return -EINVAL;
	}

	tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
		      GFP_KERNEL);
	if (!tx)
		return -ENOMEM;

	for (i = 0; i < total_queues; i++) {
		err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			goto err;
		}
	}

	cfg->tx = tx;
	return 0;

err:
	for (j = 0; j < i; j++)
		gve_tx_free_ring_dqo(priv, &tx[j], cfg);
	kvfree(tx);
	return err;
}

void gve_tx_free_rings_dqo(struct gve_priv *priv,
			   struct gve_tx_alloc_rings_cfg *cfg)
{
	struct gve_tx_ring *tx = cfg->tx;
	int i;

	if (!tx)
		return;

	for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
		gve_tx_free_ring_dqo(priv, &tx[i], cfg);

	kvfree(tx);
	cfg->tx = NULL;
}

/* Returns the number of slots available in the ring */
static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
{
	u32 num_used = (tx->dqo_tx.tail - tx->dqo_tx.head) & tx->mask;

	return tx->mask - num_used;
}
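
/* Note: at most tx->mask (ring_size - 1) slots are ever reported free, and
 * gve_has_tx_slots_available() below additionally reserves
 * GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP descriptors of headroom, presumably so
 * the tail never runs right up against descriptors the HW may still be
 * processing.
 */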

/* Checks if the requested number of slots are available in the ring */
static bool gve_has_tx_slots_available(struct gve_tx_ring *tx, u32 slots_req)
{
	u32 num_avail = num_avail_tx_slots(tx);

	slots_req += GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP;

	if (num_avail >= slots_req)
		return true;

	/* Update cached TX head pointer */
	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);

	return num_avail_tx_slots(tx) >= slots_req;
}

static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx,
				       int desc_count, int buf_count)
{
	return gve_has_pending_packet(tx) &&
	       gve_has_tx_slots_available(tx, desc_count) &&
	       gve_has_free_tx_qpl_bufs(tx, buf_count);
}

/* Stops the queue if fewer than 'desc_count' descriptors or 'buf_count' TX
 * buffers are available.
 * Return: 0 if stop is not required.
 */
static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
				 int desc_count, int buf_count)
{
	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
		return 0;

	/* No space, so stop the queue */
	tx->stop_queue++;
	netif_tx_stop_queue(tx->netdev_txq);

	/* Sync with restarting queue in `gve_tx_poll_dqo()` */
	mb();

	/* After stopping queue, check if we can transmit again in order to
	 * avoid TOCTOU bug.
	 */
	if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
		return -EBUSY;

	netif_tx_start_queue(tx->netdev_txq);
	tx->wake_queue++;
	return 0;
}

static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb,
					struct gve_tx_metadata_dqo *metadata)
{
	memset(metadata, 0, sizeof(*metadata));
	metadata->version = GVE_TX_METADATA_VERSION_DQO;

	if (skb->l4_hash) {
		u16 path_hash = skb->hash ^ (skb->hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (unlikely(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}
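
/* Writes the data descriptor(s) for one mapped buffer. Buffers larger than
 * GVE_TX_MAX_BUF_SIZE_DQO are split into chunks of at most that size, and
 * end_of_packet is set only on the final chunk of the packet's last buffer.
 */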
static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx,
				     bool enable_csum, u32 len, u64 addr,
				     s16 compl_tag, bool eop, bool is_gso)
{
	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
			&tx->dqo.tx_ring[*desc_idx].pkt;
		u32 cur_len = min_t(u32, len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = cpu_to_le64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = enable_csum,
			.compl_tag = cpu_to_le16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->mask;
	}
}

/* Validates and prepares `skb` for TSO.
 *
 * Returns header length, or < 0 if invalid.
 */
static int gve_prep_tso(struct sk_buff *skb)
{
	struct tcphdr *tcp;
	int header_len;
	u32 paylen;
	int err;

	/* Note: HW requires MSS (gso_size) to be <= 9728 and the total length
	 * of the TSO to be <= 262143.
	 *
	 * However, we don't validate these because:
	 * - Hypervisor enforces a limit of 9K MTU
	 * - Kernel will not produce a TSO larger than 64k
	 */

	if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO))
		return -1;

	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
		return -EINVAL;

	/* Needed because we will modify header. */
	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	tcp = tcp_hdr(skb);
	paylen = skb->len - skb_transport_offset(skb);
	csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen));
	header_len = skb_tcp_all_headers(skb);

	if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO))
		return -EINVAL;

	return header_len;
}

static void gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
				     const struct sk_buff *skb,
				     const struct gve_tx_metadata_dqo *metadata,
				     int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = skb->len - header_len;
	desc->mss = skb_shinfo(skb)->gso_size;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
			     const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

static void gve_tx_update_tail(struct gve_tx_ring *tx, u32 desc_idx)
{
	u32 last_desc_idx = (desc_idx - 1) & tx->mask;
	u32 last_report_event_interval =
		(last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask;

	/* Commit the changes to our state */
	tx->dqo_tx.tail = desc_idx;

	/* Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */

	if (unlikely(last_report_event_interval >= GVE_TX_MIN_RE_INTERVAL)) {
		tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true;
		tx->dqo_tx.last_re_idx = last_desc_idx;
	}
}
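
/* Each transmitted packet occupies a contiguous run of TX descriptors: an
 * optional TSO context descriptor, one general context descriptor carrying
 * the metadata bytes, then the data descriptors written by the copy/no-copy
 * helpers below. This mirrors the "1 + skb_is_gso(skb) + num_buffer_descs"
 * budget checked in gve_try_tx_skb().
 */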

static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
				      struct sk_buff *skb,
				      struct gve_tx_pending_packet_dqo *pkt,
				      s16 completion_tag,
				      u32 *desc_idx,
				      bool is_gso)
{
	bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL;
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	/* Note: HW requires that the size of a non-TSO packet be within the
	 * range of [17, 9728].
	 *
	 * We don't double check because
	 * - We limited `netdev->min_mtu` to ETH_MIN_MTU.
	 * - Hypervisor won't allow MTU larger than 9216.
	 */

	pkt->num_bufs = 0;
	/* Map the linear portion of skb */
	{
		u32 len = skb_headlen(skb);
		dma_addr_t addr;

		addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr)))
			goto err;

		dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
		dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
		++pkt->num_bufs;

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr,
					 completion_tag,
					 /*eop=*/shinfo->nr_frags == 0, is_gso);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];
		bool is_eop = i == (shinfo->nr_frags - 1);
		u32 len = skb_frag_size(frag);
		dma_addr_t addr;

		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr)))
			goto err;

		dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
		netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt,
					  dma[pkt->num_bufs], addr);
		++pkt->num_bufs;

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr,
					 completion_tag, is_eop, is_gso);
	}

	return 0;
err:
	for (i = 0; i < pkt->num_bufs; i++) {
		if (i == 0) {
			dma_unmap_single(tx->dev,
					 dma_unmap_addr(pkt, dma[i]),
					 dma_unmap_len(pkt, len[i]),
					 DMA_TO_DEVICE);
		} else {
			dma_unmap_page(tx->dev,
				       dma_unmap_addr(pkt, dma[i]),
				       dma_unmap_len(pkt, len[i]),
				       DMA_TO_DEVICE);
		}
	}
	pkt->num_bufs = 0;
	return -1;
}

/* Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
static void gve_tx_buf_get_addr(struct gve_tx_ring *tx,
				s16 index,
				void **va, dma_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO;

	*va = page_address(tx->dqo.qpl->pages[page_id]) + offset;
	*dma_addr = tx->dqo.qpl->page_buses[page_id] + offset;
}

static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
				   struct sk_buff *skb,
				   struct gve_tx_pending_packet_dqo *pkt,
				   s16 completion_tag,
				   u32 *desc_idx,
				   bool is_gso)
{
	bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL;
	u32 copy_offset = 0;
	dma_addr_t dma_addr;
	u32 copy_len;
	s16 index;
	void *va;

	/* Break the packet into buffer size chunks */
	pkt->num_bufs = 0;
	while (copy_offset < skb->len) {
		index = gve_alloc_tx_qpl_buf(tx);
		if (unlikely(index == -1))
			goto err;

		gve_tx_buf_get_addr(tx, index, &va, &dma_addr);
		copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO,
				 skb->len - copy_offset);
		skb_copy_bits(skb, copy_offset, va, copy_len);

		copy_offset += copy_len;
		dma_sync_single_for_device(tx->dev, dma_addr,
					   copy_len, DMA_TO_DEVICE);
		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum,
					 copy_len,
					 dma_addr,
					 completion_tag,
					 copy_offset == skb->len,
					 is_gso);

		pkt->tx_qpl_buf_ids[pkt->num_bufs] = index;
		++tx->dqo_tx.alloc_tx_qpl_buf_cnt;
		++pkt->num_bufs;
	}

	return 0;
err:
	/* Should not be here if gve_has_free_tx_qpl_bufs() check is correct */
	gve_free_tx_qpl_bufs(tx, pkt);
	return -ENOMEM;
}
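
/* Two data paths feed the ring: in QPL mode (tx->dqo.qpl != NULL) the skb is
 * copied into pre-registered bounce buffers by gve_tx_add_skb_copy_dqo();
 * with raw addressing, the linear area and frags are DMA-mapped in place by
 * gve_tx_add_skb_no_copy_dqo(). gve_tx_add_skb_dqo() below picks the path.
 */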

/* Returns 0 on success, or < 0 on error.
 *
 * Before this function is called, the caller must ensure
 * gve_has_pending_packet(tx) returns true.
 */
static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
			      struct sk_buff *skb)
{
	const bool is_gso = skb_is_gso(skb);
	u32 desc_idx = tx->dqo_tx.tail;
	struct gve_tx_pending_packet_dqo *pkt;
	struct gve_tx_metadata_dqo metadata;
	s16 completion_tag;

	pkt = gve_alloc_pending_packet(tx);
	if (!pkt)
		return -ENOMEM;

	pkt->skb = skb;
	pkt->type = GVE_TX_PENDING_PACKET_DQO_SKB;
	completion_tag = pkt - tx->dqo.pending_packets;

	gve_extract_tx_metadata_dqo(skb, &metadata);
	if (is_gso) {
		int header_len = gve_prep_tso(skb);

		if (unlikely(header_len < 0))
			goto err;

		gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
					 skb, &metadata, header_len);
		desc_idx = (desc_idx + 1) & tx->mask;
	}

	gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
				     &metadata);
	desc_idx = (desc_idx + 1) & tx->mask;

	if (tx->dqo.qpl) {
		if (gve_tx_add_skb_copy_dqo(tx, skb, pkt,
					    completion_tag,
					    &desc_idx, is_gso))
			goto err;
	} else {
		if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt,
					       completion_tag,
					       &desc_idx, is_gso))
			goto err;
	}

	tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;

	gve_tx_update_tail(tx, desc_idx);
	return 0;

err:
	pkt->skb = NULL;
	gve_free_pending_packet(tx, pkt);

	return -1;
}

static int gve_num_descs_per_buf(size_t size)
{
	return DIV_ROUND_UP(size, GVE_TX_MAX_BUF_SIZE_DQO);
}

static int gve_num_buffer_descs_needed(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int num_descs;
	int i;

	num_descs = gve_num_descs_per_buf(skb_headlen(skb));

	for (i = 0; i < shinfo->nr_frags; i++) {
		unsigned int frag_size = skb_frag_size(&shinfo->frags[i]);

		num_descs += gve_num_descs_per_buf(frag_size);
	}

	return num_descs;
}

/* Returns true if HW is capable of sending TSO represented by `skb`.
 *
 * Each segment must not span more than GVE_TX_MAX_DATA_DESCS buffers.
 * - The header is counted as one buffer for every single segment.
 * - A buffer which is split between two segments is counted for both.
 * - If a buffer contains both header and payload, it is counted as two buffers.
 */
static bool gve_can_send_tso(const struct sk_buff *skb)
{
	const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1;
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	const int header_len = skb_tcp_all_headers(skb);
	const int gso_size = shinfo->gso_size;
	int cur_seg_num_bufs;
	int prev_frag_size;
	int cur_seg_size;
	int i;

	cur_seg_size = skb_headlen(skb) - header_len;
	prev_frag_size = skb_headlen(skb);
	cur_seg_num_bufs = cur_seg_size > 0;

	for (i = 0; i < shinfo->nr_frags; i++) {
		if (cur_seg_size >= gso_size) {
			cur_seg_size %= gso_size;
			cur_seg_num_bufs = cur_seg_size > 0;

			if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) {
				int prev_frag_remain = prev_frag_size %
					GVE_TX_MAX_BUF_SIZE_DQO;

				/* If the last descriptor of the previous frag
				 * is less than cur_seg_size, the segment will
				 * span two descriptors in the previous frag.
				 * Since max gso size (9728) is less than
				 * GVE_TX_MAX_BUF_SIZE_DQO, it is impossible
				 * for the segment to span more than two
				 * descriptors.
				 */
				if (prev_frag_remain &&
				    cur_seg_size > prev_frag_remain)
					cur_seg_num_bufs++;
			}
		}

		if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg))
			return false;

		prev_frag_size = skb_frag_size(&shinfo->frags[i]);
		cur_seg_size += prev_frag_size;
	}

	return true;
}
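
/* Worked illustration of the rules above: with one buffer always charged to
 * the header, each segment's payload may touch at most
 * GVE_TX_MAX_DATA_DESCS - 1 buffers, and a frag straddling two segments is
 * charged to both. When gve_features_check_dqo() below clears the GSO feature
 * bits for an skb that breaks these rules, the stack segments that skb in
 * software and hands the resulting packets to the driver individually.
 */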

netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
					 struct net_device *dev,
					 netdev_features_t features)
{
	if (skb_is_gso(skb) && !gve_can_send_tso(skb))
		return features & ~NETIF_F_GSO_MASK;

	return features;
}

/* Attempt to transmit specified SKB.
 *
 * Returns 0 if the SKB was transmitted or dropped.
 * Returns -1 if there is not currently enough space to transmit the SKB.
 */
static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
			  struct sk_buff *skb)
{
	int num_buffer_descs;
	int total_num_descs;

	if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto drop;

	if (tx->dqo.qpl) {
		/* We do not need to verify the number of buffers used per
		 * packet or per segment in case of TSO as with 2K size buffers
		 * none of the TX packet rules would be violated.
		 *
		 * gve_can_send_tso() checks that each TCP segment of gso_size is
		 * not distributed over more than 9 SKB frags.
		 */
		num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO);
	} else {
		num_buffer_descs = gve_num_buffer_descs_needed(skb);
		if (!skb_is_gso(skb)) {
			if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
				if (unlikely(skb_linearize(skb) < 0))
					goto drop;

				num_buffer_descs = 1;
			}
		}
	}

	/* Metadata + (optional TSO) + data descriptors. */
	total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs;
	if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs,
					   num_buffer_descs))) {
		return -1;
	}

	if (unlikely(gve_tx_add_skb_dqo(tx, skb) < 0))
		goto drop;

	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
	skb_tx_timestamp(skb);
	return 0;

drop:
	tx->dropped_pkt++;
	dev_kfree_skb_any(skb);
	return 0;
}
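
/* Doorbell writes are batched: when netdev_xmit_more() indicates more packets
 * are queued and the queue is not stopped, gve_tx_dqo() below returns without
 * ringing the doorbell so a single write can cover the whole burst; the
 * NETDEV_TX_BUSY path always rings it so already-posted descriptors are not
 * stranded.
 */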

/* Transmit a given skb and ring the doorbell. */
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;

	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_try_tx_skb(priv, tx, skb) < 0)) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */
		gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
		return NETDEV_TX_BUSY;
	}

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
	return NETDEV_TX_OK;
}

static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
			struct gve_tx_pending_packet_dqo *pending_packet)
{
	s16 old_tail, index;

	index = pending_packet - tx->dqo.pending_packets;
	old_tail = list->tail;
	list->tail = index;
	if (old_tail == -1)
		list->head = index;
	else
		tx->dqo.pending_packets[old_tail].next = index;

	pending_packet->next = -1;
	pending_packet->prev = old_tail;
}

static void remove_from_list(struct gve_tx_ring *tx,
			     struct gve_index_list *list,
			     struct gve_tx_pending_packet_dqo *pkt)
{
	s16 prev_index, next_index;

	prev_index = pkt->prev;
	next_index = pkt->next;

	if (prev_index == -1) {
		/* Node is head */
		list->head = next_index;
	} else {
		tx->dqo.pending_packets[prev_index].next = next_index;
	}
	if (next_index == -1) {
		/* Node is tail */
		list->tail = prev_index;
	} else {
		tx->dqo.pending_packets[next_index].prev = prev_index;
	}
}

static void gve_unmap_packet(struct device *dev,
			     struct gve_tx_pending_packet_dqo *pkt)
{
	int i;

	/* SKB linear portion is guaranteed to be mapped */
	dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]),
			 dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE);
	for (i = 1; i < pkt->num_bufs; i++) {
		netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]),
					    dma_unmap_len(pkt, len[i]),
					    DMA_TO_DEVICE, 0);
	}
	pkt->num_bufs = 0;
}

/* Completion types and expected behavior:
 * No Miss compl + Packet compl = Packet completed normally.
 * Miss compl + Re-inject compl = Packet completed normally.
 * No Miss compl + Re-inject compl = Skipped i.e. packet not completed.
 * Miss compl + Packet compl = Skipped i.e. packet not completed.
 */
static void gve_handle_packet_completion(struct gve_priv *priv,
					 struct gve_tx_ring *tx, bool is_napi,
					 u16 compl_tag, u64 *bytes, u64 *pkts,
					 bool is_reinjection)
{
	struct gve_tx_pending_packet_dqo *pending_packet;

	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
		net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
				    priv->dev->name, (int)compl_tag);
		return;
	}

	pending_packet = &tx->dqo.pending_packets[compl_tag];

	if (unlikely(is_reinjection)) {
		if (unlikely(pending_packet->state ==
			     GVE_PACKET_STATE_TIMED_OUT_COMPL)) {
			net_err_ratelimited("%s: Re-injection completion: %d received after timeout.\n",
					    priv->dev->name, (int)compl_tag);
			/* Packet was already completed as a result of timeout,
			 * so just remove from list and free pending packet.
			 */
			remove_from_list(tx,
					 &tx->dqo_compl.timed_out_completions,
					 pending_packet);
			gve_free_pending_packet(tx, pending_packet);
			return;
		}
		if (unlikely(pending_packet->state !=
			     GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) {
			/* No outstanding miss completion but packet allocated
			 * implies packet receives a re-injection completion
			 * without a prior miss completion. Return without
			 * completing the packet.
			 */
			net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n",
					    priv->dev->name, (int)compl_tag);
			return;
		}
		remove_from_list(tx, &tx->dqo_compl.miss_completions,
				 pending_packet);
	} else {
		/* Packet is allocated but not a pending data completion. */
		if (unlikely(pending_packet->state !=
			     GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
			net_err_ratelimited("%s: No pending data completion: %d\n",
					    priv->dev->name, (int)compl_tag);
			return;
		}
	}
	tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;

	switch (pending_packet->type) {
	case GVE_TX_PENDING_PACKET_DQO_SKB:
		if (tx->dqo.qpl)
			gve_free_tx_qpl_bufs(tx, pending_packet);
		else
			gve_unmap_packet(tx->dev, pending_packet);
		(*pkts)++;
		*bytes += pending_packet->skb->len;

		napi_consume_skb(pending_packet->skb, is_napi);
		pending_packet->skb = NULL;
		gve_free_pending_packet(tx, pending_packet);
		break;
	case GVE_TX_PENDING_PACKET_DQO_XDP_FRAME:
		gve_unmap_packet(tx->dev, pending_packet);
		(*pkts)++;
		*bytes += pending_packet->xdpf->len;

		xdp_return_frame(pending_packet->xdpf);
		pending_packet->xdpf = NULL;
		gve_free_pending_packet(tx, pending_packet);
		break;
	default:
		WARN_ON_ONCE(1);
	}
}

static void gve_handle_miss_completion(struct gve_priv *priv,
				       struct gve_tx_ring *tx, u16 compl_tag,
				       u64 *bytes, u64 *pkts)
{
	struct gve_tx_pending_packet_dqo *pending_packet;

	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
		net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
				    priv->dev->name, (int)compl_tag);
		return;
	}

	pending_packet = &tx->dqo.pending_packets[compl_tag];
	if (unlikely(pending_packet->state !=
		     GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		net_err_ratelimited("%s: Unexpected packet state: %d for completion tag : %d\n",
				    priv->dev->name, (int)pending_packet->state,
				    (int)compl_tag);
		return;
	}

	pending_packet->state = GVE_PACKET_STATE_PENDING_REINJECT_COMPL;
	/* jiffies can wrap around but time comparisons can handle overflows. */
	pending_packet->timeout_jiffies =
			jiffies +
			secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT);
	add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet);

	*bytes += pending_packet->skb->len;
	(*pkts)++;
}
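
/* Timeout handling for miss completions: if no reinjection completion arrives
 * within GVE_REINJECT_COMPL_TIMEOUT seconds, remove_miss_completions() drops
 * the skb and frees its buffers, but parks the completion tag on the
 * timed_out_completions list for a further GVE_DEALLOCATE_COMPL_TIMEOUT
 * seconds so a late completion can still be recognized before the pending
 * packet is finally released.
 */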
static void remove_miss_completions(struct gve_priv *priv,
				    struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pending_packet;
	s16 next_index;

	next_index = tx->dqo_compl.miss_completions.head;
	while (next_index != -1) {
		pending_packet = &tx->dqo.pending_packets[next_index];
		next_index = pending_packet->next;
		/* Break early because packets should timeout in order. */
		if (time_is_after_jiffies(pending_packet->timeout_jiffies))
			break;

		remove_from_list(tx, &tx->dqo_compl.miss_completions,
				 pending_packet);
		/* Unmap/free the TX buffers and free the skb, but do not
		 * release the pending packet, i.e. the completion tag is not
		 * freed, so that the driver can take appropriate action if a
		 * corresponding valid completion is received later.
		 */
		if (tx->dqo.qpl)
			gve_free_tx_qpl_bufs(tx, pending_packet);
		else
			gve_unmap_packet(tx->dev, pending_packet);

		/* This indicates the packet was dropped. */
		dev_kfree_skb_any(pending_packet->skb);
		pending_packet->skb = NULL;
		tx->dropped_pkt++;
		net_err_ratelimited("%s: No reinjection completion was received for: %d.\n",
				    priv->dev->name,
				    (int)(pending_packet - tx->dqo.pending_packets));

		pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
		pending_packet->timeout_jiffies =
				jiffies +
				secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT);
		/* Maintain pending packet in another list so the packet can be
		 * unallocated at a later time.
		 */
		add_to_list(tx, &tx->dqo_compl.timed_out_completions,
			    pending_packet);
	}
}

static void remove_timed_out_completions(struct gve_priv *priv,
					 struct gve_tx_ring *tx)
{
	struct gve_tx_pending_packet_dqo *pending_packet;
	s16 next_index;

	next_index = tx->dqo_compl.timed_out_completions.head;
	while (next_index != -1) {
		pending_packet = &tx->dqo.pending_packets[next_index];
		next_index = pending_packet->next;
		/* Break early because packets should timeout in order. */
		if (time_is_after_jiffies(pending_packet->timeout_jiffies))
			break;

		remove_from_list(tx, &tx->dqo_compl.timed_out_completions,
				 pending_packet);
		gve_free_pending_packet(tx, pending_packet);
	}
}
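
/* Completion ring ownership is tracked with a generation bit: the device is
 * expected to write the opposite bit value on each successive pass over the
 * ring, and the driver flips cur_gen_bit whenever its head wraps, so a
 * descriptor whose generation differs from cur_gen_bit is new and ready to be
 * processed.
 */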
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
			  struct napi_struct *napi)
{
	u64 reinject_compl_bytes = 0;
	u64 reinject_compl_pkts = 0;
	int num_descs_cleaned = 0;
	u64 miss_compl_bytes = 0;
	u64 miss_compl_pkts = 0;
	u64 pkt_compl_bytes = 0;
	u64 pkt_compl_pkts = 0;

	/* Limit in order to avoid blocking for too long */
	while (!napi || pkt_compl_pkts < napi->weight) {
		struct gve_tx_compl_desc *compl_desc =
			&tx->dqo.compl_ring[tx->dqo_compl.head];
		u16 type;

		if (compl_desc->generation == tx->dqo_compl.cur_gen_bit)
			break;

		/* Prefetch the next descriptor. */
		prefetch(&tx->dqo.compl_ring[(tx->dqo_compl.head + 1) &
				tx->dqo.complq_mask]);

		/* Do not read data until we own the descriptor */
		dma_rmb();
		type = compl_desc->type;

		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			u16 tx_head = le16_to_cpu(compl_desc->tx_head);

			atomic_set_release(&tx->dqo_compl.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);

			if (compl_tag & GVE_ALT_MISS_COMPL_BIT) {
				compl_tag &= ~GVE_ALT_MISS_COMPL_BIT;
				gve_handle_miss_completion(priv, tx, compl_tag,
							   &miss_compl_bytes,
							   &miss_compl_pkts);
			} else {
				gve_handle_packet_completion(priv, tx, !!napi,
							     compl_tag,
							     &pkt_compl_bytes,
							     &pkt_compl_pkts,
							     false);
			}
		} else if (type == GVE_COMPL_TYPE_DQO_MISS) {
			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);

			gve_handle_miss_completion(priv, tx, compl_tag,
						   &miss_compl_bytes,
						   &miss_compl_pkts);
		} else if (type == GVE_COMPL_TYPE_DQO_REINJECTION) {
			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);

			gve_handle_packet_completion(priv, tx, !!napi,
						     compl_tag,
						     &reinject_compl_bytes,
						     &reinject_compl_pkts,
						     true);
		}

		tx->dqo_compl.head =
			(tx->dqo_compl.head + 1) & tx->dqo.complq_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo_compl.cur_gen_bit ^= tx->dqo_compl.head == 0;
		num_descs_cleaned++;
	}

	if (tx->netdev_txq)
		netdev_tx_completed_queue(tx->netdev_txq,
					  pkt_compl_pkts + miss_compl_pkts,
					  pkt_compl_bytes + miss_compl_bytes);

	remove_miss_completions(priv, tx);
	remove_timed_out_completions(priv, tx);

	u64_stats_update_begin(&tx->statss);
	tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
	tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
	u64_stats_update_end(&tx->statss);
	return num_descs_cleaned;
}

bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
{
	struct gve_tx_compl_desc *compl_desc;
	struct gve_tx_ring *tx = block->tx;
	struct gve_priv *priv = block->priv;

	if (do_clean) {
		int num_descs_cleaned = gve_clean_tx_done_dqo(priv, tx,
							      &block->napi);

		/* Sync with queue being stopped in `gve_maybe_stop_tx_dqo()` */
		mb();

		if (netif_tx_queue_stopped(tx->netdev_txq) &&
		    num_descs_cleaned > 0) {
			tx->wake_queue++;
			netif_tx_wake_queue(tx->netdev_txq);
		}
	}

	/* Return true if we still have work. */
	compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
	return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
}

bool gve_xdp_poll_dqo(struct gve_notify_block *block)
{
	struct gve_tx_compl_desc *compl_desc;
	struct gve_tx_ring *tx = block->tx;
	struct gve_priv *priv = block->priv;

	gve_clean_tx_done_dqo(priv, tx, &block->napi);

	/* Return true if we still have work. */
	compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
	return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
}
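
/* XDP transmit path: each xdp_frame is DMA-mapped as one linear buffer and
 * posted with a single data descriptor (no TSO or general context
 * descriptors), reusing the same pending-packet / completion-tag machinery as
 * the skb path.
 */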
int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
			 struct xdp_frame *xdpf)
{
	struct gve_tx_pending_packet_dqo *pkt;
	u32 desc_idx = tx->dqo_tx.tail;
	s16 completion_tag;
	int num_descs = 1;
	dma_addr_t addr;
	int err;

	if (unlikely(!gve_has_tx_slots_available(tx, num_descs)))
		return -EBUSY;

	pkt = gve_alloc_pending_packet(tx);
	if (unlikely(!pkt))
		return -EBUSY;

	pkt->type = GVE_TX_PENDING_PACKET_DQO_XDP_FRAME;
	pkt->num_bufs = 0;
	pkt->xdpf = xdpf;
	completion_tag = pkt - tx->dqo.pending_packets;

	/* Generate Packet Descriptor */
	addr = dma_map_single(tx->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
	err = dma_mapping_error(tx->dev, addr);
	if (unlikely(err))
		goto err;

	dma_unmap_len_set(pkt, len[pkt->num_bufs], xdpf->len);
	dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
	pkt->num_bufs++;

	gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
				 false, xdpf->len,
				 addr, completion_tag, true,
				 false);

	gve_tx_update_tail(tx, desc_idx);
	return 0;

err:
	pkt->xdpf = NULL;
	pkt->num_bufs = 0;
	gve_free_pending_packet(tx, pkt);
	return err;
}

int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames,
		     u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;
	int i, err = 0, qid;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	qid = gve_xdp_tx_queue_id(priv,
				  smp_processor_id() % priv->tx_cfg.num_xdp_queues);

	tx = &priv->tx[qid];

	spin_lock(&tx->dqo_tx.xdp_lock);
	for (i = 0; i < n; i++) {
		err = gve_xdp_xmit_one_dqo(priv, tx, frames[i]);
		if (err)
			break;
	}

	if (flags & XDP_XMIT_FLUSH)
		gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);

	spin_unlock(&tx->dqo_tx.xdp_lock);

	u64_stats_update_begin(&tx->statss);
	tx->xdp_xmit += n;
	tx->xdp_xmit_errors += n - i;
	u64_stats_update_end(&tx->statss);

	return i ? i : err;
}