// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include <linux/cache.h>
#include "net_driver.h"
#include "efx.h"
#include "io.h"
#include "nic.h"
#include "tx.h"
#include "workarounds.h"
#include "ef10_regs.h"

#ifdef EFX_USE_PIO

#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES)
unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;

#endif /* EFX_USE_PIO */

static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
					 struct efx_tx_buffer *buffer)
{
	unsigned int index = efx_tx_queue_get_insert_index(tx_queue);
	struct efx_buffer *page_buf =
		&tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)];
	unsigned int offset =
		((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);

	if (unlikely(!page_buf->addr) &&
	    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
				 GFP_ATOMIC))
		return NULL;
	buffer->dma_addr = page_buf->dma_addr + offset;
	buffer->unmap_len = 0;
	return (u8 *)page_buf->addr + offset;
}

u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
				   struct efx_tx_buffer *buffer, size_t len)
{
	if (len > EFX_TX_CB_SIZE)
		return NULL;
	return efx_tx_get_copy_buffer(tx_queue, buffer);
}

static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer,
			       unsigned int *pkts_compl,
			       unsigned int *bytes_compl)
{
	if (buffer->unmap_len) {
		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
		dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
		if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
					 DMA_TO_DEVICE);
		else
			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
				       DMA_TO_DEVICE);
		buffer->unmap_len = 0;
	}

	if (buffer->flags & EFX_TX_BUF_SKB) {
		struct sk_buff *skb = (struct sk_buff *)buffer->skb;

		EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
		(*pkts_compl)++;
		(*bytes_compl) += skb->len;
		if (tx_queue->timestamping &&
		    (tx_queue->completed_timestamp_major ||
		     tx_queue->completed_timestamp_minor)) {
			struct skb_shared_hwtstamps hwtstamp;

			hwtstamp.hwtstamp =
				efx_ptp_nic_to_kernel_time(tx_queue);
			skb_tstamp_tx(skb, &hwtstamp);

			tx_queue->completed_timestamp_major = 0;
			tx_queue->completed_timestamp_minor = 0;
		}
		dev_consume_skb_any((struct sk_buff *)buffer->skb);
		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
			   "TX queue %d transmission id %x complete\n",
			   tx_queue->queue, tx_queue->read_count);
	} else if (buffer->flags & EFX_TX_BUF_XDP) {
		xdp_return_frame_rx_napi(buffer->xdpf);
	}

	buffer->len = 0;
	buffer->flags = 0;
}

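/* Note: a TX buffer owns a DMA mapping only while unmap_len is non-zero
 * (EFX_TX_BUF_MAP_SINGLE selects dma_unmap_single() rather than
 * dma_unmap_page()), and it owns an skb or XDP frame only while the
 * EFX_TX_BUF_SKB or EFX_TX_BUF_XDP flag is set.  efx_dequeue_buffer()
 * clears len and flags so that the ring slot can be reused immediately.
 */
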
unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
{
	/* Header and payload descriptor for each output segment, plus
	 * one for every input fragment boundary within a segment
	 */
	unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;

	/* Possibly one more per segment for option descriptors */
	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
		max_descs += EFX_TSO_MAX_SEGS;

	/* Possibly more for PCIe page boundaries within input fragments */
	if (PAGE_SIZE > EFX_PAGE_SIZE)
		max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
				   DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));

	return max_descs;
}

static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
	/* We need to consider both queues that the net core sees as one */
	struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
	struct efx_nic *efx = txq1->efx;
	unsigned int fill_level;

	fill_level = max(txq1->insert_count - txq1->old_read_count,
			 txq2->insert_count - txq2->old_read_count);
	if (likely(fill_level < efx->txq_stop_thresh))
		return;

	/* We used the stale old_read_count above, which gives us a
	 * pessimistic estimate of the fill level (which may even
	 * validly be >= efx->txq_entries). Now try again using
	 * read_count (more likely to be a cache miss).
	 *
	 * If we read read_count and then conditionally stop the
	 * queue, it is possible for the completion path to race with
	 * us and complete all outstanding descriptors in the middle,
	 * after which there will be no more completions to wake it.
	 * Therefore we stop the queue first, then read read_count
	 * (with a memory barrier to ensure the ordering), then
	 * restart the queue if the fill level turns out to be low
	 * enough.
	 */
	netif_tx_stop_queue(txq1->core_txq);
	smp_mb();
	txq1->old_read_count = READ_ONCE(txq1->read_count);
	txq2->old_read_count = READ_ONCE(txq2->read_count);

	fill_level = max(txq1->insert_count - txq1->old_read_count,
			 txq2->insert_count - txq2->old_read_count);
	EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries);
	if (likely(fill_level < efx->txq_stop_thresh)) {
		smp_mb();
		if (likely(!efx->loopback_selftest))
			netif_tx_start_queue(txq1->core_txq);
	}
}

static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
				struct sk_buff *skb)
{
	unsigned int copy_len = skb->len;
	struct efx_tx_buffer *buffer;
	u8 *copy_buffer;
	int rc;

	EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE);

	buffer = efx_tx_queue_get_insert_buffer(tx_queue);

	copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer);
	if (unlikely(!copy_buffer))
		return -ENOMEM;

	rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
	EFX_WARN_ON_PARANOID(rc);
	buffer->len = copy_len;

	buffer->skb = skb;
	buffer->flags = EFX_TX_BUF_SKB;

	++tx_queue->insert_count;
	return rc;
}

#ifdef EFX_USE_PIO

struct efx_short_copy_buffer {
	int used;
	u8 buf[L1_CACHE_BYTES];
};

/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
 * Advances piobuf pointer. Leaves additional data in the copy buffer.
 */
static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
				    u8 *data, int len,
				    struct efx_short_copy_buffer *copy_buf)
{
	int block_len = len & ~(sizeof(copy_buf->buf) - 1);

	__iowrite64_copy(*piobuf, data, block_len >> 3);
	*piobuf += block_len;
	len -= block_len;

	if (len) {
		data += block_len;
		BUG_ON(copy_buf->used);
		BUG_ON(len > sizeof(copy_buf->buf));
		memcpy(copy_buf->buf, data, len);
		copy_buf->used = len;
	}
}

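/* Worked example (assuming L1_CACHE_BYTES == 64): for len == 150,
 * block_len == 128, so two whole cache lines are written directly with
 * __iowrite64_copy() and the remaining 22 bytes are held back in
 * copy_buf, to be completed by the next fragment or written out (with
 * trailing junk) by efx_flush_copy_buffer().
 */
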
/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
 * Advances piobuf pointer. Leaves additional data in the copy buffer.
 */
static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
				       u8 *data, int len,
				       struct efx_short_copy_buffer *copy_buf)
{
	if (copy_buf->used) {
		/* if the copy buffer is partially full, fill it up and write */
		int copy_to_buf =
			min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);

		memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
		copy_buf->used += copy_to_buf;

		/* if we didn't fill it up then we're done for now */
		if (copy_buf->used < sizeof(copy_buf->buf))
			return;

		__iowrite64_copy(*piobuf, copy_buf->buf,
				 sizeof(copy_buf->buf) >> 3);
		*piobuf += sizeof(copy_buf->buf);
		data += copy_to_buf;
		len -= copy_to_buf;
		copy_buf->used = 0;
	}

	efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
}

static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
				  struct efx_short_copy_buffer *copy_buf)
{
	/* if there's anything in it, write the whole buffer, including junk */
	if (copy_buf->used)
		__iowrite64_copy(piobuf, copy_buf->buf,
				 sizeof(copy_buf->buf) >> 3);
}

/* Traverse skb structure and copy fragments in to PIO buffer.
 * Advances piobuf pointer.
 */
static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
				     u8 __iomem **piobuf,
				     struct efx_short_copy_buffer *copy_buf)
{
	int i;

	efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
				copy_buf);

	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u8 *vaddr;

		vaddr = kmap_atomic(skb_frag_page(f));

		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f),
					   skb_frag_size(f), copy_buf);
		kunmap_atomic(vaddr);
	}

	EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list);
}

static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
{
	struct efx_tx_buffer *buffer =
		efx_tx_queue_get_insert_buffer(tx_queue);
	u8 __iomem *piobuf = tx_queue->piobuf;

	/* Copy to PIO buffer. Ensure the writes are padded to the end
	 * of a cache line, as this is required for write-combining to be
	 * effective on at least x86.
	 */

	if (skb_shinfo(skb)->nr_frags) {
		/* The size of the copy buffer will ensure all writes
		 * are the size of a cache line.
		 */
		struct efx_short_copy_buffer copy_buf;

		copy_buf.used = 0;

		efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
					 &piobuf, &copy_buf);
		efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
	} else {
		/* Pad the write to the size of a cache line.
		 * We can do this because we know the skb_shared_info struct is
		 * after the source, and the destination buffer is big enough.
		 */
		BUILD_BUG_ON(L1_CACHE_BYTES >
			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
		__iowrite64_copy(tx_queue->piobuf, skb->data,
				 ALIGN(skb->len, L1_CACHE_BYTES) >> 3);
	}

	buffer->skb = skb;
	buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION;

	EFX_POPULATE_QWORD_5(buffer->option,
			     ESF_DZ_TX_DESC_IS_OPT, 1,
			     ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
			     ESF_DZ_TX_PIO_CONT, 0,
			     ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
			     ESF_DZ_TX_PIO_BUF_ADDR,
			     tx_queue->piobuf_offset);
	++tx_queue->insert_count;
	return 0;
}
#endif /* EFX_USE_PIO */

static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
					      dma_addr_t dma_addr,
					      size_t len)
{
	const struct efx_nic_type *nic_type = tx_queue->efx->type;
	struct efx_tx_buffer *buffer;
	unsigned int dma_len;

	/* Map the fragment taking account of NIC-dependent DMA limits. */
	do {
		buffer = efx_tx_queue_get_insert_buffer(tx_queue);
		dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);

		buffer->len = dma_len;
		buffer->dma_addr = dma_addr;
		buffer->flags = EFX_TX_BUF_CONT;
		len -= dma_len;
		dma_addr += dma_len;
		++tx_queue->insert_count;
	} while (len);

	return buffer;
}

/* Map all data from an SKB for DMA and create descriptors on the queue.
 */
static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
			   unsigned int segment_count)
{
	struct efx_nic *efx = tx_queue->efx;
	struct device *dma_dev = &efx->pci_dev->dev;
	unsigned int frag_index, nr_frags;
	dma_addr_t dma_addr, unmap_addr;
	unsigned short dma_flags;
	size_t len, unmap_len;

	nr_frags = skb_shinfo(skb)->nr_frags;
	frag_index = 0;

	/* Map header data. */
	len = skb_headlen(skb);
	dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
	dma_flags = EFX_TX_BUF_MAP_SINGLE;
	unmap_len = len;
	unmap_addr = dma_addr;

	if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
		return -EIO;

	if (segment_count) {
		/* For TSO we need to put the header in to a separate
		 * descriptor. Map this separately if necessary.
		 */
		size_t header_len = skb_transport_header(skb) - skb->data +
				    (tcp_hdr(skb)->doff << 2u);

		if (header_len != len) {
			tx_queue->tso_long_headers++;
			efx_tx_map_chunk(tx_queue, dma_addr, header_len);
			len -= header_len;
			dma_addr += header_len;
		}
	}

	/* Add descriptors for each fragment. */
	do {
		struct efx_tx_buffer *buffer;
		skb_frag_t *fragment;

		buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);

		/* The final descriptor for a fragment is responsible for
		 * unmapping the whole fragment.
		 */
		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
		buffer->unmap_len = unmap_len;
		buffer->dma_offset = buffer->dma_addr - unmap_addr;

		if (frag_index >= nr_frags) {
			/* Store SKB details with the final buffer for
			 * the completion.
			 */
			buffer->skb = skb;
			buffer->flags = EFX_TX_BUF_SKB | dma_flags;
			return 0;
		}

		/* Move on to the next fragment. */
		fragment = &skb_shinfo(skb)->frags[frag_index++];
		len = skb_frag_size(fragment);
		dma_addr = skb_frag_dma_map(dma_dev, fragment,
					    0, len, DMA_TO_DEVICE);
		dma_flags = 0;
		unmap_len = len;
		unmap_addr = dma_addr;

		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
			return -EIO;
	} while (1);
}

/* Remove buffers put into a tx_queue for the current packet.
 * None of the buffers must have an skb attached.
 */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
			       unsigned int insert_count)
{
	struct efx_tx_buffer *buffer;
	unsigned int bytes_compl = 0;
	unsigned int pkts_compl = 0;

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != insert_count) {
		--tx_queue->insert_count;
		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
	}
}

/*
 * Fallback to software TSO.
 *
 * This is used if we are unable to send a GSO packet through hardware TSO.
 * This should only ever happen due to per-queue restrictions - unsupported
 * packets should first be filtered by the feature flags.
 *
 * Returns 0 on success, error code otherwise.
 */
static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
{
	struct sk_buff *segments, *next;

	segments = skb_gso_segment(skb, 0);
	if (IS_ERR(segments))
		return PTR_ERR(segments);

	dev_consume_skb_any(skb);
	skb = segments;

	while (skb) {
		next = skb->next;
		skb->next = NULL;

		efx_enqueue_skb(tx_queue, skb);
		skb = next;
	}

	return 0;
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue. The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped,
 * the queue's insert pointer will be restored to its original value.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.
 *
 * Returns NETDEV_TX_OK.
 * You must hold netif_tx_lock() to call this function.
 */
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	unsigned int old_insert_count = tx_queue->insert_count;
	bool xmit_more = netdev_xmit_more();
	bool data_mapped = false;
	unsigned int segments;
	unsigned int skb_len;
	int rc;

	skb_len = skb->len;
	segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
	if (segments == 1)
		segments = 0; /* Don't use TSO for a single segment. */

	/* Handle TSO first - it's *possible* (although unlikely) that we might
	 * be passed a packet to segment that's smaller than the copybreak/PIO
	 * size limit.
	 */
	if (segments) {
		EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso);
		rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
		if (rc == -EINVAL) {
			rc = efx_tx_tso_fallback(tx_queue, skb);
			tx_queue->tso_fallbacks++;
			if (rc == 0)
				return 0;
		}
		if (rc)
			goto err;
#ifdef EFX_USE_PIO
	} else if (skb_len <= efx_piobuf_size && !xmit_more &&
		   efx_nic_may_tx_pio(tx_queue)) {
		/* Use PIO for short packets with an empty queue. */
		if (efx_enqueue_skb_pio(tx_queue, skb))
			goto err;
		tx_queue->pio_packets++;
		data_mapped = true;
#endif
	} else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
		/* Pad short packets or coalesce short fragmented packets. */
		if (efx_enqueue_skb_copy(tx_queue, skb))
			goto err;
		tx_queue->cb_packets++;
		data_mapped = true;
	}

	/* Map for DMA and create descriptors if we haven't done so already. */
	if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments)))
		goto err;

	efx_tx_maybe_stop_queue(tx_queue);

	/* Pass off to hardware */
	if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more)) {
		struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);

		/* There could be packets left on the partner queue if
		 * xmit_more was set. If we do not push those they
		 * could be left for a long time and cause a netdev watchdog.
		 */
		if (txq2->xmit_more_available)
			efx_nic_push_buffers(txq2);

		efx_nic_push_buffers(tx_queue);
	} else {
		tx_queue->xmit_more_available = xmit_more;
	}

	if (segments) {
		tx_queue->tso_bursts++;
		tx_queue->tso_packets += segments;
		tx_queue->tx_packets += segments;
	} else {
		tx_queue->tx_packets++;
	}

	return NETDEV_TX_OK;


err:
	efx_enqueue_unwind(tx_queue, old_insert_count);
	dev_kfree_skb_any(skb);

	/* If we're not expecting another transmit and we had something to push
	 * on this queue or a partner queue then we need to push here to get the
	 * previous packets out.
	 */
	if (!xmit_more) {
		struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);

		if (txq2->xmit_more_available)
			efx_nic_push_buffers(txq2);

		efx_nic_push_buffers(tx_queue);
	}

	return NETDEV_TX_OK;
}

static void efx_xdp_return_frames(int n, struct xdp_frame **xdpfs)
{
	int i;

	for (i = 0; i < n; i++)
		xdp_return_frame_rx_napi(xdpfs[i]);
}

/* Transmit a packet from an XDP buffer
 *
 * Returns number of packets sent on success, error code otherwise.
 * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC
 * (for XDP redirect).
 */
int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
		       bool flush)
{
	struct efx_tx_buffer *tx_buffer;
	struct efx_tx_queue *tx_queue;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	unsigned int len;
	int space;
	int cpu;
	int i;

	cpu = raw_smp_processor_id();

	if (!efx->xdp_tx_queue_count ||
	    unlikely(cpu >= efx->xdp_tx_queue_count))
		return -EINVAL;

	tx_queue = efx->xdp_tx_queues[cpu];
	if (unlikely(!tx_queue))
		return -EINVAL;

	if (unlikely(n && !xdpfs))
		return -EINVAL;

	if (!n)
		return 0;

	/* Check for available space. We should never need multiple
	 * descriptors per frame.
	 */
	space = efx->txq_entries +
		tx_queue->read_count - tx_queue->insert_count;

	for (i = 0; i < n; i++) {
		xdpf = xdpfs[i];

		if (i >= space)
			break;

		/* We'll want a descriptor for this tx. */
		prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue));

		len = xdpf->len;

		/* Map for DMA. */
		dma_addr = dma_map_single(&efx->pci_dev->dev,
					  xdpf->data, len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&efx->pci_dev->dev, dma_addr))
			break;

		/* Create descriptor and set up for unmapping DMA. */
		tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
		tx_buffer->xdpf = xdpf;
		tx_buffer->flags = EFX_TX_BUF_XDP |
				   EFX_TX_BUF_MAP_SINGLE;
		tx_buffer->dma_offset = 0;
		tx_buffer->unmap_len = len;
		tx_queue->tx_packets++;
	}

	/* Pass mapped frames to hardware. */
	if (flush && i > 0)
		efx_nic_push_buffers(tx_queue);

	if (i == 0)
		return -EIO;

	efx_xdp_return_frames(n - i, xdpfs + i);

	return i;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				unsigned int index,
				unsigned int *pkts_compl,
				unsigned int *bytes_compl)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;

	stop_index = (index + 1) & tx_queue->ptr_mask;
	read_ptr = tx_queue->read_count & tx_queue->ptr_mask;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];

		if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
		    unlikely(buffer->len == 0)) {
			netif_err(efx, tx_err, efx->net_dev,
				  "TX queue %d spurious TX completion id %x\n",
				  tx_queue->queue, read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
	}
}

/* Initiate a packet transmission. We use one channel per CPU
 * (sharing when we have more CPUs than channels). On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
				struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_tx_queue *tx_queue;
	unsigned index, type;

	EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));

	/* PTP "event" packet */
	if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
	    unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
		return efx_ptp_tx(efx, skb);
	}

	index = skb_get_queue_mapping(skb);
	type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
	if (index >= efx->n_tx_channels) {
		index -= efx->n_tx_channels;
		type |= EFX_TXQ_TYPE_HIGHPRI;
	}
	tx_queue = efx_get_tx_queue(efx, index, type);

	return efx_enqueue_skb(tx_queue, skb);
}

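/* The core network stack sees efx->n_tx_channels queues per traffic
 * class, with the high-priority class (if configured via efx_setup_tc())
 * following the normal one.  efx_hard_start_xmit() above converts that
 * flat queue number into a (channel index, EFX_TXQ_TYPE_*) pair, and
 * efx_init_tx_queue_core_txq() below performs the inverse mapping.
 */
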
void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;

	/* Must be inverse of queue lookup in efx_hard_start_xmit() */
	tx_queue->core_txq =
		netdev_get_tx_queue(efx->net_dev,
				    tx_queue->queue / EFX_TXQ_TYPES +
				    ((tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
				     efx->n_tx_channels : 0));
}

int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
		 void *type_data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct tc_mqprio_qopt *mqprio = type_data;
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	unsigned tc, num_tc;
	int rc;

	if (type != TC_SETUP_QDISC_MQPRIO)
		return -EOPNOTSUPP;

	num_tc = mqprio->num_tc;

	if (num_tc > EFX_MAX_TX_TC)
		return -EINVAL;

	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;

	if (num_tc == net_dev->num_tc)
		return 0;

	for (tc = 0; tc < num_tc; tc++) {
		net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
		net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
	}

	if (num_tc > net_dev->num_tc) {
		/* Initialise high-priority queues as necessary */
		efx_for_each_channel(channel, efx) {
			efx_for_each_possible_channel_tx_queue(tx_queue,
							       channel) {
				if (!(tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI))
					continue;
				if (!tx_queue->buffer) {
					rc = efx_probe_tx_queue(tx_queue);
					if (rc)
						return rc;
				}
				if (!tx_queue->initialised)
					efx_init_tx_queue(tx_queue);
				efx_init_tx_queue_core_txq(tx_queue);
			}
		}
	} else {
		/* Reduce number of classes before number of queues */
		net_dev->num_tc = num_tc;
	}

	rc = netif_set_real_num_tx_queues(net_dev,
					  max_t(int, num_tc, 1) *
					  efx->n_tx_channels);
	if (rc)
		return rc;

	/* Do not destroy high-priority queues when they become
	 * unused. We would have to flush them first, and it is
	 * fairly difficult to flush a subset of TX queues. Leave
	 * it to efx_fini_channels().
	 */

	net_dev->num_tc = num_tc;
	return 0;
}

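/* Note: the smp_mb() in efx_xmit_done() below pairs with the one in
 * efx_tx_maybe_stop_queue(): the transmit path stops the queue before
 * re-reading read_count, and the completion path updates read_count
 * before checking whether the queue is stopped, so a wake-up cannot be
 * missed between the two.
 */
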
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;
	struct efx_tx_queue *txq2;
	unsigned int pkts_compl = 0, bytes_compl = 0;

	EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);

	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
	tx_queue->pkts_compl += pkts_compl;
	tx_queue->bytes_compl += bytes_compl;

	if (pkts_compl > 1)
		++tx_queue->merge_events;

	/* See if we need to restart the netif queue. This memory
	 * barrier ensures that we write read_count (inside
	 * efx_dequeue_buffers()) before reading the queue status.
	 */
	smp_mb();
	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
	    likely(efx->port_enabled) &&
	    likely(netif_device_present(efx->net_dev))) {
		txq2 = efx_tx_queue_partner(tx_queue);
		fill_level = max(tx_queue->insert_count - tx_queue->read_count,
				 txq2->insert_count - txq2->read_count);
		if (fill_level <= efx->txq_wake_thresh)
			netif_tx_wake_queue(tx_queue->core_txq);
	}

	/* Check whether the hardware queue is now empty */
	if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
		tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
		if (tx_queue->read_count == tx_queue->old_write_count) {
			smp_mb();
			tx_queue->empty_read_count =
				tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
		}
	}
}

static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
{
	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER);
}

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
	tx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating TX queue %d size %#x mask %#x\n",
		  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);

	/* Allocate software ring */
	tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
				   GFP_KERNEL);
	if (!tx_queue->buffer)
		return -ENOMEM;

	tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
				    sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
	if (!tx_queue->cb_page) {
		rc = -ENOMEM;
		goto fail1;
	}

	/* Allocate hardware ring */
	rc = efx_nic_probe_tx(tx_queue);
	if (rc)
		goto fail2;

	return 0;

fail2:
	kfree(tx_queue->cb_page);
	tx_queue->cb_page = NULL;
fail1:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	return rc;
}

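/* The descriptor ring is sized to a power of two so that the free-running
 * insert/read/write counts never need to be wrapped explicitly; masking a
 * count with ptr_mask (entries - 1) yields the corresponding ring index.
 */
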
void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;

	netif_dbg(efx, drv, efx->net_dev,
		  "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->packet_write_count = 0;
	tx_queue->old_write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
	tx_queue->xmit_more_available = false;
	tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
				  tx_queue->channel == efx_ptp_channel(efx));
	tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
	tx_queue->completed_timestamp_major = 0;
	tx_queue->completed_timestamp_minor = 0;

	tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);

	/* Set up default function pointers. These may get replaced by
	 * efx_nic_init_tx() based off NIC/queue capabilities.
	 */
	tx_queue->handle_tso = efx_enqueue_skb_tso;

	/* Set up TX descriptor ring */
	efx_nic_init_tx(tx_queue);

	tx_queue->initialised = true;
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "shutting down TX queue %d\n", tx_queue->queue);

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		unsigned int pkts_compl = 0, bytes_compl = 0;
		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);

		++tx_queue->read_count;
	}
	tx_queue->xmit_more_available = false;
	netdev_tx_reset_queue(tx_queue->core_txq);
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	int i;

	if (!tx_queue->buffer)
		return;

	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "destroying TX queue %d\n", tx_queue->queue);
	efx_nic_remove_tx(tx_queue);

	if (tx_queue->cb_page) {
		for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
			efx_nic_free_buffer(tx_queue->efx,
					    &tx_queue->cb_page[i]);
		kfree(tx_queue->cb_page);
		tx_queue->cb_page = NULL;
	}

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
}