1 /**************************************************************************** 2 * Driver for Solarflare network controllers and boards 3 * Copyright 2005-2006 Fen Systems Ltd. 4 * Copyright 2005-2013 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include <linux/pci.h> 12 #include <linux/tcp.h> 13 #include <linux/ip.h> 14 #include <linux/in.h> 15 #include <linux/ipv6.h> 16 #include <linux/slab.h> 17 #include <net/ipv6.h> 18 #include <linux/if_ether.h> 19 #include <linux/highmem.h> 20 #include <linux/cache.h> 21 #include "net_driver.h" 22 #include "efx.h" 23 #include "io.h" 24 #include "nic.h" 25 #include "tx.h" 26 #include "workarounds.h" 27 #include "ef10_regs.h" 28 29 #ifdef EFX_USE_PIO 30 31 #define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES) 32 unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF; 33 34 #endif /* EFX_USE_PIO */ 35 36 static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue, 37 struct efx_tx_buffer *buffer) 38 { 39 unsigned int index = efx_tx_queue_get_insert_index(tx_queue); 40 struct efx_buffer *page_buf = 41 &tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)]; 42 unsigned int offset = 43 ((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1); 44 45 if (unlikely(!page_buf->addr) && 46 efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE, 47 GFP_ATOMIC)) 48 return NULL; 49 buffer->dma_addr = page_buf->dma_addr + offset; 50 buffer->unmap_len = 0; 51 return (u8 *)page_buf->addr + offset; 52 } 53 54 u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, 55 struct efx_tx_buffer *buffer, size_t len) 56 { 57 if (len > EFX_TX_CB_SIZE) 58 return NULL; 59 return efx_tx_get_copy_buffer(tx_queue, buffer); 60 } 61 62 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, 63 struct efx_tx_buffer *buffer, 64 unsigned int *pkts_compl, 65 unsigned int *bytes_compl) 66 { 67 if (buffer->unmap_len) { 68 struct device *dma_dev = &tx_queue->efx->pci_dev->dev; 69 dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; 70 if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) 71 dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, 72 DMA_TO_DEVICE); 73 else 74 dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, 75 DMA_TO_DEVICE); 76 buffer->unmap_len = 0; 77 } 78 79 if (buffer->flags & EFX_TX_BUF_SKB) { 80 (*pkts_compl)++; 81 (*bytes_compl) += buffer->skb->len; 82 dev_consume_skb_any((struct sk_buff *)buffer->skb); 83 netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, 84 "TX queue %d transmission id %x complete\n", 85 tx_queue->queue, tx_queue->read_count); 86 } 87 88 buffer->len = 0; 89 buffer->flags = 0; 90 } 91 92 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) 93 { 94 /* Header and payload descriptor for each output segment, plus 95 * one for every input fragment boundary within a segment 96 */ 97 unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; 98 99 /* Possibly one more per segment for option descriptors */ 100 if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) 101 max_descs += EFX_TSO_MAX_SEGS; 102 103 /* Possibly more for PCIe page boundaries within input fragments */ 104 if (PAGE_SIZE > EFX_PAGE_SIZE) 105 max_descs += max_t(unsigned int, MAX_SKB_FRAGS, 106 DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); 107 108 return max_descs; 109 } 110 111 static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) 112 { 113 /* We need to consider both queues that the net core sees as one */ 114 struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); 115 struct efx_nic *efx = txq1->efx; 116 unsigned int fill_level; 117 118 fill_level = max(txq1->insert_count - txq1->old_read_count, 119 txq2->insert_count - txq2->old_read_count); 120 if (likely(fill_level < efx->txq_stop_thresh)) 121 return; 122 123 /* We used the stale old_read_count above, which gives us a 124 * pessimistic estimate of the fill level (which may even 125 * validly be >= efx->txq_entries). Now try again using 126 * read_count (more likely to be a cache miss). 127 * 128 * If we read read_count and then conditionally stop the 129 * queue, it is possible for the completion path to race with 130 * us and complete all outstanding descriptors in the middle, 131 * after which there will be no more completions to wake it. 132 * Therefore we stop the queue first, then read read_count 133 * (with a memory barrier to ensure the ordering), then 134 * restart the queue if the fill level turns out to be low 135 * enough. 136 */ 137 netif_tx_stop_queue(txq1->core_txq); 138 smp_mb(); 139 txq1->old_read_count = ACCESS_ONCE(txq1->read_count); 140 txq2->old_read_count = ACCESS_ONCE(txq2->read_count); 141 142 fill_level = max(txq1->insert_count - txq1->old_read_count, 143 txq2->insert_count - txq2->old_read_count); 144 EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries); 145 if (likely(fill_level < efx->txq_stop_thresh)) { 146 smp_mb(); 147 if (likely(!efx->loopback_selftest)) 148 netif_tx_start_queue(txq1->core_txq); 149 } 150 } 151 152 static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue, 153 struct sk_buff *skb) 154 { 155 unsigned int copy_len = skb->len; 156 struct efx_tx_buffer *buffer; 157 u8 *copy_buffer; 158 int rc; 159 160 EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE); 161 162 buffer = efx_tx_queue_get_insert_buffer(tx_queue); 163 164 copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer); 165 if (unlikely(!copy_buffer)) 166 return -ENOMEM; 167 168 rc = skb_copy_bits(skb, 0, copy_buffer, copy_len); 169 EFX_WARN_ON_PARANOID(rc); 170 buffer->len = copy_len; 171 172 buffer->skb = skb; 173 buffer->flags = EFX_TX_BUF_SKB; 174 175 ++tx_queue->insert_count; 176 return rc; 177 } 178 179 #ifdef EFX_USE_PIO 180 181 struct efx_short_copy_buffer { 182 int used; 183 u8 buf[L1_CACHE_BYTES]; 184 }; 185 186 /* Copy to PIO, respecting that writes to PIO buffers must be dword aligned. 187 * Advances piobuf pointer. Leaves additional data in the copy buffer. 188 */ 189 static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf, 190 u8 *data, int len, 191 struct efx_short_copy_buffer *copy_buf) 192 { 193 int block_len = len & ~(sizeof(copy_buf->buf) - 1); 194 195 __iowrite64_copy(*piobuf, data, block_len >> 3); 196 *piobuf += block_len; 197 len -= block_len; 198 199 if (len) { 200 data += block_len; 201 BUG_ON(copy_buf->used); 202 BUG_ON(len > sizeof(copy_buf->buf)); 203 memcpy(copy_buf->buf, data, len); 204 copy_buf->used = len; 205 } 206 } 207 208 /* Copy to PIO, respecting dword alignment, popping data from copy buffer first. 209 * Advances piobuf pointer. Leaves additional data in the copy buffer. 210 */ 211 static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf, 212 u8 *data, int len, 213 struct efx_short_copy_buffer *copy_buf) 214 { 215 if (copy_buf->used) { 216 /* if the copy buffer is partially full, fill it up and write */ 217 int copy_to_buf = 218 min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len); 219 220 memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf); 221 copy_buf->used += copy_to_buf; 222 223 /* if we didn't fill it up then we're done for now */ 224 if (copy_buf->used < sizeof(copy_buf->buf)) 225 return; 226 227 __iowrite64_copy(*piobuf, copy_buf->buf, 228 sizeof(copy_buf->buf) >> 3); 229 *piobuf += sizeof(copy_buf->buf); 230 data += copy_to_buf; 231 len -= copy_to_buf; 232 copy_buf->used = 0; 233 } 234 235 efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf); 236 } 237 238 static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf, 239 struct efx_short_copy_buffer *copy_buf) 240 { 241 /* if there's anything in it, write the whole buffer, including junk */ 242 if (copy_buf->used) 243 __iowrite64_copy(piobuf, copy_buf->buf, 244 sizeof(copy_buf->buf) >> 3); 245 } 246 247 /* Traverse skb structure and copy fragments in to PIO buffer. 248 * Advances piobuf pointer. 249 */ 250 static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb, 251 u8 __iomem **piobuf, 252 struct efx_short_copy_buffer *copy_buf) 253 { 254 int i; 255 256 efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb), 257 copy_buf); 258 259 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { 260 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 261 u8 *vaddr; 262 263 vaddr = kmap_atomic(skb_frag_page(f)); 264 265 efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset, 266 skb_frag_size(f), copy_buf); 267 kunmap_atomic(vaddr); 268 } 269 270 EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list); 271 } 272 273 static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, 274 struct sk_buff *skb) 275 { 276 struct efx_tx_buffer *buffer = 277 efx_tx_queue_get_insert_buffer(tx_queue); 278 u8 __iomem *piobuf = tx_queue->piobuf; 279 280 /* Copy to PIO buffer. Ensure the writes are padded to the end 281 * of a cache line, as this is required for write-combining to be 282 * effective on at least x86. 283 */ 284 285 if (skb_shinfo(skb)->nr_frags) { 286 /* The size of the copy buffer will ensure all writes 287 * are the size of a cache line. 288 */ 289 struct efx_short_copy_buffer copy_buf; 290 291 copy_buf.used = 0; 292 293 efx_skb_copy_bits_to_pio(tx_queue->efx, skb, 294 &piobuf, ©_buf); 295 efx_flush_copy_buffer(tx_queue->efx, piobuf, ©_buf); 296 } else { 297 /* Pad the write to the size of a cache line. 298 * We can do this because we know the skb_shared_info struct is 299 * after the source, and the destination buffer is big enough. 300 */ 301 BUILD_BUG_ON(L1_CACHE_BYTES > 302 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 303 __iowrite64_copy(tx_queue->piobuf, skb->data, 304 ALIGN(skb->len, L1_CACHE_BYTES) >> 3); 305 } 306 307 buffer->skb = skb; 308 buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION; 309 310 EFX_POPULATE_QWORD_5(buffer->option, 311 ESF_DZ_TX_DESC_IS_OPT, 1, 312 ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO, 313 ESF_DZ_TX_PIO_CONT, 0, 314 ESF_DZ_TX_PIO_BYTE_CNT, skb->len, 315 ESF_DZ_TX_PIO_BUF_ADDR, 316 tx_queue->piobuf_offset); 317 ++tx_queue->insert_count; 318 return 0; 319 } 320 #endif /* EFX_USE_PIO */ 321 322 static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, 323 dma_addr_t dma_addr, 324 size_t len) 325 { 326 const struct efx_nic_type *nic_type = tx_queue->efx->type; 327 struct efx_tx_buffer *buffer; 328 unsigned int dma_len; 329 330 /* Map the fragment taking account of NIC-dependent DMA limits. */ 331 do { 332 buffer = efx_tx_queue_get_insert_buffer(tx_queue); 333 dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); 334 335 buffer->len = dma_len; 336 buffer->dma_addr = dma_addr; 337 buffer->flags = EFX_TX_BUF_CONT; 338 len -= dma_len; 339 dma_addr += dma_len; 340 ++tx_queue->insert_count; 341 } while (len); 342 343 return buffer; 344 } 345 346 /* Map all data from an SKB for DMA and create descriptors on the queue. 347 */ 348 static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, 349 unsigned int segment_count) 350 { 351 struct efx_nic *efx = tx_queue->efx; 352 struct device *dma_dev = &efx->pci_dev->dev; 353 unsigned int frag_index, nr_frags; 354 dma_addr_t dma_addr, unmap_addr; 355 unsigned short dma_flags; 356 size_t len, unmap_len; 357 358 nr_frags = skb_shinfo(skb)->nr_frags; 359 frag_index = 0; 360 361 /* Map header data. */ 362 len = skb_headlen(skb); 363 dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); 364 dma_flags = EFX_TX_BUF_MAP_SINGLE; 365 unmap_len = len; 366 unmap_addr = dma_addr; 367 368 if (unlikely(dma_mapping_error(dma_dev, dma_addr))) 369 return -EIO; 370 371 if (segment_count) { 372 /* For TSO we need to put the header in to a separate 373 * descriptor. Map this separately if necessary. 374 */ 375 size_t header_len = skb_transport_header(skb) - skb->data + 376 (tcp_hdr(skb)->doff << 2u); 377 378 if (header_len != len) { 379 tx_queue->tso_long_headers++; 380 efx_tx_map_chunk(tx_queue, dma_addr, header_len); 381 len -= header_len; 382 dma_addr += header_len; 383 } 384 } 385 386 /* Add descriptors for each fragment. */ 387 do { 388 struct efx_tx_buffer *buffer; 389 skb_frag_t *fragment; 390 391 buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); 392 393 /* The final descriptor for a fragment is responsible for 394 * unmapping the whole fragment. 395 */ 396 buffer->flags = EFX_TX_BUF_CONT | dma_flags; 397 buffer->unmap_len = unmap_len; 398 buffer->dma_offset = buffer->dma_addr - unmap_addr; 399 400 if (frag_index >= nr_frags) { 401 /* Store SKB details with the final buffer for 402 * the completion. 403 */ 404 buffer->skb = skb; 405 buffer->flags = EFX_TX_BUF_SKB | dma_flags; 406 return 0; 407 } 408 409 /* Move on to the next fragment. */ 410 fragment = &skb_shinfo(skb)->frags[frag_index++]; 411 len = skb_frag_size(fragment); 412 dma_addr = skb_frag_dma_map(dma_dev, fragment, 413 0, len, DMA_TO_DEVICE); 414 dma_flags = 0; 415 unmap_len = len; 416 unmap_addr = dma_addr; 417 418 if (unlikely(dma_mapping_error(dma_dev, dma_addr))) 419 return -EIO; 420 } while (1); 421 } 422 423 /* Remove buffers put into a tx_queue. None of the buffers must have 424 * an skb attached. 425 */ 426 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) 427 { 428 struct efx_tx_buffer *buffer; 429 430 /* Work backwards until we hit the original insert pointer value */ 431 while (tx_queue->insert_count != tx_queue->write_count) { 432 --tx_queue->insert_count; 433 buffer = __efx_tx_queue_get_insert_buffer(tx_queue); 434 efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); 435 } 436 } 437 438 /* 439 * Fallback to software TSO. 440 * 441 * This is used if we are unable to send a GSO packet through hardware TSO. 442 * This should only ever happen due to per-queue restrictions - unsupported 443 * packets should first be filtered by the feature flags. 444 * 445 * Returns 0 on success, error code otherwise. 446 */ 447 static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, 448 struct sk_buff *skb) 449 { 450 struct sk_buff *segments, *next; 451 452 segments = skb_gso_segment(skb, 0); 453 if (IS_ERR(segments)) 454 return PTR_ERR(segments); 455 456 dev_kfree_skb_any(skb); 457 skb = segments; 458 459 while (skb) { 460 next = skb->next; 461 skb->next = NULL; 462 463 if (next) 464 skb->xmit_more = true; 465 efx_enqueue_skb(tx_queue, skb); 466 skb = next; 467 } 468 469 return 0; 470 } 471 472 /* 473 * Add a socket buffer to a TX queue 474 * 475 * This maps all fragments of a socket buffer for DMA and adds them to 476 * the TX queue. The queue's insert pointer will be incremented by 477 * the number of fragments in the socket buffer. 478 * 479 * If any DMA mapping fails, any mapped fragments will be unmapped, 480 * the queue's insert pointer will be restored to its original value. 481 * 482 * This function is split out from efx_hard_start_xmit to allow the 483 * loopback test to direct packets via specific TX queues. 484 * 485 * Returns NETDEV_TX_OK. 486 * You must hold netif_tx_lock() to call this function. 487 */ 488 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) 489 { 490 bool data_mapped = false; 491 unsigned int segments; 492 unsigned int skb_len; 493 int rc; 494 495 skb_len = skb->len; 496 segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; 497 if (segments == 1) 498 segments = 0; /* Don't use TSO for a single segment. */ 499 500 /* Handle TSO first - it's *possible* (although unlikely) that we might 501 * be passed a packet to segment that's smaller than the copybreak/PIO 502 * size limit. 503 */ 504 if (segments) { 505 EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso); 506 rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped); 507 if (rc == -EINVAL) { 508 rc = efx_tx_tso_fallback(tx_queue, skb); 509 tx_queue->tso_fallbacks++; 510 if (rc == 0) 511 return 0; 512 } 513 if (rc) 514 goto err; 515 #ifdef EFX_USE_PIO 516 } else if (skb_len <= efx_piobuf_size && !skb->xmit_more && 517 efx_nic_may_tx_pio(tx_queue)) { 518 /* Use PIO for short packets with an empty queue. */ 519 if (efx_enqueue_skb_pio(tx_queue, skb)) 520 goto err; 521 tx_queue->pio_packets++; 522 data_mapped = true; 523 #endif 524 } else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) { 525 /* Pad short packets or coalesce short fragmented packets. */ 526 if (efx_enqueue_skb_copy(tx_queue, skb)) 527 goto err; 528 tx_queue->cb_packets++; 529 data_mapped = true; 530 } 531 532 /* Map for DMA and create descriptors if we haven't done so already. */ 533 if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments))) 534 goto err; 535 536 /* Update BQL */ 537 netdev_tx_sent_queue(tx_queue->core_txq, skb_len); 538 539 /* Pass off to hardware */ 540 if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { 541 struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); 542 543 /* There could be packets left on the partner queue if those 544 * SKBs had skb->xmit_more set. If we do not push those they 545 * could be left for a long time and cause a netdev watchdog. 546 */ 547 if (txq2->xmit_more_available) 548 efx_nic_push_buffers(txq2); 549 550 efx_nic_push_buffers(tx_queue); 551 } else { 552 tx_queue->xmit_more_available = skb->xmit_more; 553 } 554 555 if (segments) { 556 tx_queue->tso_bursts++; 557 tx_queue->tso_packets += segments; 558 tx_queue->tx_packets += segments; 559 } else { 560 tx_queue->tx_packets++; 561 } 562 563 efx_tx_maybe_stop_queue(tx_queue); 564 565 return NETDEV_TX_OK; 566 567 568 err: 569 efx_enqueue_unwind(tx_queue); 570 dev_kfree_skb_any(skb); 571 return NETDEV_TX_OK; 572 } 573 574 /* Remove packets from the TX queue 575 * 576 * This removes packets from the TX queue, up to and including the 577 * specified index. 578 */ 579 static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, 580 unsigned int index, 581 unsigned int *pkts_compl, 582 unsigned int *bytes_compl) 583 { 584 struct efx_nic *efx = tx_queue->efx; 585 unsigned int stop_index, read_ptr; 586 587 stop_index = (index + 1) & tx_queue->ptr_mask; 588 read_ptr = tx_queue->read_count & tx_queue->ptr_mask; 589 590 while (read_ptr != stop_index) { 591 struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; 592 593 if (!(buffer->flags & EFX_TX_BUF_OPTION) && 594 unlikely(buffer->len == 0)) { 595 netif_err(efx, tx_err, efx->net_dev, 596 "TX queue %d spurious TX completion id %x\n", 597 tx_queue->queue, read_ptr); 598 efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); 599 return; 600 } 601 602 efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); 603 604 ++tx_queue->read_count; 605 read_ptr = tx_queue->read_count & tx_queue->ptr_mask; 606 } 607 } 608 609 /* Initiate a packet transmission. We use one channel per CPU 610 * (sharing when we have more CPUs than channels). On Falcon, the TX 611 * completion events will be directed back to the CPU that transmitted 612 * the packet, which should be cache-efficient. 613 * 614 * Context: non-blocking. 615 * Note that returning anything other than NETDEV_TX_OK will cause the 616 * OS to free the skb. 617 */ 618 netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, 619 struct net_device *net_dev) 620 { 621 struct efx_nic *efx = netdev_priv(net_dev); 622 struct efx_tx_queue *tx_queue; 623 unsigned index, type; 624 625 EFX_WARN_ON_PARANOID(!netif_device_present(net_dev)); 626 627 /* PTP "event" packet */ 628 if (unlikely(efx_xmit_with_hwtstamp(skb)) && 629 unlikely(efx_ptp_is_ptp_tx(efx, skb))) { 630 return efx_ptp_tx(efx, skb); 631 } 632 633 index = skb_get_queue_mapping(skb); 634 type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0; 635 if (index >= efx->n_tx_channels) { 636 index -= efx->n_tx_channels; 637 type |= EFX_TXQ_TYPE_HIGHPRI; 638 } 639 tx_queue = efx_get_tx_queue(efx, index, type); 640 641 return efx_enqueue_skb(tx_queue, skb); 642 } 643 644 void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) 645 { 646 struct efx_nic *efx = tx_queue->efx; 647 648 /* Must be inverse of queue lookup in efx_hard_start_xmit() */ 649 tx_queue->core_txq = 650 netdev_get_tx_queue(efx->net_dev, 651 tx_queue->queue / EFX_TXQ_TYPES + 652 ((tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ? 653 efx->n_tx_channels : 0)); 654 } 655 656 int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, 657 struct tc_to_netdev *ntc) 658 { 659 struct efx_nic *efx = netdev_priv(net_dev); 660 struct efx_channel *channel; 661 struct efx_tx_queue *tx_queue; 662 unsigned tc, num_tc; 663 int rc; 664 665 if (ntc->type != TC_SETUP_MQPRIO) 666 return -EINVAL; 667 668 num_tc = ntc->tc; 669 670 if (num_tc > EFX_MAX_TX_TC) 671 return -EINVAL; 672 673 if (num_tc == net_dev->num_tc) 674 return 0; 675 676 for (tc = 0; tc < num_tc; tc++) { 677 net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels; 678 net_dev->tc_to_txq[tc].count = efx->n_tx_channels; 679 } 680 681 if (num_tc > net_dev->num_tc) { 682 /* Initialise high-priority queues as necessary */ 683 efx_for_each_channel(channel, efx) { 684 efx_for_each_possible_channel_tx_queue(tx_queue, 685 channel) { 686 if (!(tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI)) 687 continue; 688 if (!tx_queue->buffer) { 689 rc = efx_probe_tx_queue(tx_queue); 690 if (rc) 691 return rc; 692 } 693 if (!tx_queue->initialised) 694 efx_init_tx_queue(tx_queue); 695 efx_init_tx_queue_core_txq(tx_queue); 696 } 697 } 698 } else { 699 /* Reduce number of classes before number of queues */ 700 net_dev->num_tc = num_tc; 701 } 702 703 rc = netif_set_real_num_tx_queues(net_dev, 704 max_t(int, num_tc, 1) * 705 efx->n_tx_channels); 706 if (rc) 707 return rc; 708 709 /* Do not destroy high-priority queues when they become 710 * unused. We would have to flush them first, and it is 711 * fairly difficult to flush a subset of TX queues. Leave 712 * it to efx_fini_channels(). 713 */ 714 715 net_dev->num_tc = num_tc; 716 return 0; 717 } 718 719 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) 720 { 721 unsigned fill_level; 722 struct efx_nic *efx = tx_queue->efx; 723 struct efx_tx_queue *txq2; 724 unsigned int pkts_compl = 0, bytes_compl = 0; 725 726 EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); 727 728 efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); 729 tx_queue->pkts_compl += pkts_compl; 730 tx_queue->bytes_compl += bytes_compl; 731 732 if (pkts_compl > 1) 733 ++tx_queue->merge_events; 734 735 /* See if we need to restart the netif queue. This memory 736 * barrier ensures that we write read_count (inside 737 * efx_dequeue_buffers()) before reading the queue status. 738 */ 739 smp_mb(); 740 if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && 741 likely(efx->port_enabled) && 742 likely(netif_device_present(efx->net_dev))) { 743 txq2 = efx_tx_queue_partner(tx_queue); 744 fill_level = max(tx_queue->insert_count - tx_queue->read_count, 745 txq2->insert_count - txq2->read_count); 746 if (fill_level <= efx->txq_wake_thresh) 747 netif_tx_wake_queue(tx_queue->core_txq); 748 } 749 750 /* Check whether the hardware queue is now empty */ 751 if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { 752 tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count); 753 if (tx_queue->read_count == tx_queue->old_write_count) { 754 smp_mb(); 755 tx_queue->empty_read_count = 756 tx_queue->read_count | EFX_EMPTY_COUNT_VALID; 757 } 758 } 759 } 760 761 static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) 762 { 763 return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER); 764 } 765 766 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) 767 { 768 struct efx_nic *efx = tx_queue->efx; 769 unsigned int entries; 770 int rc; 771 772 /* Create the smallest power-of-two aligned ring */ 773 entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); 774 EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); 775 tx_queue->ptr_mask = entries - 1; 776 777 netif_dbg(efx, probe, efx->net_dev, 778 "creating TX queue %d size %#x mask %#x\n", 779 tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); 780 781 /* Allocate software ring */ 782 tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), 783 GFP_KERNEL); 784 if (!tx_queue->buffer) 785 return -ENOMEM; 786 787 tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), 788 sizeof(tx_queue->cb_page[0]), GFP_KERNEL); 789 if (!tx_queue->cb_page) { 790 rc = -ENOMEM; 791 goto fail1; 792 } 793 794 /* Allocate hardware ring */ 795 rc = efx_nic_probe_tx(tx_queue); 796 if (rc) 797 goto fail2; 798 799 return 0; 800 801 fail2: 802 kfree(tx_queue->cb_page); 803 tx_queue->cb_page = NULL; 804 fail1: 805 kfree(tx_queue->buffer); 806 tx_queue->buffer = NULL; 807 return rc; 808 } 809 810 void efx_init_tx_queue(struct efx_tx_queue *tx_queue) 811 { 812 struct efx_nic *efx = tx_queue->efx; 813 814 netif_dbg(efx, drv, efx->net_dev, 815 "initialising TX queue %d\n", tx_queue->queue); 816 817 tx_queue->insert_count = 0; 818 tx_queue->write_count = 0; 819 tx_queue->packet_write_count = 0; 820 tx_queue->old_write_count = 0; 821 tx_queue->read_count = 0; 822 tx_queue->old_read_count = 0; 823 tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; 824 tx_queue->xmit_more_available = false; 825 826 /* Set up default function pointers. These may get replaced by 827 * efx_nic_init_tx() based off NIC/queue capabilities. 828 */ 829 tx_queue->handle_tso = efx_enqueue_skb_tso; 830 831 /* Set up TX descriptor ring */ 832 efx_nic_init_tx(tx_queue); 833 834 tx_queue->initialised = true; 835 } 836 837 void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) 838 { 839 struct efx_tx_buffer *buffer; 840 841 netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, 842 "shutting down TX queue %d\n", tx_queue->queue); 843 844 if (!tx_queue->buffer) 845 return; 846 847 /* Free any buffers left in the ring */ 848 while (tx_queue->read_count != tx_queue->write_count) { 849 unsigned int pkts_compl = 0, bytes_compl = 0; 850 buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; 851 efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); 852 853 ++tx_queue->read_count; 854 } 855 tx_queue->xmit_more_available = false; 856 netdev_tx_reset_queue(tx_queue->core_txq); 857 } 858 859 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) 860 { 861 int i; 862 863 if (!tx_queue->buffer) 864 return; 865 866 netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, 867 "destroying TX queue %d\n", tx_queue->queue); 868 efx_nic_remove_tx(tx_queue); 869 870 if (tx_queue->cb_page) { 871 for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) 872 efx_nic_free_buffer(tx_queue->efx, 873 &tx_queue->cb_page[i]); 874 kfree(tx_queue->cb_page); 875 tx_queue->cb_page = NULL; 876 } 877 878 kfree(tx_queue->buffer); 879 tx_queue->buffer = NULL; 880 } 881