1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 Intel Corporation */ 3 4 #include <linux/module.h> 5 #include <linux/types.h> 6 #include <linux/if_vlan.h> 7 #include <linux/tcp.h> 8 #include <linux/udp.h> 9 #include <linux/ip.h> 10 #include <linux/pm_runtime.h> 11 #include <net/pkt_sched.h> 12 #include <linux/bpf_trace.h> 13 #include <net/xdp_sock_drv.h> 14 #include <linux/pci.h> 15 #include <linux/mdio.h> 16 17 #include <net/ipv6.h> 18 19 #include "igc.h" 20 #include "igc_hw.h" 21 #include "igc_tsn.h" 22 #include "igc_xdp.h" 23 24 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" 25 26 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) 27 28 #define IGC_XDP_PASS 0 29 #define IGC_XDP_CONSUMED BIT(0) 30 #define IGC_XDP_TX BIT(1) 31 #define IGC_XDP_REDIRECT BIT(2) 32 33 static int debug = -1; 34 35 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); 36 MODULE_DESCRIPTION(DRV_SUMMARY); 37 MODULE_LICENSE("GPL v2"); 38 module_param(debug, int, 0); 39 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 40 41 char igc_driver_name[] = "igc"; 42 static const char igc_driver_string[] = DRV_SUMMARY; 43 static const char igc_copyright[] = 44 "Copyright(c) 2018 Intel Corporation."; 45 46 static const struct igc_info *igc_info_tbl[] = { 47 [board_base] = &igc_base_info, 48 }; 49 50 static const struct pci_device_id igc_pci_tbl[] = { 51 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, 52 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, 53 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, 54 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, 55 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, 56 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, 57 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, 58 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, 59 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base }, 60 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, 61 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, 62 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, 63 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base }, 64 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base }, 65 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base }, 66 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, 67 /* required last entry */ 68 {0, } 69 }; 70 71 MODULE_DEVICE_TABLE(pci, igc_pci_tbl); 72 73 enum latency_range { 74 lowest_latency = 0, 75 low_latency = 1, 76 bulk_latency = 2, 77 latency_invalid = 255 78 }; 79 80 void igc_reset(struct igc_adapter *adapter) 81 { 82 struct net_device *dev = adapter->netdev; 83 struct igc_hw *hw = &adapter->hw; 84 struct igc_fc_info *fc = &hw->fc; 85 u32 pba, hwm; 86 87 /* Repartition PBA for greater than 9k MTU if required */ 88 pba = IGC_PBA_34K; 89 90 /* flow control settings 91 * The high water mark must be low enough to fit one full frame 92 * after transmitting the pause frame. As such we must have enough 93 * space to allow for us to complete our current transmit and then 94 * receive the frame that is in progress from the link partner. 
95 * Set it to: 96 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 97 */ 98 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 99 100 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 101 fc->low_water = fc->high_water - 16; 102 fc->pause_time = 0xFFFF; 103 fc->send_xon = 1; 104 fc->current_mode = fc->requested_mode; 105 106 hw->mac.ops.reset_hw(hw); 107 108 if (hw->mac.ops.init_hw(hw)) 109 netdev_err(dev, "Error on hardware initialization\n"); 110 111 /* Re-establish EEE setting */ 112 igc_set_eee_i225(hw, true, true, true); 113 114 if (!netif_running(adapter->netdev)) 115 igc_power_down_phy_copper_base(&adapter->hw); 116 117 /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ 118 wr32(IGC_VET, ETH_P_8021Q); 119 120 /* Re-enable PTP, where applicable. */ 121 igc_ptp_reset(adapter); 122 123 /* Re-enable TSN offloading, where applicable. */ 124 igc_tsn_reset(adapter); 125 126 igc_get_phy_info(hw); 127 } 128 129 /** 130 * igc_power_up_link - Power up the phy link 131 * @adapter: address of board private structure 132 */ 133 static void igc_power_up_link(struct igc_adapter *adapter) 134 { 135 igc_reset_phy(&adapter->hw); 136 137 igc_power_up_phy_copper(&adapter->hw); 138 139 igc_setup_link(&adapter->hw); 140 } 141 142 /** 143 * igc_release_hw_control - release control of the h/w to f/w 144 * @adapter: address of board private structure 145 * 146 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 147 * For ASF and Pass Through versions of f/w this means that the 148 * driver is no longer loaded. 149 */ 150 static void igc_release_hw_control(struct igc_adapter *adapter) 151 { 152 struct igc_hw *hw = &adapter->hw; 153 u32 ctrl_ext; 154 155 if (!pci_device_is_present(adapter->pdev)) 156 return; 157 158 /* Let firmware take over control of h/w */ 159 ctrl_ext = rd32(IGC_CTRL_EXT); 160 wr32(IGC_CTRL_EXT, 161 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 162 } 163 164 /** 165 * igc_get_hw_control - get control of the h/w from f/w 166 * @adapter: address of board private structure 167 * 168 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 169 * For ASF and Pass Through versions of f/w this means that 170 * the driver is loaded. 
171 */ 172 static void igc_get_hw_control(struct igc_adapter *adapter) 173 { 174 struct igc_hw *hw = &adapter->hw; 175 u32 ctrl_ext; 176 177 /* Let firmware know the driver has taken over */ 178 ctrl_ext = rd32(IGC_CTRL_EXT); 179 wr32(IGC_CTRL_EXT, 180 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 181 } 182 183 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 184 { 185 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 186 dma_unmap_len(buf, len), DMA_TO_DEVICE); 187 188 dma_unmap_len_set(buf, len, 0); 189 } 190 191 /** 192 * igc_clean_tx_ring - Free Tx Buffers 193 * @tx_ring: ring to be cleaned 194 */ 195 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 196 { 197 u16 i = tx_ring->next_to_clean; 198 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 199 u32 xsk_frames = 0; 200 201 while (i != tx_ring->next_to_use) { 202 union igc_adv_tx_desc *eop_desc, *tx_desc; 203 204 switch (tx_buffer->type) { 205 case IGC_TX_BUFFER_TYPE_XSK: 206 xsk_frames++; 207 break; 208 case IGC_TX_BUFFER_TYPE_XDP: 209 xdp_return_frame(tx_buffer->xdpf); 210 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 211 break; 212 case IGC_TX_BUFFER_TYPE_SKB: 213 dev_kfree_skb_any(tx_buffer->skb); 214 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 215 break; 216 default: 217 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 218 break; 219 } 220 221 /* check for eop_desc to determine the end of the packet */ 222 eop_desc = tx_buffer->next_to_watch; 223 tx_desc = IGC_TX_DESC(tx_ring, i); 224 225 /* unmap remaining buffers */ 226 while (tx_desc != eop_desc) { 227 tx_buffer++; 228 tx_desc++; 229 i++; 230 if (unlikely(i == tx_ring->count)) { 231 i = 0; 232 tx_buffer = tx_ring->tx_buffer_info; 233 tx_desc = IGC_TX_DESC(tx_ring, 0); 234 } 235 236 /* unmap any remaining paged data */ 237 if (dma_unmap_len(tx_buffer, len)) 238 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 239 } 240 241 tx_buffer->next_to_watch = NULL; 242 243 /* move us one more past the eop_desc for start of next pkt */ 244 tx_buffer++; 245 i++; 246 if (unlikely(i == tx_ring->count)) { 247 i = 0; 248 tx_buffer = tx_ring->tx_buffer_info; 249 } 250 } 251 252 if (tx_ring->xsk_pool && xsk_frames) 253 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 254 255 /* reset BQL for queue */ 256 netdev_tx_reset_queue(txring_txq(tx_ring)); 257 258 /* Zero out the buffer ring */ 259 memset(tx_ring->tx_buffer_info, 0, 260 sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); 261 262 /* Zero out the descriptor ring */ 263 memset(tx_ring->desc, 0, tx_ring->size); 264 265 /* reset next_to_use and next_to_clean */ 266 tx_ring->next_to_use = 0; 267 tx_ring->next_to_clean = 0; 268 } 269 270 /** 271 * igc_free_tx_resources - Free Tx Resources per Queue 272 * @tx_ring: Tx descriptor ring for a specific queue 273 * 274 * Free all transmit software resources 275 */ 276 void igc_free_tx_resources(struct igc_ring *tx_ring) 277 { 278 igc_disable_tx_ring(tx_ring); 279 280 vfree(tx_ring->tx_buffer_info); 281 tx_ring->tx_buffer_info = NULL; 282 283 /* if not set, then don't free */ 284 if (!tx_ring->desc) 285 return; 286 287 dma_free_coherent(tx_ring->dev, tx_ring->size, 288 tx_ring->desc, tx_ring->dma); 289 290 tx_ring->desc = NULL; 291 } 292 293 /** 294 * igc_free_all_tx_resources - Free Tx Resources for All Queues 295 * @adapter: board private structure 296 * 297 * Free all transmit software resources 298 */ 299 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 300 { 301 int i; 302 303 for (i = 0; i < adapter->num_tx_queues; i++) 304 
igc_free_tx_resources(adapter->tx_ring[i]); 305 } 306 307 /** 308 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 309 * @adapter: board private structure 310 */ 311 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 312 { 313 int i; 314 315 for (i = 0; i < adapter->num_tx_queues; i++) 316 if (adapter->tx_ring[i]) 317 igc_clean_tx_ring(adapter->tx_ring[i]); 318 } 319 320 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 321 { 322 struct igc_hw *hw = &ring->q_vector->adapter->hw; 323 u8 idx = ring->reg_idx; 324 u32 txdctl; 325 326 txdctl = rd32(IGC_TXDCTL(idx)); 327 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 328 txdctl |= IGC_TXDCTL_SWFLUSH; 329 wr32(IGC_TXDCTL(idx), txdctl); 330 } 331 332 /** 333 * igc_disable_all_tx_rings_hw - Disable all transmit queue operation 334 * @adapter: board private structure 335 */ 336 static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) 337 { 338 int i; 339 340 for (i = 0; i < adapter->num_tx_queues; i++) { 341 struct igc_ring *tx_ring = adapter->tx_ring[i]; 342 343 igc_disable_tx_ring_hw(tx_ring); 344 } 345 } 346 347 /** 348 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 349 * @tx_ring: tx descriptor ring (for a specific queue) to setup 350 * 351 * Return 0 on success, negative on failure 352 */ 353 int igc_setup_tx_resources(struct igc_ring *tx_ring) 354 { 355 struct net_device *ndev = tx_ring->netdev; 356 struct device *dev = tx_ring->dev; 357 int size = 0; 358 359 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 360 tx_ring->tx_buffer_info = vzalloc(size); 361 if (!tx_ring->tx_buffer_info) 362 goto err; 363 364 /* round up to nearest 4K */ 365 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 366 tx_ring->size = ALIGN(tx_ring->size, 4096); 367 368 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 369 &tx_ring->dma, GFP_KERNEL); 370 371 if (!tx_ring->desc) 372 goto err; 373 374 tx_ring->next_to_use = 0; 375 tx_ring->next_to_clean = 0; 376 377 return 0; 378 379 err: 380 vfree(tx_ring->tx_buffer_info); 381 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 382 return -ENOMEM; 383 } 384 385 /** 386 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 387 * @adapter: board private structure 388 * 389 * Return 0 on success, negative on failure 390 */ 391 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 392 { 393 struct net_device *dev = adapter->netdev; 394 int i, err = 0; 395 396 for (i = 0; i < adapter->num_tx_queues; i++) { 397 err = igc_setup_tx_resources(adapter->tx_ring[i]); 398 if (err) { 399 netdev_err(dev, "Error on Tx queue %u setup\n", i); 400 for (i--; i >= 0; i--) 401 igc_free_tx_resources(adapter->tx_ring[i]); 402 break; 403 } 404 } 405 406 return err; 407 } 408 409 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 410 { 411 u16 i = rx_ring->next_to_clean; 412 413 dev_kfree_skb(rx_ring->skb); 414 rx_ring->skb = NULL; 415 416 /* Free all the Rx ring sk_buffs */ 417 while (i != rx_ring->next_to_alloc) { 418 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 419 420 /* Invalidate cache lines that may have been written to by 421 * device so that we avoid corrupting memory. 
422 */ 423 dma_sync_single_range_for_cpu(rx_ring->dev, 424 buffer_info->dma, 425 buffer_info->page_offset, 426 igc_rx_bufsz(rx_ring), 427 DMA_FROM_DEVICE); 428 429 /* free resources associated with mapping */ 430 dma_unmap_page_attrs(rx_ring->dev, 431 buffer_info->dma, 432 igc_rx_pg_size(rx_ring), 433 DMA_FROM_DEVICE, 434 IGC_RX_DMA_ATTR); 435 __page_frag_cache_drain(buffer_info->page, 436 buffer_info->pagecnt_bias); 437 438 i++; 439 if (i == rx_ring->count) 440 i = 0; 441 } 442 } 443 444 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 445 { 446 struct igc_rx_buffer *bi; 447 u16 i; 448 449 for (i = 0; i < ring->count; i++) { 450 bi = &ring->rx_buffer_info[i]; 451 if (!bi->xdp) 452 continue; 453 454 xsk_buff_free(bi->xdp); 455 bi->xdp = NULL; 456 } 457 } 458 459 /** 460 * igc_clean_rx_ring - Free Rx Buffers per Queue 461 * @ring: ring to free buffers from 462 */ 463 static void igc_clean_rx_ring(struct igc_ring *ring) 464 { 465 if (ring->xsk_pool) 466 igc_clean_rx_ring_xsk_pool(ring); 467 else 468 igc_clean_rx_ring_page_shared(ring); 469 470 clear_ring_uses_large_buffer(ring); 471 472 ring->next_to_alloc = 0; 473 ring->next_to_clean = 0; 474 ring->next_to_use = 0; 475 } 476 477 /** 478 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 479 * @adapter: board private structure 480 */ 481 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 482 { 483 int i; 484 485 for (i = 0; i < adapter->num_rx_queues; i++) 486 if (adapter->rx_ring[i]) 487 igc_clean_rx_ring(adapter->rx_ring[i]); 488 } 489 490 /** 491 * igc_free_rx_resources - Free Rx Resources 492 * @rx_ring: ring to clean the resources from 493 * 494 * Free all receive software resources 495 */ 496 void igc_free_rx_resources(struct igc_ring *rx_ring) 497 { 498 igc_clean_rx_ring(rx_ring); 499 500 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 501 502 vfree(rx_ring->rx_buffer_info); 503 rx_ring->rx_buffer_info = NULL; 504 505 /* if not set, then don't free */ 506 if (!rx_ring->desc) 507 return; 508 509 dma_free_coherent(rx_ring->dev, rx_ring->size, 510 rx_ring->desc, rx_ring->dma); 511 512 rx_ring->desc = NULL; 513 } 514 515 /** 516 * igc_free_all_rx_resources - Free Rx Resources for All Queues 517 * @adapter: board private structure 518 * 519 * Free all receive software resources 520 */ 521 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 522 { 523 int i; 524 525 for (i = 0; i < adapter->num_rx_queues; i++) 526 igc_free_rx_resources(adapter->rx_ring[i]); 527 } 528 529 /** 530 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 531 * @rx_ring: rx descriptor ring (for a specific queue) to setup 532 * 533 * Returns 0 on success, negative on failure 534 */ 535 int igc_setup_rx_resources(struct igc_ring *rx_ring) 536 { 537 struct net_device *ndev = rx_ring->netdev; 538 struct device *dev = rx_ring->dev; 539 u8 index = rx_ring->queue_index; 540 int size, desc_len, res; 541 542 /* XDP RX-queue info */ 543 if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 544 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 545 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 546 rx_ring->q_vector->napi.napi_id); 547 if (res < 0) { 548 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 549 index); 550 return res; 551 } 552 553 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 554 rx_ring->rx_buffer_info = vzalloc(size); 555 if (!rx_ring->rx_buffer_info) 556 goto err; 557 558 desc_len = sizeof(union igc_adv_rx_desc); 559 560 /* Round up to nearest 4K */ 561 rx_ring->size = rx_ring->count * desc_len; 562 
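	/* e.g. a 256-entry ring of 16-byte advanced Rx descriptors needs
	 * 256 * 16 = 4096 bytes, exactly one page; other ring sizes are
	 * padded up to a 4K multiple by the ALIGN() below.
	 */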
rx_ring->size = ALIGN(rx_ring->size, 4096); 563 564 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 565 &rx_ring->dma, GFP_KERNEL); 566 567 if (!rx_ring->desc) 568 goto err; 569 570 rx_ring->next_to_alloc = 0; 571 rx_ring->next_to_clean = 0; 572 rx_ring->next_to_use = 0; 573 574 return 0; 575 576 err: 577 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 578 vfree(rx_ring->rx_buffer_info); 579 rx_ring->rx_buffer_info = NULL; 580 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 581 return -ENOMEM; 582 } 583 584 /** 585 * igc_setup_all_rx_resources - wrapper to allocate Rx resources 586 * (Descriptors) for all queues 587 * @adapter: board private structure 588 * 589 * Return 0 on success, negative on failure 590 */ 591 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 592 { 593 struct net_device *dev = adapter->netdev; 594 int i, err = 0; 595 596 for (i = 0; i < adapter->num_rx_queues; i++) { 597 err = igc_setup_rx_resources(adapter->rx_ring[i]); 598 if (err) { 599 netdev_err(dev, "Error on Rx queue %u setup\n", i); 600 for (i--; i >= 0; i--) 601 igc_free_rx_resources(adapter->rx_ring[i]); 602 break; 603 } 604 } 605 606 return err; 607 } 608 609 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 610 struct igc_ring *ring) 611 { 612 if (!igc_xdp_is_enabled(adapter) || 613 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 614 return NULL; 615 616 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 617 } 618 619 /** 620 * igc_configure_rx_ring - Configure a receive ring after Reset 621 * @adapter: board private structure 622 * @ring: receive ring to be configured 623 * 624 * Configure the Rx unit of the MAC after a reset. 625 */ 626 static void igc_configure_rx_ring(struct igc_adapter *adapter, 627 struct igc_ring *ring) 628 { 629 struct igc_hw *hw = &adapter->hw; 630 union igc_adv_rx_desc *rx_desc; 631 int reg_idx = ring->reg_idx; 632 u32 srrctl = 0, rxdctl = 0; 633 u64 rdba = ring->dma; 634 u32 buf_size; 635 636 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 637 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 638 if (ring->xsk_pool) { 639 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 640 MEM_TYPE_XSK_BUFF_POOL, 641 NULL)); 642 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 643 } else { 644 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 645 MEM_TYPE_PAGE_SHARED, 646 NULL)); 647 } 648 649 if (igc_xdp_is_enabled(adapter)) 650 set_ring_uses_large_buffer(ring); 651 652 /* disable the queue */ 653 wr32(IGC_RXDCTL(reg_idx), 0); 654 655 /* Set DMA base address registers */ 656 wr32(IGC_RDBAL(reg_idx), 657 rdba & 0x00000000ffffffffULL); 658 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 659 wr32(IGC_RDLEN(reg_idx), 660 ring->count * sizeof(union igc_adv_rx_desc)); 661 662 /* initialize head and tail */ 663 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 664 wr32(IGC_RDH(reg_idx), 0); 665 writel(0, ring->tail); 666 667 /* reset next-to- use/clean to place SW in sync with hardware */ 668 ring->next_to_clean = 0; 669 ring->next_to_use = 0; 670 671 if (ring->xsk_pool) 672 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); 673 else if (ring_uses_large_buffer(ring)) 674 buf_size = IGC_RXBUFFER_3072; 675 else 676 buf_size = IGC_RXBUFFER_2048; 677 678 srrctl = rd32(IGC_SRRCTL(reg_idx)); 679 srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | 680 IGC_SRRCTL_DESCTYPE_MASK); 681 srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); 682 srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); 683 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 
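	/* SRRCTL now selects the advanced one-buffer descriptor format, with
	 * BSIZEPKT() encoding buf_size (2048, 3072 or the XSK frame size) and
	 * BSIZEHDR() encoding IGC_RX_HDR_LEN; both macros convert byte counts
	 * into the register's coarser granularity before the value is written.
	 */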
684 685 wr32(IGC_SRRCTL(reg_idx), srrctl); 686 687 rxdctl |= IGC_RX_PTHRESH; 688 rxdctl |= IGC_RX_HTHRESH << 8; 689 rxdctl |= IGC_RX_WTHRESH << 16; 690 691 /* initialize rx_buffer_info */ 692 memset(ring->rx_buffer_info, 0, 693 sizeof(struct igc_rx_buffer) * ring->count); 694 695 /* initialize Rx descriptor 0 */ 696 rx_desc = IGC_RX_DESC(ring, 0); 697 rx_desc->wb.upper.length = 0; 698 699 /* enable receive descriptor fetching */ 700 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 701 702 wr32(IGC_RXDCTL(reg_idx), rxdctl); 703 } 704 705 /** 706 * igc_configure_rx - Configure receive Unit after Reset 707 * @adapter: board private structure 708 * 709 * Configure the Rx unit of the MAC after a reset. 710 */ 711 static void igc_configure_rx(struct igc_adapter *adapter) 712 { 713 int i; 714 715 /* Setup the HW Rx Head and Tail Descriptor Pointers and 716 * the Base and Length of the Rx Descriptor Ring 717 */ 718 for (i = 0; i < adapter->num_rx_queues; i++) 719 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 720 } 721 722 /** 723 * igc_configure_tx_ring - Configure transmit ring after Reset 724 * @adapter: board private structure 725 * @ring: tx ring to configure 726 * 727 * Configure a transmit ring after a reset. 728 */ 729 static void igc_configure_tx_ring(struct igc_adapter *adapter, 730 struct igc_ring *ring) 731 { 732 struct igc_hw *hw = &adapter->hw; 733 int reg_idx = ring->reg_idx; 734 u64 tdba = ring->dma; 735 u32 txdctl = 0; 736 737 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 738 739 /* disable the queue */ 740 wr32(IGC_TXDCTL(reg_idx), 0); 741 wrfl(); 742 743 wr32(IGC_TDLEN(reg_idx), 744 ring->count * sizeof(union igc_adv_tx_desc)); 745 wr32(IGC_TDBAL(reg_idx), 746 tdba & 0x00000000ffffffffULL); 747 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 748 749 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 750 wr32(IGC_TDH(reg_idx), 0); 751 writel(0, ring->tail); 752 753 txdctl |= IGC_TX_PTHRESH; 754 txdctl |= IGC_TX_HTHRESH << 8; 755 txdctl |= IGC_TX_WTHRESH << 16; 756 757 txdctl |= IGC_TXDCTL_QUEUE_ENABLE; 758 wr32(IGC_TXDCTL(reg_idx), txdctl); 759 } 760 761 /** 762 * igc_configure_tx - Configure transmit Unit after Reset 763 * @adapter: board private structure 764 * 765 * Configure the Tx unit of the MAC after a reset. 766 */ 767 static void igc_configure_tx(struct igc_adapter *adapter) 768 { 769 int i; 770 771 for (i = 0; i < adapter->num_tx_queues; i++) 772 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 773 } 774 775 /** 776 * igc_setup_mrqc - configure the multiple receive queue control registers 777 * @adapter: Board private structure 778 */ 779 static void igc_setup_mrqc(struct igc_adapter *adapter) 780 { 781 struct igc_hw *hw = &adapter->hw; 782 u32 j, num_rx_queues; 783 u32 mrqc, rxcsum; 784 u32 rss_key[10]; 785 786 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 787 for (j = 0; j < 10; j++) 788 wr32(IGC_RSSRK(j), rss_key[j]); 789 790 num_rx_queues = adapter->rss_queues; 791 792 if (adapter->rss_indir_tbl_init != num_rx_queues) { 793 for (j = 0; j < IGC_RETA_SIZE; j++) 794 adapter->rss_indir_tbl[j] = 795 (j * num_rx_queues) / IGC_RETA_SIZE; 796 adapter->rss_indir_tbl_init = num_rx_queues; 797 } 798 igc_write_rss_indir_tbl(adapter); 799 800 /* Disable raw packet checksumming so that RSS hash is placed in 801 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(IGC_RXCSUM);
	rxcsum |= IGC_RXCSUM_PCSD;

	/* Enable Receive Checksum Offload for SCTP */
	rxcsum |= IGC_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(IGC_RXCSUM, rxcsum);

	/* Generate RSS hash based on packet types, TCP/UDP
	 * port numbers and/or IPv4/v6 src and dst addresses
	 */
	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6 |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;

	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;

	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;

	wr32(IGC_MRQC, mrqc);
}

/**
 * igc_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 */
static void igc_setup_rctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(IGC_RCTL);

	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);

	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);

	/* enable stripping of CRC. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= IGC_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);

	/* enable LPE to allow for reception of jumbo frames */
	rctl |= IGC_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(IGC_RXDCTL(0), 0);

	/* This is useful for sniffing bad packets. */
	if (adapter->netdev->features & NETIF_F_RXALL) {
		/* UPE and MPE will be handled by normal PROMISC logic
		 * in set_rx_mode
		 */
		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */

		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
	}

	wr32(IGC_RCTL, rctl);
}

/**
 * igc_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 */
static void igc_setup_tctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which could be enabled by default */
	wr32(IGC_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(IGC_TCTL);
	tctl &= ~IGC_TCTL_CT;
	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);

	/* Enable transmits */
	tctl |= IGC_TCTL_EN;

	wr32(IGC_TCTL, tctl);
}

/**
 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 * @adapter: Pointer to adapter where the filter should be set
 * @index: Filter index
 * @type: MAC address filter type (source or destination)
 * @addr: MAC address
 * @queue: If non-negative, queue assignment feature is enabled and frames
 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
 *         assignment is disabled.
912 */ 913 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 914 enum igc_mac_filter_type type, 915 const u8 *addr, int queue) 916 { 917 struct net_device *dev = adapter->netdev; 918 struct igc_hw *hw = &adapter->hw; 919 u32 ral, rah; 920 921 if (WARN_ON(index >= hw->mac.rar_entry_count)) 922 return; 923 924 ral = le32_to_cpup((__le32 *)(addr)); 925 rah = le16_to_cpup((__le16 *)(addr + 4)); 926 927 if (type == IGC_MAC_FILTER_TYPE_SRC) { 928 rah &= ~IGC_RAH_ASEL_MASK; 929 rah |= IGC_RAH_ASEL_SRC_ADDR; 930 } 931 932 if (queue >= 0) { 933 rah &= ~IGC_RAH_QSEL_MASK; 934 rah |= (queue << IGC_RAH_QSEL_SHIFT); 935 rah |= IGC_RAH_QSEL_ENABLE; 936 } 937 938 rah |= IGC_RAH_AV; 939 940 wr32(IGC_RAL(index), ral); 941 wr32(IGC_RAH(index), rah); 942 943 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 944 } 945 946 /** 947 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 948 * @adapter: Pointer to adapter where the filter should be cleared 949 * @index: Filter index 950 */ 951 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 952 { 953 struct net_device *dev = adapter->netdev; 954 struct igc_hw *hw = &adapter->hw; 955 956 if (WARN_ON(index >= hw->mac.rar_entry_count)) 957 return; 958 959 wr32(IGC_RAL(index), 0); 960 wr32(IGC_RAH(index), 0); 961 962 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 963 } 964 965 /* Set default MAC address for the PF in the first RAR entry */ 966 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 967 { 968 struct net_device *dev = adapter->netdev; 969 u8 *addr = adapter->hw.mac.addr; 970 971 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 972 973 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 974 } 975 976 /** 977 * igc_set_mac - Change the Ethernet Address of the NIC 978 * @netdev: network interface device structure 979 * @p: pointer to an address structure 980 * 981 * Returns 0 on success, negative on failure 982 */ 983 static int igc_set_mac(struct net_device *netdev, void *p) 984 { 985 struct igc_adapter *adapter = netdev_priv(netdev); 986 struct igc_hw *hw = &adapter->hw; 987 struct sockaddr *addr = p; 988 989 if (!is_valid_ether_addr(addr->sa_data)) 990 return -EADDRNOTAVAIL; 991 992 eth_hw_addr_set(netdev, addr->sa_data); 993 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 994 995 /* set the correct pool for the new PF MAC address in entry 0 */ 996 igc_set_default_mac_filter(adapter); 997 998 return 0; 999 } 1000 1001 /** 1002 * igc_write_mc_addr_list - write multicast addresses to MTA 1003 * @netdev: network interface device structure 1004 * 1005 * Writes multicast address list to the MTA hash table. 1006 * Returns: -ENOMEM on failure 1007 * 0 on no addresses written 1008 * X on writing X addresses to MTA 1009 **/ 1010 static int igc_write_mc_addr_list(struct net_device *netdev) 1011 { 1012 struct igc_adapter *adapter = netdev_priv(netdev); 1013 struct igc_hw *hw = &adapter->hw; 1014 struct netdev_hw_addr *ha; 1015 u8 *mta_list; 1016 int i; 1017 1018 if (netdev_mc_empty(netdev)) { 1019 /* nothing to program, so clear mc list */ 1020 igc_update_mc_addr_list(hw, NULL, 0); 1021 return 0; 1022 } 1023 1024 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 1025 if (!mta_list) 1026 return -ENOMEM; 1027 1028 /* The shared function expects a packed array of only addresses. 
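	 * Each entry is ETH_ALEN (6) octets with no padding in between, which
	 * is why mta_list was sized as netdev_mc_count(netdev) * 6 above.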
	 */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	igc_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}

static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
				bool *first_flag, bool *insert_empty)
{
	struct igc_adapter *adapter = netdev_priv(ring->netdev);
	ktime_t cycle_time = adapter->cycle_time;
	ktime_t base_time = adapter->base_time;
	ktime_t now = ktime_get_clocktai();
	ktime_t baset_est, end_of_cycle;
	s32 launchtime;
	s64 n;

	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);

	baset_est = ktime_add_ns(base_time, cycle_time * (n));
	end_of_cycle = ktime_add_ns(baset_est, cycle_time);

	if (ktime_compare(txtime, end_of_cycle) >= 0) {
		if (baset_est != ring->last_ff_cycle) {
			*first_flag = true;
			ring->last_ff_cycle = baset_est;

			if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
				*insert_empty = true;
		}
	}

	/* There is a window at the end of the cycle in which packets may not
	 * honor their launchtime.  A 5 usec window was chosen to account for
	 * the time it takes software to update the tail pointer and for the
	 * packet data to be DMA'd into the packet buffer.
	 */
	if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
		netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
			    txtime);

	ring->last_tx_cycle = end_of_cycle;

	launchtime = ktime_sub_ns(txtime, baset_est);
	if (launchtime > 0)
		div_s64_rem(launchtime, cycle_time, &launchtime);
	else
		launchtime = 0;

	return cpu_to_le32(launchtime);
}

static int igc_init_empty_frame(struct igc_ring *ring,
				struct igc_tx_buffer *buffer,
				struct sk_buff *skb)
{
	unsigned int size;
	dma_addr_t dma;

	size = skb_headlen(skb);

	dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ring->dev, dma)) {
		netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
		return -ENOMEM;
	}

	buffer->skb = skb;
	buffer->protocol = 0;
	buffer->bytecount = skb->len;
	buffer->gso_segs = 1;
	buffer->time_stamp = jiffies;
	dma_unmap_len_set(buffer, len, skb->len);
	dma_unmap_addr_set(buffer, dma, dma);

	return 0;
}

static int igc_init_tx_empty_descriptor(struct igc_ring *ring,
					struct sk_buff *skb,
					struct igc_tx_buffer *first)
{
	union igc_adv_tx_desc *desc;
	u32 cmd_type, olinfo_status;
	int err;

	if (!igc_desc_unused(ring))
		return -EBUSY;

	err = igc_init_empty_frame(ring, first, skb);
	if (err)
		return err;

	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
		   first->bytecount;
	olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;

	desc = IGC_TX_DESC(ring, ring->next_to_use);
	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));

	netdev_tx_sent_queue(txring_txq(ring), skb->len);

	first->next_to_watch = desc;

	ring->next_to_use++;
	if (ring->next_to_use == ring->count)
		ring->next_to_use = 0;

	return 0;
}

#define IGC_EMPTY_FRAME_SIZE	60

static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
			    __le32 launch_time, bool first_flag,
			    u32 vlan_macip_lens, u32 type_tucmd,
			    u32 mss_l4len_idx)
{
	struct igc_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IGC_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

	/* For i225, context index must be unique per ring. */
	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	if (first_flag)
		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;

	context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
	context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->launch_time = launch_time;
}

static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
			__le32 launch_time, bool first_flag)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_failed:
		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
		    !tx_ring->launchtime_enable)
			return;
		goto no_csum;
	}

	switch (skb->csum_offset) {
	case offsetof(struct tcphdr, check):
		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
		fallthrough;
	case offsetof(struct udphdr, check):
		break;
	case offsetof(struct sctphdr, checksum):
		/* validate that this is actually an SCTP request */
		if (skb_csum_is_sctp(skb)) {
			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
			break;
		}
		fallthrough;
	default:
		skb_checksum_help(skb);
		goto csum_failed;
	}

	/* update TX checksum flag */
	first->tx_flags |= IGC_TX_FLAGS_CSUM;
	vlan_macip_lens = skb_checksum_start_offset(skb) -
			  skb_network_offset(skb);
no_csum:
	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;

	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
			vlan_macip_lens, type_tucmd, 0);
}

static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* A memory barrier is needed here so the stopped-queue state is
	 * visible before we re-check for free descriptors below; otherwise
	 * a wakeup from the Tx completion path could be missed.
	 */
	smp_mb();

	/* We need to check again in case another CPU has just
	 * made room available.
	 */
	if (igc_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	if (igc_desc_unused(tx_ring) >= size)
		return 0;
	return __igc_maybe_stop_tx(tx_ring, size);
}

#define IGC_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ?
\ 1257 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1258 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1259 1260 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1261 { 1262 /* set type for advanced descriptor with frame checksum insertion */ 1263 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1264 IGC_ADVTXD_DCMD_DEXT | 1265 IGC_ADVTXD_DCMD_IFCS; 1266 1267 /* set HW vlan bit if vlan is present */ 1268 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1269 IGC_ADVTXD_DCMD_VLE); 1270 1271 /* set segmentation bits for TSO */ 1272 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1273 (IGC_ADVTXD_DCMD_TSE)); 1274 1275 /* set timestamp bit if present, will select the register set 1276 * based on the _TSTAMP(_X) bit. 1277 */ 1278 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1279 (IGC_ADVTXD_MAC_TSTAMP)); 1280 1281 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, 1282 (IGC_ADVTXD_TSTAMP_REG_1)); 1283 1284 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, 1285 (IGC_ADVTXD_TSTAMP_REG_2)); 1286 1287 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, 1288 (IGC_ADVTXD_TSTAMP_REG_3)); 1289 1290 /* insert frame checksum */ 1291 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1292 1293 return cmd_type; 1294 } 1295 1296 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1297 union igc_adv_tx_desc *tx_desc, 1298 u32 tx_flags, unsigned int paylen) 1299 { 1300 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1301 1302 /* insert L4 checksum */ 1303 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, 1304 (IGC_TXD_POPTS_TXSM << 8)); 1305 1306 /* insert IPv4 checksum */ 1307 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, 1308 (IGC_TXD_POPTS_IXSM << 8)); 1309 1310 /* Use the second timer (free running, in general) for the timestamp */ 1311 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, 1312 IGC_TXD_PTP2_TIMER_1); 1313 1314 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1315 } 1316 1317 static int igc_tx_map(struct igc_ring *tx_ring, 1318 struct igc_tx_buffer *first, 1319 const u8 hdr_len) 1320 { 1321 struct sk_buff *skb = first->skb; 1322 struct igc_tx_buffer *tx_buffer; 1323 union igc_adv_tx_desc *tx_desc; 1324 u32 tx_flags = first->tx_flags; 1325 skb_frag_t *frag; 1326 u16 i = tx_ring->next_to_use; 1327 unsigned int data_len, size; 1328 dma_addr_t dma; 1329 u32 cmd_type; 1330 1331 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1332 tx_desc = IGC_TX_DESC(tx_ring, i); 1333 1334 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1335 1336 size = skb_headlen(skb); 1337 data_len = skb->data_len; 1338 1339 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1340 1341 tx_buffer = first; 1342 1343 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1344 if (dma_mapping_error(tx_ring->dev, dma)) 1345 goto dma_error; 1346 1347 /* record length, and DMA address */ 1348 dma_unmap_len_set(tx_buffer, len, size); 1349 dma_unmap_addr_set(tx_buffer, dma, dma); 1350 1351 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1352 1353 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1354 tx_desc->read.cmd_type_len = 1355 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1356 1357 i++; 1358 tx_desc++; 1359 if (i == tx_ring->count) { 1360 tx_desc = IGC_TX_DESC(tx_ring, 0); 1361 i = 0; 1362 } 1363 tx_desc->read.olinfo_status = 0; 1364 1365 dma += IGC_MAX_DATA_PER_TXD; 1366 size -= IGC_MAX_DATA_PER_TXD; 1367 1368 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1369 } 1370 
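		/* no paged data left to map: this was the last buffer of the
		 * frame, so leave the loop and write the RS | EOP descriptor
		 */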
1371 if (likely(!data_len)) 1372 break; 1373 1374 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1375 1376 i++; 1377 tx_desc++; 1378 if (i == tx_ring->count) { 1379 tx_desc = IGC_TX_DESC(tx_ring, 0); 1380 i = 0; 1381 } 1382 tx_desc->read.olinfo_status = 0; 1383 1384 size = skb_frag_size(frag); 1385 data_len -= size; 1386 1387 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 1388 size, DMA_TO_DEVICE); 1389 1390 tx_buffer = &tx_ring->tx_buffer_info[i]; 1391 } 1392 1393 /* write last descriptor with RS and EOP bits */ 1394 cmd_type |= size | IGC_TXD_DCMD; 1395 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1396 1397 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1398 1399 /* set the timestamp */ 1400 first->time_stamp = jiffies; 1401 1402 skb_tx_timestamp(skb); 1403 1404 /* Force memory writes to complete before letting h/w know there 1405 * are new descriptors to fetch. (Only applicable for weak-ordered 1406 * memory model archs, such as IA-64). 1407 * 1408 * We also need this memory barrier to make certain all of the 1409 * status bits have been updated before next_to_watch is written. 1410 */ 1411 wmb(); 1412 1413 /* set next_to_watch value indicating a packet is present */ 1414 first->next_to_watch = tx_desc; 1415 1416 i++; 1417 if (i == tx_ring->count) 1418 i = 0; 1419 1420 tx_ring->next_to_use = i; 1421 1422 /* Make sure there is space in the ring for the next send. */ 1423 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1424 1425 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1426 writel(i, tx_ring->tail); 1427 } 1428 1429 return 0; 1430 dma_error: 1431 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1432 tx_buffer = &tx_ring->tx_buffer_info[i]; 1433 1434 /* clear dma mappings for failed tx_buffer_info map */ 1435 while (tx_buffer != first) { 1436 if (dma_unmap_len(tx_buffer, len)) 1437 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1438 1439 if (i-- == 0) 1440 i += tx_ring->count; 1441 tx_buffer = &tx_ring->tx_buffer_info[i]; 1442 } 1443 1444 if (dma_unmap_len(tx_buffer, len)) 1445 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1446 1447 dev_kfree_skb_any(tx_buffer->skb); 1448 tx_buffer->skb = NULL; 1449 1450 tx_ring->next_to_use = i; 1451 1452 return -1; 1453 } 1454 1455 static int igc_tso(struct igc_ring *tx_ring, 1456 struct igc_tx_buffer *first, 1457 __le32 launch_time, bool first_flag, 1458 u8 *hdr_len) 1459 { 1460 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1461 struct sk_buff *skb = first->skb; 1462 union { 1463 struct iphdr *v4; 1464 struct ipv6hdr *v6; 1465 unsigned char *hdr; 1466 } ip; 1467 union { 1468 struct tcphdr *tcp; 1469 struct udphdr *udp; 1470 unsigned char *hdr; 1471 } l4; 1472 u32 paylen, l4_offset; 1473 int err; 1474 1475 if (skb->ip_summed != CHECKSUM_PARTIAL) 1476 return 0; 1477 1478 if (!skb_is_gso(skb)) 1479 return 0; 1480 1481 err = skb_cow_head(skb, 0); 1482 if (err < 0) 1483 return err; 1484 1485 ip.hdr = skb_network_header(skb); 1486 l4.hdr = skb_checksum_start(skb); 1487 1488 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1489 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1490 1491 /* initialize outer IP header fields */ 1492 if (ip.v4->version == 4) { 1493 unsigned char *csum_start = skb_checksum_start(skb); 1494 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1495 1496 /* IP header will have to cancel out any data that 1497 * is not a part of the outer IP header 1498 */ 1499 ip.v4->check = csum_fold(csum_partial(trans_start, 1500 csum_start - trans_start, 1501 0)); 1502 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 
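		/* the hardware rewrites tot_len for every segment it emits,
		 * so clear it in the template IPv4 header
		 */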
1503 1504 ip.v4->tot_len = 0; 1505 first->tx_flags |= IGC_TX_FLAGS_TSO | 1506 IGC_TX_FLAGS_CSUM | 1507 IGC_TX_FLAGS_IPV4; 1508 } else { 1509 ip.v6->payload_len = 0; 1510 first->tx_flags |= IGC_TX_FLAGS_TSO | 1511 IGC_TX_FLAGS_CSUM; 1512 } 1513 1514 /* determine offset of inner transport header */ 1515 l4_offset = l4.hdr - skb->data; 1516 1517 /* remove payload length from inner checksum */ 1518 paylen = skb->len - l4_offset; 1519 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1520 /* compute length of segmentation header */ 1521 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1522 csum_replace_by_diff(&l4.tcp->check, 1523 (__force __wsum)htonl(paylen)); 1524 } else { 1525 /* compute length of segmentation header */ 1526 *hdr_len = sizeof(*l4.udp) + l4_offset; 1527 csum_replace_by_diff(&l4.udp->check, 1528 (__force __wsum)htonl(paylen)); 1529 } 1530 1531 /* update gso size and bytecount with header size */ 1532 first->gso_segs = skb_shinfo(skb)->gso_segs; 1533 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1534 1535 /* MSS L4LEN IDX */ 1536 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1537 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1538 1539 /* VLAN MACLEN IPLEN */ 1540 vlan_macip_lens = l4.hdr - ip.hdr; 1541 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1542 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1543 1544 igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, 1545 vlan_macip_lens, type_tucmd, mss_l4len_idx); 1546 1547 return 1; 1548 } 1549 1550 static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) 1551 { 1552 int i; 1553 1554 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 1555 struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; 1556 1557 if (tstamp->skb) 1558 continue; 1559 1560 tstamp->skb = skb_get(skb); 1561 tstamp->start = jiffies; 1562 *flags = tstamp->flags; 1563 1564 return true; 1565 } 1566 1567 return false; 1568 } 1569 1570 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1571 struct igc_ring *tx_ring) 1572 { 1573 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1574 bool first_flag = false, insert_empty = false; 1575 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1576 __be16 protocol = vlan_get_protocol(skb); 1577 struct igc_tx_buffer *first; 1578 __le32 launch_time = 0; 1579 u32 tx_flags = 0; 1580 unsigned short f; 1581 ktime_t txtime; 1582 u8 hdr_len = 0; 1583 int tso = 0; 1584 1585 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1586 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1587 * + 2 desc gap to keep tail from touching head, 1588 * + 1 desc for context descriptor, 1589 * otherwise try next time 1590 */ 1591 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1592 count += TXD_USE_COUNT(skb_frag_size( 1593 &skb_shinfo(skb)->frags[f])); 1594 1595 if (igc_maybe_stop_tx(tx_ring, count + 5)) { 1596 /* this is a hard error */ 1597 return NETDEV_TX_BUSY; 1598 } 1599 1600 if (!tx_ring->launchtime_enable) 1601 goto done; 1602 1603 txtime = skb->tstamp; 1604 skb->tstamp = ktime_set(0, 0); 1605 launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); 1606 1607 if (insert_empty) { 1608 struct igc_tx_buffer *empty_info; 1609 struct sk_buff *empty; 1610 void *data; 1611 1612 empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1613 empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); 1614 if (!empty) 1615 goto done; 1616 1617 data = skb_put(empty, IGC_EMPTY_FRAME_SIZE); 1618 
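		/* alloc_skb() does not zero its data area; clear the dummy
		 * frame's payload so no stale memory hits the wire
		 */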
memset(data, 0, IGC_EMPTY_FRAME_SIZE); 1619 1620 igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); 1621 1622 if (igc_init_tx_empty_descriptor(tx_ring, 1623 empty, 1624 empty_info) < 0) 1625 dev_kfree_skb_any(empty); 1626 } 1627 1628 done: 1629 /* record the location of the first descriptor for this packet */ 1630 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1631 first->type = IGC_TX_BUFFER_TYPE_SKB; 1632 first->skb = skb; 1633 first->bytecount = skb->len; 1634 first->gso_segs = 1; 1635 1636 if (adapter->qbv_transition || tx_ring->oper_gate_closed) 1637 goto out_drop; 1638 1639 if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { 1640 adapter->stats.txdrop++; 1641 goto out_drop; 1642 } 1643 1644 if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && 1645 skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1646 unsigned long flags; 1647 u32 tstamp_flags; 1648 1649 spin_lock_irqsave(&adapter->ptp_tx_lock, flags); 1650 if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { 1651 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1652 tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; 1653 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_USE_CYCLES) 1654 tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; 1655 } else { 1656 adapter->tx_hwtstamp_skipped++; 1657 } 1658 1659 spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); 1660 } 1661 1662 if (skb_vlan_tag_present(skb)) { 1663 tx_flags |= IGC_TX_FLAGS_VLAN; 1664 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1665 } 1666 1667 /* record initial flags and protocol */ 1668 first->tx_flags = tx_flags; 1669 first->protocol = protocol; 1670 1671 tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); 1672 if (tso < 0) 1673 goto out_drop; 1674 else if (!tso) 1675 igc_tx_csum(tx_ring, first, launch_time, first_flag); 1676 1677 igc_tx_map(tx_ring, first, hdr_len); 1678 1679 return NETDEV_TX_OK; 1680 1681 out_drop: 1682 dev_kfree_skb_any(first->skb); 1683 first->skb = NULL; 1684 1685 return NETDEV_TX_OK; 1686 } 1687 1688 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1689 struct sk_buff *skb) 1690 { 1691 unsigned int r_idx = skb->queue_mapping; 1692 1693 if (r_idx >= adapter->num_tx_queues) 1694 r_idx = r_idx % adapter->num_tx_queues; 1695 1696 return adapter->tx_ring[r_idx]; 1697 } 1698 1699 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 1700 struct net_device *netdev) 1701 { 1702 struct igc_adapter *adapter = netdev_priv(netdev); 1703 1704 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb 1705 * in order to meet this minimum size requirement. 
	 */
	if (skb->len < 17) {
		if (skb_padto(skb, 17))
			return NETDEV_TX_OK;
		skb->len = 17;
	}

	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
}

static void igc_rx_checksum(struct igc_ring *ring,
			    union igc_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Ignore Checksum bit is set */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
		return;

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (igc_test_staterr(rx_desc,
			     IGC_RXDEXT_STATERR_L4E |
			     IGC_RXDEXT_STATERR_IPE)) {
		/* work around errata with sctp packets where the TCPE aka
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets (aka let the stack check the crc32c)
		 */
		if (!(skb->len == 60 &&
		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		}
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
				      IGC_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
		   le32_to_cpu(rx_desc->wb.upper.status_error));
}

/* Mapping HW RSS Type to enum pkt_hash_types */
static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
	[IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2,
	[IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions */
	[13] = PKT_HASH_TYPE_NONE,
	[14] = PKT_HASH_TYPE_NONE,
	[15] = PKT_HASH_TYPE_NONE,
};

static inline void igc_rx_hash(struct igc_ring *ring,
			       union igc_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	if (ring->netdev->features & NETIF_F_RXHASH) {
		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
		u32 rss_type = igc_rss_type(rx_desc);

		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
	}
}

static void igc_rx_vlan(struct igc_ring *rx_ring,
			union igc_adv_rx_desc *rx_desc,
			struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;
	u16 vid;

	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
	    igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
		if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
		    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
		else
			vid = le16_to_cpu(rx_desc->wb.upper.vlan);

		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
	}
}

/**
 *
igc_process_skb_fields - Populate skb header fields from Rx descriptor 1809 * @rx_ring: rx descriptor ring packet is being transacted on 1810 * @rx_desc: pointer to the EOP Rx descriptor 1811 * @skb: pointer to current skb being populated 1812 * 1813 * This function checks the ring, descriptor, and packet information in order 1814 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1815 * skb. 1816 */ 1817 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1818 union igc_adv_rx_desc *rx_desc, 1819 struct sk_buff *skb) 1820 { 1821 igc_rx_hash(rx_ring, rx_desc, skb); 1822 1823 igc_rx_checksum(rx_ring, rx_desc, skb); 1824 1825 igc_rx_vlan(rx_ring, rx_desc, skb); 1826 1827 skb_record_rx_queue(skb, rx_ring->queue_index); 1828 1829 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1830 } 1831 1832 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1833 { 1834 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1835 struct igc_adapter *adapter = netdev_priv(netdev); 1836 struct igc_hw *hw = &adapter->hw; 1837 u32 ctrl; 1838 1839 ctrl = rd32(IGC_CTRL); 1840 1841 if (enable) { 1842 /* enable VLAN tag insert/strip */ 1843 ctrl |= IGC_CTRL_VME; 1844 } else { 1845 /* disable VLAN tag insert/strip */ 1846 ctrl &= ~IGC_CTRL_VME; 1847 } 1848 wr32(IGC_CTRL, ctrl); 1849 } 1850 1851 static void igc_restore_vlan(struct igc_adapter *adapter) 1852 { 1853 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1854 } 1855 1856 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, 1857 const unsigned int size, 1858 int *rx_buffer_pgcnt) 1859 { 1860 struct igc_rx_buffer *rx_buffer; 1861 1862 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1863 *rx_buffer_pgcnt = 1864 #if (PAGE_SIZE < 8192) 1865 page_count(rx_buffer->page); 1866 #else 1867 0; 1868 #endif 1869 prefetchw(rx_buffer->page); 1870 1871 /* we are reusing so sync this buffer for CPU use */ 1872 dma_sync_single_range_for_cpu(rx_ring->dev, 1873 rx_buffer->dma, 1874 rx_buffer->page_offset, 1875 size, 1876 DMA_FROM_DEVICE); 1877 1878 rx_buffer->pagecnt_bias--; 1879 1880 return rx_buffer; 1881 } 1882 1883 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1884 unsigned int truesize) 1885 { 1886 #if (PAGE_SIZE < 8192) 1887 buffer->page_offset ^= truesize; 1888 #else 1889 buffer->page_offset += truesize; 1890 #endif 1891 } 1892 1893 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1894 unsigned int size) 1895 { 1896 unsigned int truesize; 1897 1898 #if (PAGE_SIZE < 8192) 1899 truesize = igc_rx_pg_size(ring) / 2; 1900 #else 1901 truesize = ring_uses_build_skb(ring) ? 1902 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1903 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1904 SKB_DATA_ALIGN(size); 1905 #endif 1906 return truesize; 1907 } 1908 1909 /** 1910 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1911 * @rx_ring: rx descriptor ring to transact packets on 1912 * @rx_buffer: buffer containing page to add 1913 * @skb: sk_buff to place the data into 1914 * @size: size of buffer to be added 1915 * 1916 * This function will add the data contained in rx_buffer->page to the skb. 1917 */ 1918 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1919 struct igc_rx_buffer *rx_buffer, 1920 struct sk_buff *skb, 1921 unsigned int size) 1922 { 1923 unsigned int truesize; 1924 1925 #if (PAGE_SIZE < 8192) 1926 truesize = igc_rx_pg_size(rx_ring) / 2; 1927 #else 1928 truesize = ring_uses_build_skb(rx_ring) ? 
1929 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1930 SKB_DATA_ALIGN(size); 1931 #endif 1932 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1933 rx_buffer->page_offset, size, truesize); 1934 1935 igc_rx_buffer_flip(rx_buffer, truesize); 1936 } 1937 1938 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1939 struct igc_rx_buffer *rx_buffer, 1940 struct xdp_buff *xdp) 1941 { 1942 unsigned int size = xdp->data_end - xdp->data; 1943 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1944 unsigned int metasize = xdp->data - xdp->data_meta; 1945 struct sk_buff *skb; 1946 1947 /* prefetch first cache line of first page */ 1948 net_prefetch(xdp->data_meta); 1949 1950 /* build an skb around the page buffer */ 1951 skb = napi_build_skb(xdp->data_hard_start, truesize); 1952 if (unlikely(!skb)) 1953 return NULL; 1954 1955 /* update pointers within the skb to store the data */ 1956 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1957 __skb_put(skb, size); 1958 if (metasize) 1959 skb_metadata_set(skb, metasize); 1960 1961 igc_rx_buffer_flip(rx_buffer, truesize); 1962 return skb; 1963 } 1964 1965 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1966 struct igc_rx_buffer *rx_buffer, 1967 struct igc_xdp_buff *ctx) 1968 { 1969 struct xdp_buff *xdp = &ctx->xdp; 1970 unsigned int metasize = xdp->data - xdp->data_meta; 1971 unsigned int size = xdp->data_end - xdp->data; 1972 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1973 void *va = xdp->data; 1974 unsigned int headlen; 1975 struct sk_buff *skb; 1976 1977 /* prefetch first cache line of first page */ 1978 net_prefetch(xdp->data_meta); 1979 1980 /* allocate a skb to store the frags */ 1981 skb = napi_alloc_skb(&rx_ring->q_vector->napi, 1982 IGC_RX_HDR_LEN + metasize); 1983 if (unlikely(!skb)) 1984 return NULL; 1985 1986 if (ctx->rx_ts) { 1987 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 1988 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 1989 } 1990 1991 /* Determine available headroom for copy */ 1992 headlen = size; 1993 if (headlen > IGC_RX_HDR_LEN) 1994 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 1995 1996 /* align pull length to size of long to optimize memcpy performance */ 1997 memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 1998 ALIGN(headlen + metasize, sizeof(long))); 1999 2000 if (metasize) { 2001 skb_metadata_set(skb, metasize); 2002 __skb_pull(skb, metasize); 2003 } 2004 2005 /* update all of the pointers */ 2006 size -= headlen; 2007 if (size) { 2008 skb_add_rx_frag(skb, 0, rx_buffer->page, 2009 (va + headlen) - page_address(rx_buffer->page), 2010 size, truesize); 2011 igc_rx_buffer_flip(rx_buffer, truesize); 2012 } else { 2013 rx_buffer->pagecnt_bias++; 2014 } 2015 2016 return skb; 2017 } 2018 2019 /** 2020 * igc_reuse_rx_page - page flip buffer and store it back on the ring 2021 * @rx_ring: rx descriptor ring to store buffers on 2022 * @old_buff: donor buffer to have page reused 2023 * 2024 * Synchronizes page for reuse by the adapter 2025 */ 2026 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 2027 struct igc_rx_buffer *old_buff) 2028 { 2029 u16 nta = rx_ring->next_to_alloc; 2030 struct igc_rx_buffer *new_buff; 2031 2032 new_buff = &rx_ring->rx_buffer_info[nta]; 2033 2034 /* update, and store next to alloc */ 2035 nta++; 2036 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 2037 2038 /* Transfer page from old buffer to new buffer. 
2039 * Move each member individually to avoid possible store 2040 * forwarding stalls. 2041 */ 2042 new_buff->dma = old_buff->dma; 2043 new_buff->page = old_buff->page; 2044 new_buff->page_offset = old_buff->page_offset; 2045 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 2046 } 2047 2048 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 2049 int rx_buffer_pgcnt) 2050 { 2051 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 2052 struct page *page = rx_buffer->page; 2053 2054 /* avoid re-using remote and pfmemalloc pages */ 2055 if (!dev_page_is_reusable(page)) 2056 return false; 2057 2058 #if (PAGE_SIZE < 8192) 2059 /* if we are only owner of page we can reuse it */ 2060 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 2061 return false; 2062 #else 2063 #define IGC_LAST_OFFSET \ 2064 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 2065 2066 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 2067 return false; 2068 #endif 2069 2070 /* If we have drained the page fragment pool we need to update 2071 * the pagecnt_bias and page count so that we fully restock the 2072 * number of references the driver holds. 2073 */ 2074 if (unlikely(pagecnt_bias == 1)) { 2075 page_ref_add(page, USHRT_MAX - 1); 2076 rx_buffer->pagecnt_bias = USHRT_MAX; 2077 } 2078 2079 return true; 2080 } 2081 2082 /** 2083 * igc_is_non_eop - process handling of non-EOP buffers 2084 * @rx_ring: Rx ring being processed 2085 * @rx_desc: Rx descriptor for current buffer 2086 * 2087 * This function updates next to clean. If the buffer is an EOP buffer 2088 * this function exits returning false, otherwise it will place the 2089 * sk_buff in the next buffer to be chained and return true indicating 2090 * that this is in fact a non-EOP buffer. 2091 */ 2092 static bool igc_is_non_eop(struct igc_ring *rx_ring, 2093 union igc_adv_rx_desc *rx_desc) 2094 { 2095 u32 ntc = rx_ring->next_to_clean + 1; 2096 2097 /* fetch, update, and store next to clean */ 2098 ntc = (ntc < rx_ring->count) ? ntc : 0; 2099 rx_ring->next_to_clean = ntc; 2100 2101 prefetch(IGC_RX_DESC(rx_ring, ntc)); 2102 2103 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 2104 return false; 2105 2106 return true; 2107 } 2108 2109 /** 2110 * igc_cleanup_headers - Correct corrupted or empty headers 2111 * @rx_ring: rx descriptor ring packet is being transacted on 2112 * @rx_desc: pointer to the EOP Rx descriptor 2113 * @skb: pointer to current skb being fixed 2114 * 2115 * Address the case where we are pulling data in on pages only 2116 * and as such no data is present in the skb header. 2117 * 2118 * In addition if skb is not at least 60 bytes we need to pad it so that 2119 * it is large enough to qualify as a valid Ethernet frame. 2120 * 2121 * Returns true if an error was encountered and skb was freed. 
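* Frames flagged with the RXE error bit are only dropped here when the netdev does not have NETIF_F_RXALL enabled.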
2122 */ 2123 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 2124 union igc_adv_rx_desc *rx_desc, 2125 struct sk_buff *skb) 2126 { 2127 /* XDP packets use error pointer so abort at this point */ 2128 if (IS_ERR(skb)) 2129 return true; 2130 2131 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 2132 struct net_device *netdev = rx_ring->netdev; 2133 2134 if (!(netdev->features & NETIF_F_RXALL)) { 2135 dev_kfree_skb_any(skb); 2136 return true; 2137 } 2138 } 2139 2140 /* if eth_skb_pad returns an error the skb was freed */ 2141 if (eth_skb_pad(skb)) 2142 return true; 2143 2144 return false; 2145 } 2146 2147 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 2148 struct igc_rx_buffer *rx_buffer, 2149 int rx_buffer_pgcnt) 2150 { 2151 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2152 /* hand second half of page back to the ring */ 2153 igc_reuse_rx_page(rx_ring, rx_buffer); 2154 } else { 2155 /* We are not reusing the buffer so unmap it and free 2156 * any references we are holding to it 2157 */ 2158 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 2159 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 2160 IGC_RX_DMA_ATTR); 2161 __page_frag_cache_drain(rx_buffer->page, 2162 rx_buffer->pagecnt_bias); 2163 } 2164 2165 /* clear contents of rx_buffer */ 2166 rx_buffer->page = NULL; 2167 } 2168 2169 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 2170 { 2171 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 2172 2173 if (ring_uses_build_skb(rx_ring)) 2174 return IGC_SKB_PAD; 2175 if (igc_xdp_is_enabled(adapter)) 2176 return XDP_PACKET_HEADROOM; 2177 2178 return 0; 2179 } 2180 2181 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 2182 struct igc_rx_buffer *bi) 2183 { 2184 struct page *page = bi->page; 2185 dma_addr_t dma; 2186 2187 /* since we are recycling buffers we should seldom need to alloc */ 2188 if (likely(page)) 2189 return true; 2190 2191 /* alloc new page for storage */ 2192 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 2193 if (unlikely(!page)) { 2194 rx_ring->rx_stats.alloc_failed++; 2195 return false; 2196 } 2197 2198 /* map page for use */ 2199 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 2200 igc_rx_pg_size(rx_ring), 2201 DMA_FROM_DEVICE, 2202 IGC_RX_DMA_ATTR); 2203 2204 /* if mapping failed free memory back to system since 2205 * there isn't much point in holding memory we can't use 2206 */ 2207 if (dma_mapping_error(rx_ring->dev, dma)) { 2208 __free_page(page); 2209 2210 rx_ring->rx_stats.alloc_failed++; 2211 return false; 2212 } 2213 2214 bi->dma = dma; 2215 bi->page = page; 2216 bi->page_offset = igc_rx_offset(rx_ring); 2217 page_ref_add(page, USHRT_MAX - 1); 2218 bi->pagecnt_bias = USHRT_MAX; 2219 2220 return true; 2221 } 2222 2223 /** 2224 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 2225 * @rx_ring: rx descriptor ring 2226 * @cleaned_count: number of buffers to clean 2227 */ 2228 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 2229 { 2230 union igc_adv_rx_desc *rx_desc; 2231 u16 i = rx_ring->next_to_use; 2232 struct igc_rx_buffer *bi; 2233 u16 bufsz; 2234 2235 /* nothing to do */ 2236 if (!cleaned_count) 2237 return; 2238 2239 rx_desc = IGC_RX_DESC(rx_ring, i); 2240 bi = &rx_ring->rx_buffer_info[i]; 2241 i -= rx_ring->count; 2242 2243 bufsz = igc_rx_bufsz(rx_ring); 2244 2245 do { 2246 if (!igc_alloc_mapped_page(rx_ring, bi)) 2247 break; 2248 2249 /* sync the buffer for use by the device */ 2250 dma_sync_single_range_for_device(rx_ring->dev, 
bi->dma, 2251 bi->page_offset, bufsz, 2252 DMA_FROM_DEVICE); 2253 2254 /* Refresh the desc even if buffer_addrs didn't change 2255 * because each write-back erases this info. 2256 */ 2257 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2258 2259 rx_desc++; 2260 bi++; 2261 i++; 2262 if (unlikely(!i)) { 2263 rx_desc = IGC_RX_DESC(rx_ring, 0); 2264 bi = rx_ring->rx_buffer_info; 2265 i -= rx_ring->count; 2266 } 2267 2268 /* clear the length for the next_to_use descriptor */ 2269 rx_desc->wb.upper.length = 0; 2270 2271 cleaned_count--; 2272 } while (cleaned_count); 2273 2274 i += rx_ring->count; 2275 2276 if (rx_ring->next_to_use != i) { 2277 /* record the next descriptor to use */ 2278 rx_ring->next_to_use = i; 2279 2280 /* update next to alloc since we have filled the ring */ 2281 rx_ring->next_to_alloc = i; 2282 2283 /* Force memory writes to complete before letting h/w 2284 * know there are new descriptors to fetch. (Only 2285 * applicable for weak-ordered memory model archs, 2286 * such as IA-64). 2287 */ 2288 wmb(); 2289 writel(i, rx_ring->tail); 2290 } 2291 } 2292 2293 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2294 { 2295 union igc_adv_rx_desc *desc; 2296 u16 i = ring->next_to_use; 2297 struct igc_rx_buffer *bi; 2298 dma_addr_t dma; 2299 bool ok = true; 2300 2301 if (!count) 2302 return ok; 2303 2304 XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); 2305 2306 desc = IGC_RX_DESC(ring, i); 2307 bi = &ring->rx_buffer_info[i]; 2308 i -= ring->count; 2309 2310 do { 2311 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2312 if (!bi->xdp) { 2313 ok = false; 2314 break; 2315 } 2316 2317 dma = xsk_buff_xdp_get_dma(bi->xdp); 2318 desc->read.pkt_addr = cpu_to_le64(dma); 2319 2320 desc++; 2321 bi++; 2322 i++; 2323 if (unlikely(!i)) { 2324 desc = IGC_RX_DESC(ring, 0); 2325 bi = ring->rx_buffer_info; 2326 i -= ring->count; 2327 } 2328 2329 /* Clear the length for the next_to_use descriptor. */ 2330 desc->wb.upper.length = 0; 2331 2332 count--; 2333 } while (count); 2334 2335 i += ring->count; 2336 2337 if (ring->next_to_use != i) { 2338 ring->next_to_use = i; 2339 2340 /* Force memory writes to complete before letting h/w 2341 * know there are new descriptors to fetch. (Only 2342 * applicable for weak-ordered memory model archs, 2343 * such as IA-64). 2344 */ 2345 wmb(); 2346 writel(i, ring->tail); 2347 } 2348 2349 return ok; 2350 } 2351 2352 /* This function requires __netif_tx_lock is held by the caller. */ 2353 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2354 struct xdp_frame *xdpf) 2355 { 2356 struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 2357 u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? 
sinfo->nr_frags : 0; 2358 u16 count, index = ring->next_to_use; 2359 struct igc_tx_buffer *head = &ring->tx_buffer_info[index]; 2360 struct igc_tx_buffer *buffer = head; 2361 union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index); 2362 u32 olinfo_status, len = xdpf->len, cmd_type; 2363 void *data = xdpf->data; 2364 u16 i; 2365 2366 count = TXD_USE_COUNT(len); 2367 for (i = 0; i < nr_frags; i++) 2368 count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); 2369 2370 if (igc_maybe_stop_tx(ring, count + 3)) { 2371 /* this is a hard error */ 2372 return -EBUSY; 2373 } 2374 2375 i = 0; 2376 head->bytecount = xdp_get_frame_len(xdpf); 2377 head->type = IGC_TX_BUFFER_TYPE_XDP; 2378 head->gso_segs = 1; 2379 head->xdpf = xdpf; 2380 2381 olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2382 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2383 2384 for (;;) { 2385 dma_addr_t dma; 2386 2387 dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE); 2388 if (dma_mapping_error(ring->dev, dma)) { 2389 netdev_err_once(ring->netdev, 2390 "Failed to map DMA for TX\n"); 2391 goto unmap; 2392 } 2393 2394 dma_unmap_len_set(buffer, len, len); 2395 dma_unmap_addr_set(buffer, dma, dma); 2396 2397 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2398 IGC_ADVTXD_DCMD_IFCS | len; 2399 2400 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2401 desc->read.buffer_addr = cpu_to_le64(dma); 2402 2403 buffer->protocol = 0; 2404 2405 if (++index == ring->count) 2406 index = 0; 2407 2408 if (i == nr_frags) 2409 break; 2410 2411 buffer = &ring->tx_buffer_info[index]; 2412 desc = IGC_TX_DESC(ring, index); 2413 desc->read.olinfo_status = 0; 2414 2415 data = skb_frag_address(&sinfo->frags[i]); 2416 len = skb_frag_size(&sinfo->frags[i]); 2417 i++; 2418 } 2419 desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD); 2420 2421 netdev_tx_sent_queue(txring_txq(ring), head->bytecount); 2422 /* set the timestamp */ 2423 head->time_stamp = jiffies; 2424 /* set next_to_watch value indicating a packet is present */ 2425 head->next_to_watch = desc; 2426 ring->next_to_use = index; 2427 2428 return 0; 2429 2430 unmap: 2431 for (;;) { 2432 buffer = &ring->tx_buffer_info[index]; 2433 if (dma_unmap_len(buffer, len)) 2434 dma_unmap_page(ring->dev, 2435 dma_unmap_addr(buffer, dma), 2436 dma_unmap_len(buffer, len), 2437 DMA_TO_DEVICE); 2438 dma_unmap_len_set(buffer, len, 0); 2439 if (buffer == head) 2440 break; 2441 2442 if (!index) 2443 index += ring->count; 2444 index--; 2445 } 2446 2447 return -ENOMEM; 2448 } 2449 2450 static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, 2451 int cpu) 2452 { 2453 int index = cpu; 2454 2455 if (unlikely(index < 0)) 2456 index = 0; 2457 2458 while (index >= adapter->num_tx_queues) 2459 index -= adapter->num_tx_queues; 2460 2461 return adapter->tx_ring[index]; 2462 } 2463 2464 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2465 { 2466 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2467 int cpu = smp_processor_id(); 2468 struct netdev_queue *nq; 2469 struct igc_ring *ring; 2470 int res; 2471 2472 if (unlikely(!xdpf)) 2473 return -EFAULT; 2474 2475 ring = igc_xdp_get_tx_ring(adapter, cpu); 2476 nq = txring_txq(ring); 2477 2478 __netif_tx_lock(nq, cpu); 2479 /* Avoid transmit queue timeout since we share it with the slow path */ 2480 txq_trans_cond_update(nq); 2481 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2482 __netif_tx_unlock(nq); 2483 return res; 2484 } 2485 2486 /* This function assumes rcu_read_lock() is held by the caller. 
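* The XDP program pointer handed in was fetched with READ_ONCE() by the callers and must not be dereferenced outside of the RCU read-side critical section.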
*/ 2487 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2488 struct bpf_prog *prog, 2489 struct xdp_buff *xdp) 2490 { 2491 u32 act = bpf_prog_run_xdp(prog, xdp); 2492 2493 switch (act) { 2494 case XDP_PASS: 2495 return IGC_XDP_PASS; 2496 case XDP_TX: 2497 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2498 goto out_failure; 2499 return IGC_XDP_TX; 2500 case XDP_REDIRECT: 2501 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2502 goto out_failure; 2503 return IGC_XDP_REDIRECT; 2504 break; 2505 default: 2506 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); 2507 fallthrough; 2508 case XDP_ABORTED: 2509 out_failure: 2510 trace_xdp_exception(adapter->netdev, prog, act); 2511 fallthrough; 2512 case XDP_DROP: 2513 return IGC_XDP_CONSUMED; 2514 } 2515 } 2516 2517 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, 2518 struct xdp_buff *xdp) 2519 { 2520 struct bpf_prog *prog; 2521 int res; 2522 2523 prog = READ_ONCE(adapter->xdp_prog); 2524 if (!prog) { 2525 res = IGC_XDP_PASS; 2526 goto out; 2527 } 2528 2529 res = __igc_xdp_run_prog(adapter, prog, xdp); 2530 2531 out: 2532 return ERR_PTR(-res); 2533 } 2534 2535 /* This function assumes __netif_tx_lock is held by the caller. */ 2536 static void igc_flush_tx_descriptors(struct igc_ring *ring) 2537 { 2538 /* Once tail pointer is updated, hardware can fetch the descriptors 2539 * any time so we issue a write membar here to ensure all memory 2540 * writes are complete before the tail pointer is updated. 2541 */ 2542 wmb(); 2543 writel(ring->next_to_use, ring->tail); 2544 } 2545 2546 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2547 { 2548 int cpu = smp_processor_id(); 2549 struct netdev_queue *nq; 2550 struct igc_ring *ring; 2551 2552 if (status & IGC_XDP_TX) { 2553 ring = igc_xdp_get_tx_ring(adapter, cpu); 2554 nq = txring_txq(ring); 2555 2556 __netif_tx_lock(nq, cpu); 2557 igc_flush_tx_descriptors(ring); 2558 __netif_tx_unlock(nq); 2559 } 2560 2561 if (status & IGC_XDP_REDIRECT) 2562 xdp_do_flush(); 2563 } 2564 2565 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2566 unsigned int packets, unsigned int bytes) 2567 { 2568 struct igc_ring *ring = q_vector->rx.ring; 2569 2570 u64_stats_update_begin(&ring->rx_syncp); 2571 ring->rx_stats.packets += packets; 2572 ring->rx_stats.bytes += bytes; 2573 u64_stats_update_end(&ring->rx_syncp); 2574 2575 q_vector->rx.total_packets += packets; 2576 q_vector->rx.total_bytes += bytes; 2577 } 2578 2579 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2580 { 2581 unsigned int total_bytes = 0, total_packets = 0; 2582 struct igc_adapter *adapter = q_vector->adapter; 2583 struct igc_ring *rx_ring = q_vector->rx.ring; 2584 struct sk_buff *skb = rx_ring->skb; 2585 u16 cleaned_count = igc_desc_unused(rx_ring); 2586 int xdp_status = 0, rx_buffer_pgcnt; 2587 2588 while (likely(total_packets < budget)) { 2589 struct igc_xdp_buff ctx = { .rx_ts = NULL }; 2590 struct igc_rx_buffer *rx_buffer; 2591 union igc_adv_rx_desc *rx_desc; 2592 unsigned int size, truesize; 2593 int pkt_offset = 0; 2594 void *pktbuf; 2595 2596 /* return some buffers to hardware, one at a time is too slow */ 2597 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2598 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2599 cleaned_count = 0; 2600 } 2601 2602 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2603 size = le16_to_cpu(rx_desc->wb.upper.length); 2604 if (!size) 2605 break; 2606 2607 /* This memory barrier is needed to keep us from reading 2608 * any 
other fields out of the rx_desc until we know the 2609 * descriptor has been written back 2610 */ 2611 dma_rmb(); 2612 2613 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2614 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2615 2616 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2617 2618 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2619 ctx.rx_ts = pktbuf; 2620 pkt_offset = IGC_TS_HDR_LEN; 2621 size -= IGC_TS_HDR_LEN; 2622 } 2623 2624 if (!skb) { 2625 xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); 2626 xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), 2627 igc_rx_offset(rx_ring) + pkt_offset, 2628 size, true); 2629 xdp_buff_clear_frags_flag(&ctx.xdp); 2630 ctx.rx_desc = rx_desc; 2631 2632 skb = igc_xdp_run_prog(adapter, &ctx.xdp); 2633 } 2634 2635 if (IS_ERR(skb)) { 2636 unsigned int xdp_res = -PTR_ERR(skb); 2637 2638 switch (xdp_res) { 2639 case IGC_XDP_CONSUMED: 2640 rx_buffer->pagecnt_bias++; 2641 break; 2642 case IGC_XDP_TX: 2643 case IGC_XDP_REDIRECT: 2644 igc_rx_buffer_flip(rx_buffer, truesize); 2645 xdp_status |= xdp_res; 2646 break; 2647 } 2648 2649 total_packets++; 2650 total_bytes += size; 2651 } else if (skb) 2652 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2653 else if (ring_uses_build_skb(rx_ring)) 2654 skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); 2655 else 2656 skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); 2657 2658 /* exit if we failed to retrieve a buffer */ 2659 if (!skb) { 2660 rx_ring->rx_stats.alloc_failed++; 2661 rx_buffer->pagecnt_bias++; 2662 break; 2663 } 2664 2665 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2666 cleaned_count++; 2667 2668 /* fetch next buffer in frame if non-eop */ 2669 if (igc_is_non_eop(rx_ring, rx_desc)) 2670 continue; 2671 2672 /* verify the packet layout is correct */ 2673 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2674 skb = NULL; 2675 continue; 2676 } 2677 2678 /* probably a little skewed due to removing CRC */ 2679 total_bytes += skb->len; 2680 2681 /* populate checksum, VLAN, and protocol */ 2682 igc_process_skb_fields(rx_ring, rx_desc, skb); 2683 2684 napi_gro_receive(&q_vector->napi, skb); 2685 2686 /* reset skb pointer */ 2687 skb = NULL; 2688 2689 /* update budget accounting */ 2690 total_packets++; 2691 } 2692 2693 if (xdp_status) 2694 igc_finalize_xdp(adapter, xdp_status); 2695 2696 /* place incomplete frames back on ring for completion */ 2697 rx_ring->skb = skb; 2698 2699 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2700 2701 if (cleaned_count) 2702 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2703 2704 return total_packets; 2705 } 2706 2707 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2708 struct xdp_buff *xdp) 2709 { 2710 unsigned int totalsize = xdp->data_end - xdp->data_meta; 2711 unsigned int metasize = xdp->data - xdp->data_meta; 2712 struct sk_buff *skb; 2713 2714 net_prefetch(xdp->data_meta); 2715 2716 skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); 2717 if (unlikely(!skb)) 2718 return NULL; 2719 2720 memcpy(__skb_put(skb, totalsize), xdp->data_meta, 2721 ALIGN(totalsize, sizeof(long))); 2722 2723 if (metasize) { 2724 skb_metadata_set(skb, metasize); 2725 __skb_pull(skb, metasize); 2726 } 2727 2728 return skb; 2729 } 2730 2731 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, 2732 union igc_adv_rx_desc *desc, 2733 struct xdp_buff *xdp, 2734 ktime_t timestamp) 2735 { 2736 struct igc_ring *ring = q_vector->rx.ring; 2737 struct sk_buff *skb; 2738 2739 skb = 
igc_construct_skb_zc(ring, xdp); 2740 if (!skb) { 2741 ring->rx_stats.alloc_failed++; 2742 return; 2743 } 2744 2745 if (timestamp) 2746 skb_hwtstamps(skb)->hwtstamp = timestamp; 2747 2748 if (igc_cleanup_headers(ring, desc, skb)) 2749 return; 2750 2751 igc_process_skb_fields(ring, desc, skb); 2752 napi_gro_receive(&q_vector->napi, skb); 2753 } 2754 2755 static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) 2756 { 2757 /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The 2758 * igc_xdp_buff shares its layout with xdp_buff_xsk and private 2759 * igc_xdp_buff fields fall into xdp_buff_xsk->cb 2760 */ 2761 return (struct igc_xdp_buff *)xdp; 2762 } 2763 2764 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) 2765 { 2766 struct igc_adapter *adapter = q_vector->adapter; 2767 struct igc_ring *ring = q_vector->rx.ring; 2768 u16 cleaned_count = igc_desc_unused(ring); 2769 int total_bytes = 0, total_packets = 0; 2770 u16 ntc = ring->next_to_clean; 2771 struct bpf_prog *prog; 2772 bool failure = false; 2773 int xdp_status = 0; 2774 2775 rcu_read_lock(); 2776 2777 prog = READ_ONCE(adapter->xdp_prog); 2778 2779 while (likely(total_packets < budget)) { 2780 union igc_adv_rx_desc *desc; 2781 struct igc_rx_buffer *bi; 2782 struct igc_xdp_buff *ctx; 2783 ktime_t timestamp = 0; 2784 unsigned int size; 2785 int res; 2786 2787 desc = IGC_RX_DESC(ring, ntc); 2788 size = le16_to_cpu(desc->wb.upper.length); 2789 if (!size) 2790 break; 2791 2792 /* This memory barrier is needed to keep us from reading 2793 * any other fields out of the rx_desc until we know the 2794 * descriptor has been written back 2795 */ 2796 dma_rmb(); 2797 2798 bi = &ring->rx_buffer_info[ntc]; 2799 2800 ctx = xsk_buff_to_igc_ctx(bi->xdp); 2801 ctx->rx_desc = desc; 2802 2803 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { 2804 ctx->rx_ts = bi->xdp->data; 2805 2806 bi->xdp->data += IGC_TS_HDR_LEN; 2807 2808 /* HW timestamp has been copied into local variable. Metadata 2809 * length when XDP program is called should be 0. 2810 */ 2811 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2812 size -= IGC_TS_HDR_LEN; 2813 } 2814 2815 bi->xdp->data_end = bi->xdp->data + size; 2816 xsk_buff_dma_sync_for_cpu(bi->xdp); 2817 2818 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2819 switch (res) { 2820 case IGC_XDP_PASS: 2821 igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); 2822 fallthrough; 2823 case IGC_XDP_CONSUMED: 2824 xsk_buff_free(bi->xdp); 2825 break; 2826 case IGC_XDP_TX: 2827 case IGC_XDP_REDIRECT: 2828 xdp_status |= res; 2829 break; 2830 } 2831 2832 bi->xdp = NULL; 2833 total_bytes += size; 2834 total_packets++; 2835 cleaned_count++; 2836 ntc++; 2837 if (ntc == ring->count) 2838 ntc = 0; 2839 } 2840 2841 ring->next_to_clean = ntc; 2842 rcu_read_unlock(); 2843 2844 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2845 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2846 2847 if (xdp_status) 2848 igc_finalize_xdp(adapter, xdp_status); 2849 2850 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2851 2852 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2853 if (failure || ring->next_to_clean == ring->next_to_use) 2854 xsk_set_rx_need_wakeup(ring->xsk_pool); 2855 else 2856 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2857 return total_packets; 2858 } 2859 2860 return failure ? 
budget : total_packets; 2861 } 2862 2863 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2864 unsigned int packets, unsigned int bytes) 2865 { 2866 struct igc_ring *ring = q_vector->tx.ring; 2867 2868 u64_stats_update_begin(&ring->tx_syncp); 2869 ring->tx_stats.bytes += bytes; 2870 ring->tx_stats.packets += packets; 2871 u64_stats_update_end(&ring->tx_syncp); 2872 2873 q_vector->tx.total_bytes += bytes; 2874 q_vector->tx.total_packets += packets; 2875 } 2876 2877 static void igc_xsk_request_timestamp(void *_priv) 2878 { 2879 struct igc_metadata_request *meta_req = _priv; 2880 struct igc_ring *tx_ring = meta_req->tx_ring; 2881 struct igc_tx_timestamp_request *tstamp; 2882 u32 tx_flags = IGC_TX_FLAGS_TSTAMP; 2883 struct igc_adapter *adapter; 2884 unsigned long lock_flags; 2885 bool found = false; 2886 int i; 2887 2888 if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { 2889 adapter = netdev_priv(tx_ring->netdev); 2890 2891 spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); 2892 2893 /* Search for available tstamp regs */ 2894 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 2895 tstamp = &adapter->tx_tstamp[i]; 2896 2897 /* tstamp->skb and tstamp->xsk_tx_buffer are in union. 2898 * When tstamp->skb is equal to NULL, 2899 * tstamp->xsk_tx_buffer is equal to NULL as well. 2900 * This condition means that the particular tstamp reg 2901 * is not occupied by other packet. 2902 */ 2903 if (!tstamp->skb) { 2904 found = true; 2905 break; 2906 } 2907 } 2908 2909 /* Return if no available tstamp regs */ 2910 if (!found) { 2911 adapter->tx_hwtstamp_skipped++; 2912 spin_unlock_irqrestore(&adapter->ptp_tx_lock, 2913 lock_flags); 2914 return; 2915 } 2916 2917 tstamp->start = jiffies; 2918 tstamp->xsk_queue_index = tx_ring->queue_index; 2919 tstamp->xsk_tx_buffer = meta_req->tx_buffer; 2920 tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; 2921 2922 /* Hold the transmit completion until timestamp is ready */ 2923 meta_req->tx_buffer->xsk_pending_ts = true; 2924 2925 /* Keep the pointer to tx_timestamp, which is located in XDP 2926 * metadata area. It is the location to store the value of 2927 * tx hardware timestamp. 2928 */ 2929 xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); 2930 2931 /* Set timestamp bit based on the _TSTAMP(_X) bit. 
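* tstamp->flags identifies which timestamp register slot was claimed above; the IGC_SET_FLAG() calls below translate that flag into the corresponding IGC_ADVTXD_MAC_TSTAMP / IGC_ADVTXD_TSTAMP_REG_x bit in meta_req->cmd_type.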
*/ 2932 tx_flags |= tstamp->flags; 2933 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2934 IGC_TX_FLAGS_TSTAMP, 2935 (IGC_ADVTXD_MAC_TSTAMP)); 2936 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2937 IGC_TX_FLAGS_TSTAMP_1, 2938 (IGC_ADVTXD_TSTAMP_REG_1)); 2939 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2940 IGC_TX_FLAGS_TSTAMP_2, 2941 (IGC_ADVTXD_TSTAMP_REG_2)); 2942 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2943 IGC_TX_FLAGS_TSTAMP_3, 2944 (IGC_ADVTXD_TSTAMP_REG_3)); 2945 2946 spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); 2947 } 2948 } 2949 2950 static u64 igc_xsk_fill_timestamp(void *_priv) 2951 { 2952 return *(u64 *)_priv; 2953 } 2954 2955 const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { 2956 .tmo_request_timestamp = igc_xsk_request_timestamp, 2957 .tmo_fill_timestamp = igc_xsk_fill_timestamp, 2958 }; 2959 2960 static void igc_xdp_xmit_zc(struct igc_ring *ring) 2961 { 2962 struct xsk_buff_pool *pool = ring->xsk_pool; 2963 struct netdev_queue *nq = txring_txq(ring); 2964 union igc_adv_tx_desc *tx_desc = NULL; 2965 int cpu = smp_processor_id(); 2966 struct xdp_desc xdp_desc; 2967 u16 budget, ntu; 2968 2969 if (!netif_carrier_ok(ring->netdev)) 2970 return; 2971 2972 __netif_tx_lock(nq, cpu); 2973 2974 /* Avoid transmit queue timeout since we share it with the slow path */ 2975 txq_trans_cond_update(nq); 2976 2977 ntu = ring->next_to_use; 2978 budget = igc_desc_unused(ring); 2979 2980 while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { 2981 struct igc_metadata_request meta_req; 2982 struct xsk_tx_metadata *meta = NULL; 2983 struct igc_tx_buffer *bi; 2984 u32 olinfo_status; 2985 dma_addr_t dma; 2986 2987 meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | 2988 IGC_ADVTXD_DCMD_DEXT | 2989 IGC_ADVTXD_DCMD_IFCS | 2990 IGC_TXD_DCMD | xdp_desc.len; 2991 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 2992 2993 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 2994 meta = xsk_buff_get_metadata(pool, xdp_desc.addr); 2995 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 2996 bi = &ring->tx_buffer_info[ntu]; 2997 2998 meta_req.tx_ring = ring; 2999 meta_req.tx_buffer = bi; 3000 meta_req.meta = meta; 3001 xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, 3002 &meta_req); 3003 3004 tx_desc = IGC_TX_DESC(ring, ntu); 3005 tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); 3006 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 3007 tx_desc->read.buffer_addr = cpu_to_le64(dma); 3008 3009 bi->type = IGC_TX_BUFFER_TYPE_XSK; 3010 bi->protocol = 0; 3011 bi->bytecount = xdp_desc.len; 3012 bi->gso_segs = 1; 3013 bi->time_stamp = jiffies; 3014 bi->next_to_watch = tx_desc; 3015 3016 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 3017 3018 ntu++; 3019 if (ntu == ring->count) 3020 ntu = 0; 3021 } 3022 3023 ring->next_to_use = ntu; 3024 if (tx_desc) { 3025 igc_flush_tx_descriptors(ring); 3026 xsk_tx_release(pool); 3027 } 3028 3029 __netif_tx_unlock(nq); 3030 } 3031 3032 /** 3033 * igc_clean_tx_irq - Reclaim resources after transmit completes 3034 * @q_vector: pointer to q_vector containing needed info 3035 * @napi_budget: Used to determine if we are in netpoll 3036 * 3037 * returns true if ring is completely cleaned 3038 */ 3039 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 3040 { 3041 struct igc_adapter *adapter = q_vector->adapter; 3042 unsigned int total_bytes = 0, total_packets = 0; 3043 unsigned int budget = q_vector->tx.work_limit; 3044 struct igc_ring *tx_ring = q_vector->tx.ring; 3045 unsigned int i = 
tx_ring->next_to_clean; 3046 struct igc_tx_buffer *tx_buffer; 3047 union igc_adv_tx_desc *tx_desc; 3048 u32 xsk_frames = 0; 3049 3050 if (test_bit(__IGC_DOWN, &adapter->state)) 3051 return true; 3052 3053 tx_buffer = &tx_ring->tx_buffer_info[i]; 3054 tx_desc = IGC_TX_DESC(tx_ring, i); 3055 i -= tx_ring->count; 3056 3057 do { 3058 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 3059 3060 /* if next_to_watch is not set then there is no work pending */ 3061 if (!eop_desc) 3062 break; 3063 3064 /* prevent any other reads prior to eop_desc */ 3065 smp_rmb(); 3066 3067 /* if DD is not set pending work has not been completed */ 3068 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 3069 break; 3070 3071 /* Hold the completions while there's a pending tx hardware 3072 * timestamp request from XDP Tx metadata. 3073 */ 3074 if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && 3075 tx_buffer->xsk_pending_ts) 3076 break; 3077 3078 /* clear next_to_watch to prevent false hangs */ 3079 tx_buffer->next_to_watch = NULL; 3080 3081 /* update the statistics for this packet */ 3082 total_bytes += tx_buffer->bytecount; 3083 total_packets += tx_buffer->gso_segs; 3084 3085 switch (tx_buffer->type) { 3086 case IGC_TX_BUFFER_TYPE_XSK: 3087 xsk_frames++; 3088 break; 3089 case IGC_TX_BUFFER_TYPE_XDP: 3090 xdp_return_frame(tx_buffer->xdpf); 3091 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3092 break; 3093 case IGC_TX_BUFFER_TYPE_SKB: 3094 napi_consume_skb(tx_buffer->skb, napi_budget); 3095 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3096 break; 3097 default: 3098 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 3099 break; 3100 } 3101 3102 /* clear last DMA location and unmap remaining buffers */ 3103 while (tx_desc != eop_desc) { 3104 tx_buffer++; 3105 tx_desc++; 3106 i++; 3107 if (unlikely(!i)) { 3108 i -= tx_ring->count; 3109 tx_buffer = tx_ring->tx_buffer_info; 3110 tx_desc = IGC_TX_DESC(tx_ring, 0); 3111 } 3112 3113 /* unmap any remaining paged data */ 3114 if (dma_unmap_len(tx_buffer, len)) 3115 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3116 } 3117 3118 /* move us one more past the eop_desc for start of next pkt */ 3119 tx_buffer++; 3120 tx_desc++; 3121 i++; 3122 if (unlikely(!i)) { 3123 i -= tx_ring->count; 3124 tx_buffer = tx_ring->tx_buffer_info; 3125 tx_desc = IGC_TX_DESC(tx_ring, 0); 3126 } 3127 3128 /* issue prefetch for next Tx descriptor */ 3129 prefetch(tx_desc); 3130 3131 /* update budget accounting */ 3132 budget--; 3133 } while (likely(budget)); 3134 3135 netdev_tx_completed_queue(txring_txq(tx_ring), 3136 total_packets, total_bytes); 3137 3138 i += tx_ring->count; 3139 tx_ring->next_to_clean = i; 3140 3141 igc_update_tx_stats(q_vector, total_packets, total_bytes); 3142 3143 if (tx_ring->xsk_pool) { 3144 if (xsk_frames) 3145 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 3146 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 3147 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 3148 igc_xdp_xmit_zc(tx_ring); 3149 } 3150 3151 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 3152 struct igc_hw *hw = &adapter->hw; 3153 3154 /* Detect a transmit hang in hardware, this serializes the 3155 * check with the clearing of time_stamp and movement of i 3156 */ 3157 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3158 if (tx_buffer->next_to_watch && 3159 time_after(jiffies, tx_buffer->time_stamp + 3160 (adapter->tx_timeout_factor * HZ)) && 3161 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && 3162 (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && 3163 
!tx_ring->oper_gate_closed) { 3164 /* detected Tx unit hang */ 3165 netdev_err(tx_ring->netdev, 3166 "Detected Tx Unit Hang\n" 3167 " Tx Queue <%d>\n" 3168 " TDH <%x>\n" 3169 " TDT <%x>\n" 3170 " next_to_use <%x>\n" 3171 " next_to_clean <%x>\n" 3172 "buffer_info[next_to_clean]\n" 3173 " time_stamp <%lx>\n" 3174 " next_to_watch <%p>\n" 3175 " jiffies <%lx>\n" 3176 " desc.status <%x>\n", 3177 tx_ring->queue_index, 3178 rd32(IGC_TDH(tx_ring->reg_idx)), 3179 readl(tx_ring->tail), 3180 tx_ring->next_to_use, 3181 tx_ring->next_to_clean, 3182 tx_buffer->time_stamp, 3183 tx_buffer->next_to_watch, 3184 jiffies, 3185 tx_buffer->next_to_watch->wb.status); 3186 netif_stop_subqueue(tx_ring->netdev, 3187 tx_ring->queue_index); 3188 3189 /* we are about to reset, no point in enabling stuff */ 3190 return true; 3191 } 3192 } 3193 3194 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 3195 if (unlikely(total_packets && 3196 netif_carrier_ok(tx_ring->netdev) && 3197 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 3198 /* Make sure that anybody stopping the queue after this 3199 * sees the new next_to_clean. 3200 */ 3201 smp_mb(); 3202 if (__netif_subqueue_stopped(tx_ring->netdev, 3203 tx_ring->queue_index) && 3204 !(test_bit(__IGC_DOWN, &adapter->state))) { 3205 netif_wake_subqueue(tx_ring->netdev, 3206 tx_ring->queue_index); 3207 3208 u64_stats_update_begin(&tx_ring->tx_syncp); 3209 tx_ring->tx_stats.restart_queue++; 3210 u64_stats_update_end(&tx_ring->tx_syncp); 3211 } 3212 } 3213 3214 return !!budget; 3215 } 3216 3217 static int igc_find_mac_filter(struct igc_adapter *adapter, 3218 enum igc_mac_filter_type type, const u8 *addr) 3219 { 3220 struct igc_hw *hw = &adapter->hw; 3221 int max_entries = hw->mac.rar_entry_count; 3222 u32 ral, rah; 3223 int i; 3224 3225 for (i = 0; i < max_entries; i++) { 3226 ral = rd32(IGC_RAL(i)); 3227 rah = rd32(IGC_RAH(i)); 3228 3229 if (!(rah & IGC_RAH_AV)) 3230 continue; 3231 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 3232 continue; 3233 if ((rah & IGC_RAH_RAH_MASK) != 3234 le16_to_cpup((__le16 *)(addr + 4))) 3235 continue; 3236 if (ral != le32_to_cpup((__le32 *)(addr))) 3237 continue; 3238 3239 return i; 3240 } 3241 3242 return -1; 3243 } 3244 3245 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 3246 { 3247 struct igc_hw *hw = &adapter->hw; 3248 int max_entries = hw->mac.rar_entry_count; 3249 u32 rah; 3250 int i; 3251 3252 for (i = 0; i < max_entries; i++) { 3253 rah = rd32(IGC_RAH(i)); 3254 3255 if (!(rah & IGC_RAH_AV)) 3256 return i; 3257 } 3258 3259 return -1; 3260 } 3261 3262 /** 3263 * igc_add_mac_filter() - Add MAC address filter 3264 * @adapter: Pointer to adapter where the filter should be added 3265 * @type: MAC address filter type (source or destination) 3266 * @addr: MAC address 3267 * @queue: If non-negative, queue assignment feature is enabled and frames 3268 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3269 * assignment is disabled. 3270 * 3271 * Return: 0 in case of success, negative errno code otherwise. 
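* Adding an address that is already present simply refreshes the existing entry and still returns 0, so no additional RAR slot is consumed.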
3272 */ 3273 static int igc_add_mac_filter(struct igc_adapter *adapter, 3274 enum igc_mac_filter_type type, const u8 *addr, 3275 int queue) 3276 { 3277 struct net_device *dev = adapter->netdev; 3278 int index; 3279 3280 index = igc_find_mac_filter(adapter, type, addr); 3281 if (index >= 0) 3282 goto update_filter; 3283 3284 index = igc_get_avail_mac_filter_slot(adapter); 3285 if (index < 0) 3286 return -ENOSPC; 3287 3288 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 3289 index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3290 addr, queue); 3291 3292 update_filter: 3293 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 3294 return 0; 3295 } 3296 3297 /** 3298 * igc_del_mac_filter() - Delete MAC address filter 3299 * @adapter: Pointer to adapter where the filter should be deleted from 3300 * @type: MAC address filter type (source or destination) 3301 * @addr: MAC address 3302 */ 3303 static void igc_del_mac_filter(struct igc_adapter *adapter, 3304 enum igc_mac_filter_type type, const u8 *addr) 3305 { 3306 struct net_device *dev = adapter->netdev; 3307 int index; 3308 3309 index = igc_find_mac_filter(adapter, type, addr); 3310 if (index < 0) 3311 return; 3312 3313 if (index == 0) { 3314 /* If this is the default filter, we don't actually delete it. 3315 * We just reset to its default value i.e. disable queue 3316 * assignment. 3317 */ 3318 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 3319 3320 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 3321 } else { 3322 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 3323 index, 3324 type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3325 addr); 3326 3327 igc_clear_mac_filter_hw(adapter, index); 3328 } 3329 } 3330 3331 /** 3332 * igc_add_vlan_prio_filter() - Add VLAN priority filter 3333 * @adapter: Pointer to adapter where the filter should be added 3334 * @prio: VLAN priority value 3335 * @queue: Queue number which matching frames are assigned to 3336 * 3337 * Return: 0 in case of success, negative errno code otherwise. 
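* -EEXIST is returned when a queue assignment already exists for @prio.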
3338 */ 3339 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 3340 int queue) 3341 { 3342 struct net_device *dev = adapter->netdev; 3343 struct igc_hw *hw = &adapter->hw; 3344 u32 vlanpqf; 3345 3346 vlanpqf = rd32(IGC_VLANPQF); 3347 3348 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 3349 netdev_dbg(dev, "VLAN priority filter already in use\n"); 3350 return -EEXIST; 3351 } 3352 3353 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 3354 vlanpqf |= IGC_VLANPQF_VALID(prio); 3355 3356 wr32(IGC_VLANPQF, vlanpqf); 3357 3358 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 3359 prio, queue); 3360 return 0; 3361 } 3362 3363 /** 3364 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 3365 * @adapter: Pointer to adapter where the filter should be deleted from 3366 * @prio: VLAN priority value 3367 */ 3368 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 3369 { 3370 struct igc_hw *hw = &adapter->hw; 3371 u32 vlanpqf; 3372 3373 vlanpqf = rd32(IGC_VLANPQF); 3374 3375 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 3376 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 3377 3378 wr32(IGC_VLANPQF, vlanpqf); 3379 3380 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 3381 prio); 3382 } 3383 3384 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 3385 { 3386 struct igc_hw *hw = &adapter->hw; 3387 int i; 3388 3389 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3390 u32 etqf = rd32(IGC_ETQF(i)); 3391 3392 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 3393 return i; 3394 } 3395 3396 return -1; 3397 } 3398 3399 /** 3400 * igc_add_etype_filter() - Add ethertype filter 3401 * @adapter: Pointer to adapter where the filter should be added 3402 * @etype: Ethertype value 3403 * @queue: If non-negative, queue assignment feature is enabled and frames 3404 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3405 * assignment is disabled. 3406 * 3407 * Return: 0 in case of success, negative errno code otherwise. 
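* -ENOSPC is returned when all MAX_ETYPE_FILTER slots are in use.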
3408 */ 3409 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3410 int queue) 3411 { 3412 struct igc_hw *hw = &adapter->hw; 3413 int index; 3414 u32 etqf; 3415 3416 index = igc_get_avail_etype_filter_slot(adapter); 3417 if (index < 0) 3418 return -ENOSPC; 3419 3420 etqf = rd32(IGC_ETQF(index)); 3421 3422 etqf &= ~IGC_ETQF_ETYPE_MASK; 3423 etqf |= etype; 3424 3425 if (queue >= 0) { 3426 etqf &= ~IGC_ETQF_QUEUE_MASK; 3427 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3428 etqf |= IGC_ETQF_QUEUE_ENABLE; 3429 } 3430 3431 etqf |= IGC_ETQF_FILTER_ENABLE; 3432 3433 wr32(IGC_ETQF(index), etqf); 3434 3435 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3436 etype, queue); 3437 return 0; 3438 } 3439 3440 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3441 { 3442 struct igc_hw *hw = &adapter->hw; 3443 int i; 3444 3445 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3446 u32 etqf = rd32(IGC_ETQF(i)); 3447 3448 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3449 return i; 3450 } 3451 3452 return -1; 3453 } 3454 3455 /** 3456 * igc_del_etype_filter() - Delete ethertype filter 3457 * @adapter: Pointer to adapter where the filter should be deleted from 3458 * @etype: Ethertype value 3459 */ 3460 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3461 { 3462 struct igc_hw *hw = &adapter->hw; 3463 int index; 3464 3465 index = igc_find_etype_filter(adapter, etype); 3466 if (index < 0) 3467 return; 3468 3469 wr32(IGC_ETQF(index), 0); 3470 3471 netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n", 3472 etype); 3473 } 3474 3475 static int igc_flex_filter_select(struct igc_adapter *adapter, 3476 struct igc_flex_filter *input, 3477 u32 *fhft) 3478 { 3479 struct igc_hw *hw = &adapter->hw; 3480 u8 fhft_index; 3481 u32 fhftsl; 3482 3483 if (input->index >= MAX_FLEX_FILTER) { 3484 netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n"); 3485 return -EINVAL; 3486 } 3487 3488 /* Indirect table select register */ 3489 fhftsl = rd32(IGC_FHFTSL); 3490 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3491 switch (input->index) { 3492 case 0 ... 7: 3493 fhftsl |= 0x00; 3494 break; 3495 case 8 ... 15: 3496 fhftsl |= 0x01; 3497 break; 3498 case 16 ... 23: 3499 fhftsl |= 0x02; 3500 break; 3501 case 24 ... 31: 3502 fhftsl |= 0x03; 3503 break; 3504 } 3505 wr32(IGC_FHFTSL, fhftsl); 3506 3507 /* Normalize index down to host table register */ 3508 fhft_index = input->index % 8; 3509 3510 *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : 3511 IGC_FHFT_EXT(fhft_index - 4); 3512 3513 return 0; 3514 } 3515 3516 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3517 struct igc_flex_filter *input) 3518 { 3519 struct igc_hw *hw = &adapter->hw; 3520 u8 *data = input->data; 3521 u8 *mask = input->mask; 3522 u32 queuing; 3523 u32 fhft; 3524 u32 wufc; 3525 int ret; 3526 int i; 3527 3528 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3529 * out early to avoid surprises later. 3530 */ 3531 if (input->length % 8 != 0) { 3532 netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n"); 3533 return -EINVAL; 3534 } 3535 3536 /* Select corresponding flex filter register and get base for host table. */ 3537 ret = igc_flex_filter_select(adapter, input, &fhft); 3538 if (ret) 3539 return ret; 3540 3541 /* When adding a filter globally disable flex filter feature. That is 3542 * recommended within the datasheet. 
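* The feature is re-enabled at the end of this function, once the filter data and mask have been programmed.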
3543 */ 3544 wufc = rd32(IGC_WUFC); 3545 wufc &= ~IGC_WUFC_FLEX_HQ; 3546 wr32(IGC_WUFC, wufc); 3547 3548 /* Configure filter */ 3549 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3550 queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); 3551 queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); 3552 3553 if (input->immediate_irq) 3554 queuing |= IGC_FHFT_IMM_INT; 3555 3556 if (input->drop) 3557 queuing |= IGC_FHFT_DROP; 3558 3559 wr32(fhft + 0xFC, queuing); 3560 3561 /* Write data (128 byte) and mask (128 bit) */ 3562 for (i = 0; i < 16; ++i) { 3563 const size_t data_idx = i * 8; 3564 const size_t row_idx = i * 16; 3565 u32 dw0 = 3566 (data[data_idx + 0] << 0) | 3567 (data[data_idx + 1] << 8) | 3568 (data[data_idx + 2] << 16) | 3569 (data[data_idx + 3] << 24); 3570 u32 dw1 = 3571 (data[data_idx + 4] << 0) | 3572 (data[data_idx + 5] << 8) | 3573 (data[data_idx + 6] << 16) | 3574 (data[data_idx + 7] << 24); 3575 u32 tmp; 3576 3577 /* Write row: dw0, dw1 and mask */ 3578 wr32(fhft + row_idx, dw0); 3579 wr32(fhft + row_idx + 4, dw1); 3580 3581 /* mask is only valid for MASK(7, 0) */ 3582 tmp = rd32(fhft + row_idx + 8); 3583 tmp &= ~GENMASK(7, 0); 3584 tmp |= mask[i]; 3585 wr32(fhft + row_idx + 8, tmp); 3586 } 3587 3588 /* Enable filter. */ 3589 wufc |= IGC_WUFC_FLEX_HQ; 3590 if (input->index > 8) { 3591 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */ 3592 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3593 3594 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3595 3596 wr32(IGC_WUFC_EXT, wufc_ext); 3597 } else { 3598 wufc |= (IGC_WUFC_FLX0 << input->index); 3599 } 3600 wr32(IGC_WUFC, wufc); 3601 3602 netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n", 3603 input->index); 3604 3605 return 0; 3606 } 3607 3608 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3609 const void *src, unsigned int offset, 3610 size_t len, const void *mask) 3611 { 3612 int i; 3613 3614 /* data */ 3615 memcpy(&flex->data[offset], src, len); 3616 3617 /* mask */ 3618 for (i = 0; i < len; ++i) { 3619 const unsigned int idx = i + offset; 3620 const u8 *ptr = mask; 3621 3622 if (mask) { 3623 if (ptr[i] & 0xff) 3624 flex->mask[idx / 8] |= BIT(idx % 8); 3625 3626 continue; 3627 } 3628 3629 flex->mask[idx / 8] |= BIT(idx % 8); 3630 } 3631 } 3632 3633 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3634 { 3635 struct igc_hw *hw = &adapter->hw; 3636 u32 wufc, wufc_ext; 3637 int i; 3638 3639 wufc = rd32(IGC_WUFC); 3640 wufc_ext = rd32(IGC_WUFC_EXT); 3641 3642 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3643 if (i < 8) { 3644 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3645 return i; 3646 } else { 3647 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3648 return i; 3649 } 3650 } 3651 3652 return -ENOSPC; 3653 } 3654 3655 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3656 { 3657 struct igc_hw *hw = &adapter->hw; 3658 u32 wufc, wufc_ext; 3659 3660 wufc = rd32(IGC_WUFC); 3661 wufc_ext = rd32(IGC_WUFC_EXT); 3662 3663 if (wufc & IGC_WUFC_FILTER_MASK) 3664 return true; 3665 3666 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3667 return true; 3668 3669 return false; 3670 } 3671 3672 static int igc_add_flex_filter(struct igc_adapter *adapter, 3673 struct igc_nfc_rule *rule) 3674 { 3675 struct igc_nfc_filter *filter = &rule->filter; 3676 unsigned int eth_offset, user_offset; 3677 struct igc_flex_filter flex = { }; 3678 int ret, index; 3679 bool vlan; 3680 3681 index = igc_find_avail_flex_filter_slot(adapter); 3682 if (index < 0) 3683 return -ENOSPC; 3684 3685 
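/* The slot reserved above is only written to hardware at the end, via igc_write_flex_filter_ll(), after the filter layout below has been assembled in 'flex'. */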
/* Construct the flex filter: 3686 * -> dest_mac [6] 3687 * -> src_mac [6] 3688 * -> tpid [2] 3689 * -> vlan tci [2] 3690 * -> ether type [2] 3691 * -> user data [8] 3692 * -> = 26 bytes => 32 length 3693 */ 3694 flex.index = index; 3695 flex.length = 32; 3696 flex.rx_queue = rule->action; 3697 3698 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3699 eth_offset = vlan ? 16 : 12; 3700 user_offset = vlan ? 18 : 14; 3701 3702 /* Add destination MAC */ 3703 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3704 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3705 ETH_ALEN, NULL); 3706 3707 /* Add source MAC */ 3708 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3709 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3710 ETH_ALEN, NULL); 3711 3712 /* Add VLAN etype */ 3713 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { 3714 __be16 vlan_etype = cpu_to_be16(filter->vlan_etype); 3715 3716 igc_flex_filter_add_field(&flex, &vlan_etype, 12, 3717 sizeof(vlan_etype), NULL); 3718 } 3719 3720 /* Add VLAN TCI */ 3721 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3722 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3723 sizeof(filter->vlan_tci), NULL); 3724 3725 /* Add Ether type */ 3726 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3727 __be16 etype = cpu_to_be16(filter->etype); 3728 3729 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3730 sizeof(etype), NULL); 3731 } 3732 3733 /* Add user data */ 3734 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3735 igc_flex_filter_add_field(&flex, &filter->user_data, 3736 user_offset, 3737 sizeof(filter->user_data), 3738 filter->user_mask); 3739 3740 /* Add it down to the hardware and enable it. */ 3741 ret = igc_write_flex_filter_ll(adapter, &flex); 3742 if (ret) 3743 return ret; 3744 3745 filter->flex_index = index; 3746 3747 return 0; 3748 } 3749 3750 static void igc_del_flex_filter(struct igc_adapter *adapter, 3751 u16 reg_index) 3752 { 3753 struct igc_hw *hw = &adapter->hw; 3754 u32 wufc; 3755 3756 /* Just disable the filter. The filter table itself is kept 3757 * intact. Another flex_filter_add() should override the "old" data 3758 * then. 
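* Leaving stale data in the table is harmless while the filter's enable bit is cleared.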
3759 */ 3760 if (reg_index > 8) { 3761 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3762 3763 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3764 wr32(IGC_WUFC_EXT, wufc_ext); 3765 } else { 3766 wufc = rd32(IGC_WUFC); 3767 3768 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3769 wr32(IGC_WUFC, wufc); 3770 } 3771 3772 if (igc_flex_filter_in_use(adapter)) 3773 return; 3774 3775 /* No filters are in use, we may disable flex filters */ 3776 wufc = rd32(IGC_WUFC); 3777 wufc &= ~IGC_WUFC_FLEX_HQ; 3778 wr32(IGC_WUFC, wufc); 3779 } 3780 3781 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3782 struct igc_nfc_rule *rule) 3783 { 3784 int err; 3785 3786 if (rule->flex) { 3787 return igc_add_flex_filter(adapter, rule); 3788 } 3789 3790 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3791 err = igc_add_etype_filter(adapter, rule->filter.etype, 3792 rule->action); 3793 if (err) 3794 return err; 3795 } 3796 3797 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3798 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3799 rule->filter.src_addr, rule->action); 3800 if (err) 3801 return err; 3802 } 3803 3804 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3805 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3806 rule->filter.dst_addr, rule->action); 3807 if (err) 3808 return err; 3809 } 3810 3811 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3812 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3813 3814 err = igc_add_vlan_prio_filter(adapter, prio, rule->action); 3815 if (err) 3816 return err; 3817 } 3818 3819 return 0; 3820 } 3821 3822 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3823 const struct igc_nfc_rule *rule) 3824 { 3825 if (rule->flex) { 3826 igc_del_flex_filter(adapter, rule->filter.flex_index); 3827 return; 3828 } 3829 3830 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3831 igc_del_etype_filter(adapter, rule->filter.etype); 3832 3833 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3834 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3835 3836 igc_del_vlan_prio_filter(adapter, prio); 3837 } 3838 3839 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3840 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3841 rule->filter.src_addr); 3842 3843 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3844 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3845 rule->filter.dst_addr); 3846 } 3847 3848 /** 3849 * igc_get_nfc_rule() - Get NFC rule 3850 * @adapter: Pointer to adapter 3851 * @location: Rule location 3852 * 3853 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3854 * 3855 * Return: Pointer to NFC rule at @location. If not found, NULL. 3856 */ 3857 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3858 u32 location) 3859 { 3860 struct igc_nfc_rule *rule; 3861 3862 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3863 if (rule->location == location) 3864 return rule; 3865 if (rule->location > location) 3866 break; 3867 } 3868 3869 return NULL; 3870 } 3871 3872 /** 3873 * igc_del_nfc_rule() - Delete NFC rule 3874 * @adapter: Pointer to adapter 3875 * @rule: Pointer to rule to be deleted 3876 * 3877 * Disable NFC rule in hardware and delete it from adapter. 3878 * 3879 * Context: Expects adapter->nfc_rule_lock to be held by caller. 
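* The rule memory is freed here, so callers must not dereference @rule afterwards.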
3880 */ 3881 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3882 { 3883 igc_disable_nfc_rule(adapter, rule); 3884 3885 list_del(&rule->list); 3886 adapter->nfc_rule_count--; 3887 3888 kfree(rule); 3889 } 3890 3891 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 3892 { 3893 struct igc_nfc_rule *rule, *tmp; 3894 3895 mutex_lock(&adapter->nfc_rule_lock); 3896 3897 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 3898 igc_del_nfc_rule(adapter, rule); 3899 3900 mutex_unlock(&adapter->nfc_rule_lock); 3901 } 3902 3903 /** 3904 * igc_add_nfc_rule() - Add NFC rule 3905 * @adapter: Pointer to adapter 3906 * @rule: Pointer to rule to be added 3907 * 3908 * Enable NFC rule in hardware and add it to adapter. 3909 * 3910 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3911 * 3912 * Return: 0 on success, negative errno on failure. 3913 */ 3914 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3915 { 3916 struct igc_nfc_rule *pred, *cur; 3917 int err; 3918 3919 err = igc_enable_nfc_rule(adapter, rule); 3920 if (err) 3921 return err; 3922 3923 pred = NULL; 3924 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 3925 if (cur->location >= rule->location) 3926 break; 3927 pred = cur; 3928 } 3929 3930 list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list); 3931 adapter->nfc_rule_count++; 3932 return 0; 3933 } 3934 3935 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 3936 { 3937 struct igc_nfc_rule *rule; 3938 3939 mutex_lock(&adapter->nfc_rule_lock); 3940 3941 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 3942 igc_enable_nfc_rule(adapter, rule); 3943 3944 mutex_unlock(&adapter->nfc_rule_lock); 3945 } 3946 3947 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 3948 { 3949 struct igc_adapter *adapter = netdev_priv(netdev); 3950 3951 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 3952 } 3953 3954 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 3955 { 3956 struct igc_adapter *adapter = netdev_priv(netdev); 3957 3958 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 3959 return 0; 3960 } 3961 3962 /** 3963 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 3964 * @netdev: network interface device structure 3965 * 3966 * The set_rx_mode entry point is called whenever the unicast or multicast 3967 * address lists or the network interface flags are updated. This routine is 3968 * responsible for configuring the hardware for proper unicast, multicast, 3969 * promiscuous mode, and all-multi behavior. 
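* If the unicast or multicast address lists cannot be fully programmed into hardware, the corresponding promiscuous mode is enabled as a fallback so that no traffic is lost.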
3970 */ 3971 static void igc_set_rx_mode(struct net_device *netdev) 3972 { 3973 struct igc_adapter *adapter = netdev_priv(netdev); 3974 struct igc_hw *hw = &adapter->hw; 3975 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 3976 int count; 3977 3978 /* Check for Promiscuous and All Multicast modes */ 3979 if (netdev->flags & IFF_PROMISC) { 3980 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 3981 } else { 3982 if (netdev->flags & IFF_ALLMULTI) { 3983 rctl |= IGC_RCTL_MPE; 3984 } else { 3985 /* Write addresses to the MTA, if the attempt fails 3986 * then we should just turn on promiscuous mode so 3987 * that we can at least receive multicast traffic 3988 */ 3989 count = igc_write_mc_addr_list(netdev); 3990 if (count < 0) 3991 rctl |= IGC_RCTL_MPE; 3992 } 3993 } 3994 3995 /* Write addresses to available RAR registers, if there is not 3996 * sufficient space to store all the addresses then enable 3997 * unicast promiscuous mode 3998 */ 3999 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 4000 rctl |= IGC_RCTL_UPE; 4001 4002 /* update state of unicast and multicast */ 4003 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 4004 wr32(IGC_RCTL, rctl); 4005 4006 #if (PAGE_SIZE < 8192) 4007 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 4008 rlpml = IGC_MAX_FRAME_BUILD_SKB; 4009 #endif 4010 wr32(IGC_RLPML, rlpml); 4011 } 4012 4013 /** 4014 * igc_configure - configure the hardware for RX and TX 4015 * @adapter: private board structure 4016 */ 4017 static void igc_configure(struct igc_adapter *adapter) 4018 { 4019 struct net_device *netdev = adapter->netdev; 4020 int i = 0; 4021 4022 igc_get_hw_control(adapter); 4023 igc_set_rx_mode(netdev); 4024 4025 igc_restore_vlan(adapter); 4026 4027 igc_setup_tctl(adapter); 4028 igc_setup_mrqc(adapter); 4029 igc_setup_rctl(adapter); 4030 4031 igc_set_default_mac_filter(adapter); 4032 igc_restore_nfc_rules(adapter); 4033 4034 igc_configure_tx(adapter); 4035 igc_configure_rx(adapter); 4036 4037 igc_rx_fifo_flush_base(&adapter->hw); 4038 4039 /* call igc_desc_unused which always leaves 4040 * at least 1 descriptor unused to make sure 4041 * next_to_use != next_to_clean 4042 */ 4043 for (i = 0; i < adapter->num_rx_queues; i++) { 4044 struct igc_ring *ring = adapter->rx_ring[i]; 4045 4046 if (ring->xsk_pool) 4047 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 4048 else 4049 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 4050 } 4051 } 4052 4053 /** 4054 * igc_write_ivar - configure ivar for given MSI-X vector 4055 * @hw: pointer to the HW structure 4056 * @msix_vector: vector number we are allocating to a given ring 4057 * @index: row index of IVAR register to write within IVAR table 4058 * @offset: column offset within IVAR, should be a multiple of 8 4059 * 4060 * The IVAR table consists of 2 columns, 4061 * each containing a cause allocation for an Rx and Tx ring, and a 4062 * variable number of rows depending on the number of queues supported.
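* Each 32-bit IVAR register therefore covers two queues: igc_assign_vector() uses (queue >> 1) as the row index and ((queue & 0x1) << 4) as the column offset, adding 8 for the Tx cause of the same queue.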
4063 */ 4064 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 4065 int index, int offset) 4066 { 4067 u32 ivar = array_rd32(IGC_IVAR0, index); 4068 4069 /* clear any bits that are currently set */ 4070 ivar &= ~((u32)0xFF << offset); 4071 4072 /* write vector and valid bit */ 4073 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 4074 4075 array_wr32(IGC_IVAR0, index, ivar); 4076 } 4077 4078 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 4079 { 4080 struct igc_adapter *adapter = q_vector->adapter; 4081 struct igc_hw *hw = &adapter->hw; 4082 int rx_queue = IGC_N0_QUEUE; 4083 int tx_queue = IGC_N0_QUEUE; 4084 4085 if (q_vector->rx.ring) 4086 rx_queue = q_vector->rx.ring->reg_idx; 4087 if (q_vector->tx.ring) 4088 tx_queue = q_vector->tx.ring->reg_idx; 4089 4090 switch (hw->mac.type) { 4091 case igc_i225: 4092 if (rx_queue > IGC_N0_QUEUE) 4093 igc_write_ivar(hw, msix_vector, 4094 rx_queue >> 1, 4095 (rx_queue & 0x1) << 4); 4096 if (tx_queue > IGC_N0_QUEUE) 4097 igc_write_ivar(hw, msix_vector, 4098 tx_queue >> 1, 4099 ((tx_queue & 0x1) << 4) + 8); 4100 q_vector->eims_value = BIT(msix_vector); 4101 break; 4102 default: 4103 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 4104 break; 4105 } 4106 4107 /* add q_vector eims value to global eims_enable_mask */ 4108 adapter->eims_enable_mask |= q_vector->eims_value; 4109 4110 /* configure q_vector to set itr on first interrupt */ 4111 q_vector->set_itr = 1; 4112 } 4113 4114 /** 4115 * igc_configure_msix - Configure MSI-X hardware 4116 * @adapter: Pointer to adapter structure 4117 * 4118 * igc_configure_msix sets up the hardware to properly 4119 * generate MSI-X interrupts. 4120 */ 4121 static void igc_configure_msix(struct igc_adapter *adapter) 4122 { 4123 struct igc_hw *hw = &adapter->hw; 4124 int i, vector = 0; 4125 u32 tmp; 4126 4127 adapter->eims_enable_mask = 0; 4128 4129 /* set vector for other causes, i.e. link changes */ 4130 switch (hw->mac.type) { 4131 case igc_i225: 4132 /* Turn on MSI-X capability first, or our settings 4133 * won't stick. And it will take days to debug. 
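 *
 * Roughly: the GPIE write below selects MSI-X mode with extended
 * interrupt auto-mask and non-selective clear-on-read, and the
 * IVAR_MISC write that follows routes the "other" causes (link status
 * change and the like) to vector 0.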
4134 */ 4135 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 4136 IGC_GPIE_PBA | IGC_GPIE_EIAME | 4137 IGC_GPIE_NSICR); 4138 4139 /* enable msix_other interrupt */ 4140 adapter->eims_other = BIT(vector); 4141 tmp = (vector++ | IGC_IVAR_VALID) << 8; 4142 4143 wr32(IGC_IVAR_MISC, tmp); 4144 break; 4145 default: 4146 /* do nothing, since nothing else supports MSI-X */ 4147 break; 4148 } /* switch (hw->mac.type) */ 4149 4150 adapter->eims_enable_mask |= adapter->eims_other; 4151 4152 for (i = 0; i < adapter->num_q_vectors; i++) 4153 igc_assign_vector(adapter->q_vector[i], vector++); 4154 4155 wrfl(); 4156 } 4157 4158 /** 4159 * igc_irq_enable - Enable default interrupt generation settings 4160 * @adapter: board private structure 4161 */ 4162 static void igc_irq_enable(struct igc_adapter *adapter) 4163 { 4164 struct igc_hw *hw = &adapter->hw; 4165 4166 if (adapter->msix_entries) { 4167 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 4168 u32 regval = rd32(IGC_EIAC); 4169 4170 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 4171 regval = rd32(IGC_EIAM); 4172 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 4173 wr32(IGC_EIMS, adapter->eims_enable_mask); 4174 wr32(IGC_IMS, ims); 4175 } else { 4176 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4177 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4178 } 4179 } 4180 4181 /** 4182 * igc_irq_disable - Mask off interrupt generation on the NIC 4183 * @adapter: board private structure 4184 */ 4185 static void igc_irq_disable(struct igc_adapter *adapter) 4186 { 4187 struct igc_hw *hw = &adapter->hw; 4188 4189 if (adapter->msix_entries) { 4190 u32 regval = rd32(IGC_EIAM); 4191 4192 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 4193 wr32(IGC_EIMC, adapter->eims_enable_mask); 4194 regval = rd32(IGC_EIAC); 4195 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 4196 } 4197 4198 wr32(IGC_IAM, 0); 4199 wr32(IGC_IMC, ~0); 4200 wrfl(); 4201 4202 if (adapter->msix_entries) { 4203 int vector = 0, i; 4204 4205 synchronize_irq(adapter->msix_entries[vector++].vector); 4206 4207 for (i = 0; i < adapter->num_q_vectors; i++) 4208 synchronize_irq(adapter->msix_entries[vector++].vector); 4209 } else { 4210 synchronize_irq(adapter->pdev->irq); 4211 } 4212 } 4213 4214 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 4215 const u32 max_rss_queues) 4216 { 4217 /* Determine if we need to pair queues. */ 4218 /* If rss_queues > half of max_rss_queues, pair the queues in 4219 * order to conserve interrupts due to limited supply. 4220 */ 4221 if (adapter->rss_queues > (max_rss_queues / 2)) 4222 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4223 else 4224 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 4225 } 4226 4227 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 4228 { 4229 return IGC_MAX_RX_QUEUES; 4230 } 4231 4232 static void igc_init_queue_configuration(struct igc_adapter *adapter) 4233 { 4234 u32 max_rss_queues; 4235 4236 max_rss_queues = igc_get_max_rss_queues(adapter); 4237 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 4238 4239 igc_set_flag_queue_pairs(adapter, max_rss_queues); 4240 } 4241 4242 /** 4243 * igc_reset_q_vector - Reset config for interrupt vector 4244 * @adapter: board private structure to initialize 4245 * @v_idx: Index of vector to be reset 4246 * 4247 * If NAPI is enabled it will delete any references to the 4248 * NAPI struct. This is preparation for igc_free_q_vector. 
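 *
 * Note that this only unhooks the rings from adapter->tx_ring[] /
 * adapter->rx_ring[] and deletes the NAPI instance; the q_vector
 * memory itself is released later by igc_free_q_vector() via
 * kfree_rcu(), so that RCU readers such as igc_get_stats64() never
 * see a dangling pointer.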
4249 */ 4250 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 4251 { 4252 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4253 4254 /* if we're coming from igc_set_interrupt_capability, the vectors are 4255 * not yet allocated 4256 */ 4257 if (!q_vector) 4258 return; 4259 4260 if (q_vector->tx.ring) 4261 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 4262 4263 if (q_vector->rx.ring) 4264 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 4265 4266 netif_napi_del(&q_vector->napi); 4267 } 4268 4269 /** 4270 * igc_free_q_vector - Free memory allocated for specific interrupt vector 4271 * @adapter: board private structure to initialize 4272 * @v_idx: Index of vector to be freed 4273 * 4274 * This function frees the memory allocated to the q_vector. 4275 */ 4276 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 4277 { 4278 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4279 4280 adapter->q_vector[v_idx] = NULL; 4281 4282 /* igc_get_stats64() might access the rings on this vector, 4283 * we must wait a grace period before freeing it. 4284 */ 4285 if (q_vector) 4286 kfree_rcu(q_vector, rcu); 4287 } 4288 4289 /** 4290 * igc_free_q_vectors - Free memory allocated for interrupt vectors 4291 * @adapter: board private structure to initialize 4292 * 4293 * This function frees the memory allocated to the q_vectors. In addition if 4294 * NAPI is enabled it will delete any references to the NAPI struct prior 4295 * to freeing the q_vector. 4296 */ 4297 static void igc_free_q_vectors(struct igc_adapter *adapter) 4298 { 4299 int v_idx = adapter->num_q_vectors; 4300 4301 adapter->num_tx_queues = 0; 4302 adapter->num_rx_queues = 0; 4303 adapter->num_q_vectors = 0; 4304 4305 while (v_idx--) { 4306 igc_reset_q_vector(adapter, v_idx); 4307 igc_free_q_vector(adapter, v_idx); 4308 } 4309 } 4310 4311 /** 4312 * igc_update_itr - update the dynamic ITR value based on statistics 4313 * @q_vector: pointer to q_vector 4314 * @ring_container: ring info to update the itr for 4315 * 4316 * Stores a new ITR value based on packets and byte 4317 * counts during the last interrupt. The advantage of per interrupt 4318 * computation is faster updates and more accurate ITR for the current 4319 * traffic pattern. Constants in this function were computed 4320 * based on theoretical maximum wire speed and thresholds were set based 4321 * on testing data as well as attempting to minimize response time 4322 * while increasing bulk throughput. 4323 * NOTE: These calculations are only valid when operating in a single- 4324 * queue environment. 
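 *
 * For example, starting from lowest_latency, an interval of 3 packets
 * totalling 4500 bytes (1500 bytes/packet, fewer than 5 packets, more
 * than 512 bytes) moves the ring to low_latency, and a later interval
 * of 4 packets totalling 36000 bytes (9000 bytes/packet, i.e. TSO
 * sized) pushes it on to bulk_latency.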
4325 */ 4326 static void igc_update_itr(struct igc_q_vector *q_vector, 4327 struct igc_ring_container *ring_container) 4328 { 4329 unsigned int packets = ring_container->total_packets; 4330 unsigned int bytes = ring_container->total_bytes; 4331 u8 itrval = ring_container->itr; 4332 4333 /* no packets, exit with status unchanged */ 4334 if (packets == 0) 4335 return; 4336 4337 switch (itrval) { 4338 case lowest_latency: 4339 /* handle TSO and jumbo frames */ 4340 if (bytes / packets > 8000) 4341 itrval = bulk_latency; 4342 else if ((packets < 5) && (bytes > 512)) 4343 itrval = low_latency; 4344 break; 4345 case low_latency: /* 50 usec aka 20000 ints/s */ 4346 if (bytes > 10000) { 4347 /* this if handles the TSO accounting */ 4348 if (bytes / packets > 8000) 4349 itrval = bulk_latency; 4350 else if ((packets < 10) || ((bytes / packets) > 1200)) 4351 itrval = bulk_latency; 4352 else if ((packets > 35)) 4353 itrval = lowest_latency; 4354 } else if (bytes / packets > 2000) { 4355 itrval = bulk_latency; 4356 } else if (packets <= 2 && bytes < 512) { 4357 itrval = lowest_latency; 4358 } 4359 break; 4360 case bulk_latency: /* 250 usec aka 4000 ints/s */ 4361 if (bytes > 25000) { 4362 if (packets > 35) 4363 itrval = low_latency; 4364 } else if (bytes < 1500) { 4365 itrval = low_latency; 4366 } 4367 break; 4368 } 4369 4370 /* clear work counters since we have the values we need */ 4371 ring_container->total_bytes = 0; 4372 ring_container->total_packets = 0; 4373 4374 /* write updated itr to ring container */ 4375 ring_container->itr = itrval; 4376 } 4377 4378 static void igc_set_itr(struct igc_q_vector *q_vector) 4379 { 4380 struct igc_adapter *adapter = q_vector->adapter; 4381 u32 new_itr = q_vector->itr_val; 4382 u8 current_itr = 0; 4383 4384 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 4385 switch (adapter->link_speed) { 4386 case SPEED_10: 4387 case SPEED_100: 4388 current_itr = 0; 4389 new_itr = IGC_4K_ITR; 4390 goto set_itr_now; 4391 default: 4392 break; 4393 } 4394 4395 igc_update_itr(q_vector, &q_vector->tx); 4396 igc_update_itr(q_vector, &q_vector->rx); 4397 4398 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4399 4400 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4401 if (current_itr == lowest_latency && 4402 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4403 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4404 current_itr = low_latency; 4405 4406 switch (current_itr) { 4407 /* counts and packets in update_itr are dependent on these numbers */ 4408 case lowest_latency: 4409 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4410 break; 4411 case low_latency: 4412 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4413 break; 4414 case bulk_latency: 4415 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4416 break; 4417 default: 4418 break; 4419 } 4420 4421 set_itr_now: 4422 if (new_itr != q_vector->itr_val) { 4423 /* this attempts to bias the interrupt rate towards Bulk 4424 * by adding intermediate steps when interrupt rate is 4425 * increasing 4426 */ 4427 new_itr = new_itr > q_vector->itr_val ? 4428 max((new_itr * q_vector->itr_val) / 4429 (new_itr + (q_vector->itr_val >> 2)), 4430 new_itr) : new_itr; 4431 /* Don't write the value here; it resets the adapter's 4432 * internal timer, and causes us to delay far longer than 4433 * we should between interrupts. Instead, we write the ITR 4434 * value at the beginning of the next interrupt so the timing 4435 * ends up being correct. 
4436 */ 4437 q_vector->itr_val = new_itr; 4438 q_vector->set_itr = 1; 4439 } 4440 } 4441 4442 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 4443 { 4444 int v_idx = adapter->num_q_vectors; 4445 4446 if (adapter->msix_entries) { 4447 pci_disable_msix(adapter->pdev); 4448 kfree(adapter->msix_entries); 4449 adapter->msix_entries = NULL; 4450 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 4451 pci_disable_msi(adapter->pdev); 4452 } 4453 4454 while (v_idx--) 4455 igc_reset_q_vector(adapter, v_idx); 4456 } 4457 4458 /** 4459 * igc_set_interrupt_capability - set MSI or MSI-X if supported 4460 * @adapter: Pointer to adapter structure 4461 * @msix: boolean value for MSI-X capability 4462 * 4463 * Attempt to configure interrupts using the best available 4464 * capabilities of the hardware and kernel. 4465 */ 4466 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 4467 bool msix) 4468 { 4469 int numvecs, i; 4470 int err; 4471 4472 if (!msix) 4473 goto msi_only; 4474 adapter->flags |= IGC_FLAG_HAS_MSIX; 4475 4476 /* Number of supported queues. */ 4477 adapter->num_rx_queues = adapter->rss_queues; 4478 4479 adapter->num_tx_queues = adapter->rss_queues; 4480 4481 /* start with one vector for every Rx queue */ 4482 numvecs = adapter->num_rx_queues; 4483 4484 /* if Tx handler is separate add 1 for every Tx queue */ 4485 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 4486 numvecs += adapter->num_tx_queues; 4487 4488 /* store the number of vectors reserved for queues */ 4489 adapter->num_q_vectors = numvecs; 4490 4491 /* add 1 vector for link status interrupts */ 4492 numvecs++; 4493 4494 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 4495 GFP_KERNEL); 4496 4497 if (!adapter->msix_entries) 4498 return; 4499 4500 /* populate entry values */ 4501 for (i = 0; i < numvecs; i++) 4502 adapter->msix_entries[i].entry = i; 4503 4504 err = pci_enable_msix_range(adapter->pdev, 4505 adapter->msix_entries, 4506 numvecs, 4507 numvecs); 4508 if (err > 0) 4509 return; 4510 4511 kfree(adapter->msix_entries); 4512 adapter->msix_entries = NULL; 4513 4514 igc_reset_interrupt_capability(adapter); 4515 4516 msi_only: 4517 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 4518 4519 adapter->rss_queues = 1; 4520 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4521 adapter->num_rx_queues = 1; 4522 adapter->num_tx_queues = 1; 4523 adapter->num_q_vectors = 1; 4524 if (!pci_enable_msi(adapter->pdev)) 4525 adapter->flags |= IGC_FLAG_HAS_MSI; 4526 } 4527 4528 /** 4529 * igc_update_ring_itr - update the dynamic ITR value based on packet size 4530 * @q_vector: pointer to q_vector 4531 * 4532 * Stores a new ITR value based on strictly on packet size. This 4533 * algorithm is less sophisticated than that used in igc_update_itr, 4534 * due to the difficulty of synchronizing statistics across multiple 4535 * receive rings. The divisors and thresholds used by this function 4536 * were determined based on theoretical maximum wire speed and testing 4537 * data, in order to minimize response time while increasing bulk 4538 * throughput. 4539 * NOTE: This function is called only when operating in a multiqueue 4540 * receive environment. 4541 */ 4542 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4543 { 4544 struct igc_adapter *adapter = q_vector->adapter; 4545 int new_val = q_vector->itr_val; 4546 int avg_wire_size = 0; 4547 unsigned int packets; 4548 4549 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4550 * ints/sec - ITR timer value of 120 ticks. 
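 *
 * At 1000 Mbps and above the value is instead derived from the
 * average wire size computed below: for example 64 byte frames give
 * (64 + 24) / 2 = 44, 600 byte frames give (600 + 24) / 3 = 208 and
 * 1500 byte frames give (1500 + 24) / 2 = 762, with anything more
 * aggressive than IGC_20K_ITR clamped back to IGC_20K_ITR when the
 * conservative (setting == 3) mode is in use.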
4551 */ 4552 switch (adapter->link_speed) { 4553 case SPEED_10: 4554 case SPEED_100: 4555 new_val = IGC_4K_ITR; 4556 goto set_itr_val; 4557 default: 4558 break; 4559 } 4560 4561 packets = q_vector->rx.total_packets; 4562 if (packets) 4563 avg_wire_size = q_vector->rx.total_bytes / packets; 4564 4565 packets = q_vector->tx.total_packets; 4566 if (packets) 4567 avg_wire_size = max_t(u32, avg_wire_size, 4568 q_vector->tx.total_bytes / packets); 4569 4570 /* if avg_wire_size isn't set no work was done */ 4571 if (!avg_wire_size) 4572 goto clear_counts; 4573 4574 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4575 avg_wire_size += 24; 4576 4577 /* Don't starve jumbo frames */ 4578 avg_wire_size = min(avg_wire_size, 3000); 4579 4580 /* Give a little boost to mid-size frames */ 4581 if (avg_wire_size > 300 && avg_wire_size < 1200) 4582 new_val = avg_wire_size / 3; 4583 else 4584 new_val = avg_wire_size / 2; 4585 4586 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4587 if (new_val < IGC_20K_ITR && 4588 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4589 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4590 new_val = IGC_20K_ITR; 4591 4592 set_itr_val: 4593 if (new_val != q_vector->itr_val) { 4594 q_vector->itr_val = new_val; 4595 q_vector->set_itr = 1; 4596 } 4597 clear_counts: 4598 q_vector->rx.total_bytes = 0; 4599 q_vector->rx.total_packets = 0; 4600 q_vector->tx.total_bytes = 0; 4601 q_vector->tx.total_packets = 0; 4602 } 4603 4604 static void igc_ring_irq_enable(struct igc_q_vector *q_vector) 4605 { 4606 struct igc_adapter *adapter = q_vector->adapter; 4607 struct igc_hw *hw = &adapter->hw; 4608 4609 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4610 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4611 if (adapter->num_q_vectors == 1) 4612 igc_set_itr(q_vector); 4613 else 4614 igc_update_ring_itr(q_vector); 4615 } 4616 4617 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4618 if (adapter->msix_entries) 4619 wr32(IGC_EIMS, q_vector->eims_value); 4620 else 4621 igc_irq_enable(adapter); 4622 } 4623 } 4624 4625 static void igc_add_ring(struct igc_ring *ring, 4626 struct igc_ring_container *head) 4627 { 4628 head->ring = ring; 4629 head->count++; 4630 } 4631 4632 /** 4633 * igc_cache_ring_register - Descriptor ring to register mapping 4634 * @adapter: board private structure to initialize 4635 * 4636 * Once we know the feature-set enabled for the device, we'll cache 4637 * the register offset the descriptor ring is assigned to. 4638 */ 4639 static void igc_cache_ring_register(struct igc_adapter *adapter) 4640 { 4641 int i = 0, j = 0; 4642 4643 switch (adapter->hw.mac.type) { 4644 case igc_i225: 4645 default: 4646 for (; i < adapter->num_rx_queues; i++) 4647 adapter->rx_ring[i]->reg_idx = i; 4648 for (; j < adapter->num_tx_queues; j++) 4649 adapter->tx_ring[j]->reg_idx = j; 4650 break; 4651 } 4652 } 4653 4654 /** 4655 * igc_poll - NAPI Rx polling callback 4656 * @napi: napi polling structure 4657 * @budget: count of how many packets we should handle 4658 */ 4659 static int igc_poll(struct napi_struct *napi, int budget) 4660 { 4661 struct igc_q_vector *q_vector = container_of(napi, 4662 struct igc_q_vector, 4663 napi); 4664 struct igc_ring *rx_ring = q_vector->rx.ring; 4665 bool clean_complete = true; 4666 int work_done = 0; 4667 4668 if (q_vector->tx.ring) 4669 clean_complete = igc_clean_tx_irq(q_vector, budget); 4670 4671 if (rx_ring) { 4672 int cleaned = rx_ring->xsk_pool ? 
4673 igc_clean_rx_irq_zc(q_vector, budget) : 4674 igc_clean_rx_irq(q_vector, budget); 4675 4676 work_done += cleaned; 4677 if (cleaned >= budget) 4678 clean_complete = false; 4679 } 4680 4681 /* If all work not completed, return budget and keep polling */ 4682 if (!clean_complete) 4683 return budget; 4684 4685 /* Exit the polling mode, but don't re-enable interrupts if stack might 4686 * poll us due to busy-polling 4687 */ 4688 if (likely(napi_complete_done(napi, work_done))) 4689 igc_ring_irq_enable(q_vector); 4690 4691 return min(work_done, budget - 1); 4692 } 4693 4694 /** 4695 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4696 * @adapter: board private structure to initialize 4697 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4698 * @v_idx: index of vector in adapter struct 4699 * @txr_count: total number of Tx rings to allocate 4700 * @txr_idx: index of first Tx ring to allocate 4701 * @rxr_count: total number of Rx rings to allocate 4702 * @rxr_idx: index of first Rx ring to allocate 4703 * 4704 * We allocate one q_vector. If allocation fails we return -ENOMEM. 4705 */ 4706 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4707 unsigned int v_count, unsigned int v_idx, 4708 unsigned int txr_count, unsigned int txr_idx, 4709 unsigned int rxr_count, unsigned int rxr_idx) 4710 { 4711 struct igc_q_vector *q_vector; 4712 struct igc_ring *ring; 4713 int ring_count; 4714 4715 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4716 if (txr_count > 1 || rxr_count > 1) 4717 return -ENOMEM; 4718 4719 ring_count = txr_count + rxr_count; 4720 4721 /* allocate q_vector and rings */ 4722 q_vector = adapter->q_vector[v_idx]; 4723 if (!q_vector) 4724 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4725 GFP_KERNEL); 4726 else 4727 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4728 if (!q_vector) 4729 return -ENOMEM; 4730 4731 /* initialize NAPI */ 4732 netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll); 4733 4734 /* tie q_vector and adapter together */ 4735 adapter->q_vector[v_idx] = q_vector; 4736 q_vector->adapter = adapter; 4737 4738 /* initialize work limits */ 4739 q_vector->tx.work_limit = adapter->tx_work_limit; 4740 4741 /* initialize ITR configuration */ 4742 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4743 q_vector->itr_val = IGC_START_ITR; 4744 4745 /* initialize pointer to rings */ 4746 ring = q_vector->ring; 4747 4748 /* initialize ITR */ 4749 if (rxr_count) { 4750 /* rx or rx/tx vector */ 4751 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4752 q_vector->itr_val = adapter->rx_itr_setting; 4753 } else { 4754 /* tx only vector */ 4755 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4756 q_vector->itr_val = adapter->tx_itr_setting; 4757 } 4758 4759 if (txr_count) { 4760 /* assign generic ring traits */ 4761 ring->dev = &adapter->pdev->dev; 4762 ring->netdev = adapter->netdev; 4763 4764 /* configure backlink on ring */ 4765 ring->q_vector = q_vector; 4766 4767 /* update q_vector Tx values */ 4768 igc_add_ring(ring, &q_vector->tx); 4769 4770 /* apply Tx specific ring traits */ 4771 ring->count = adapter->tx_ring_count; 4772 ring->queue_index = txr_idx; 4773 4774 /* assign ring to adapter */ 4775 adapter->tx_ring[txr_idx] = ring; 4776 4777 /* push pointer to next ring */ 4778 ring++; 4779 } 4780 4781 if (rxr_count) { 4782 /* assign generic ring traits */ 4783 ring->dev = &adapter->pdev->dev; 4784 ring->netdev = adapter->netdev; 4785 4786 /* configure 
backlink on ring */ 4787 ring->q_vector = q_vector; 4788 4789 /* update q_vector Rx values */ 4790 igc_add_ring(ring, &q_vector->rx); 4791 4792 /* apply Rx specific ring traits */ 4793 ring->count = adapter->rx_ring_count; 4794 ring->queue_index = rxr_idx; 4795 4796 /* assign ring to adapter */ 4797 adapter->rx_ring[rxr_idx] = ring; 4798 } 4799 4800 return 0; 4801 } 4802 4803 /** 4804 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4805 * @adapter: board private structure to initialize 4806 * 4807 * We allocate one q_vector per queue interrupt. If allocation fails we 4808 * return -ENOMEM. 4809 */ 4810 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4811 { 4812 int rxr_remaining = adapter->num_rx_queues; 4813 int txr_remaining = adapter->num_tx_queues; 4814 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4815 int q_vectors = adapter->num_q_vectors; 4816 int err; 4817 4818 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4819 for (; rxr_remaining; v_idx++) { 4820 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4821 0, 0, 1, rxr_idx); 4822 4823 if (err) 4824 goto err_out; 4825 4826 /* update counts and index */ 4827 rxr_remaining--; 4828 rxr_idx++; 4829 } 4830 } 4831 4832 for (; v_idx < q_vectors; v_idx++) { 4833 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4834 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4835 4836 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4837 tqpv, txr_idx, rqpv, rxr_idx); 4838 4839 if (err) 4840 goto err_out; 4841 4842 /* update counts and index */ 4843 rxr_remaining -= rqpv; 4844 txr_remaining -= tqpv; 4845 rxr_idx++; 4846 txr_idx++; 4847 } 4848 4849 return 0; 4850 4851 err_out: 4852 adapter->num_tx_queues = 0; 4853 adapter->num_rx_queues = 0; 4854 adapter->num_q_vectors = 0; 4855 4856 while (v_idx--) 4857 igc_free_q_vector(adapter, v_idx); 4858 4859 return -ENOMEM; 4860 } 4861 4862 /** 4863 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 4864 * @adapter: Pointer to adapter structure 4865 * @msix: boolean for MSI-X capability 4866 * 4867 * This function initializes the interrupts and allocates all of the queues. 4868 */ 4869 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 4870 { 4871 struct net_device *dev = adapter->netdev; 4872 int err = 0; 4873 4874 igc_set_interrupt_capability(adapter, msix); 4875 4876 err = igc_alloc_q_vectors(adapter); 4877 if (err) { 4878 netdev_err(dev, "Unable to allocate memory for vectors\n"); 4879 goto err_alloc_q_vectors; 4880 } 4881 4882 igc_cache_ring_register(adapter); 4883 4884 return 0; 4885 4886 err_alloc_q_vectors: 4887 igc_reset_interrupt_capability(adapter); 4888 return err; 4889 } 4890 4891 /** 4892 * igc_sw_init - Initialize general software structures (struct igc_adapter) 4893 * @adapter: board private structure to initialize 4894 * 4895 * igc_sw_init initializes the Adapter private data structure. 4896 * Fields are initialized based on PCI device information and 4897 * OS network device settings (MTU size). 
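 *
 * For example, with the default 1500 byte MTU the maximum frame size
 * below works out to 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) +
 * VLAN_HLEN (4) = 1522 bytes, while the minimum frame size is
 * ETH_ZLEN (60) + ETH_FCS_LEN (4) = 64 bytes.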
4898 */ 4899 static int igc_sw_init(struct igc_adapter *adapter) 4900 { 4901 struct net_device *netdev = adapter->netdev; 4902 struct pci_dev *pdev = adapter->pdev; 4903 struct igc_hw *hw = &adapter->hw; 4904 4905 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 4906 4907 /* set default ring sizes */ 4908 adapter->tx_ring_count = IGC_DEFAULT_TXD; 4909 adapter->rx_ring_count = IGC_DEFAULT_RXD; 4910 4911 /* set default ITR values */ 4912 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 4913 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 4914 4915 /* set default work limits */ 4916 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 4917 4918 /* adjust max frame to be at least the size of a standard frame */ 4919 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 4920 VLAN_HLEN; 4921 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 4922 4923 mutex_init(&adapter->nfc_rule_lock); 4924 INIT_LIST_HEAD(&adapter->nfc_rule_list); 4925 adapter->nfc_rule_count = 0; 4926 4927 spin_lock_init(&adapter->stats64_lock); 4928 spin_lock_init(&adapter->qbv_tx_lock); 4929 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 4930 adapter->flags |= IGC_FLAG_HAS_MSIX; 4931 4932 igc_init_queue_configuration(adapter); 4933 4934 /* This call may decrease the number of queues */ 4935 if (igc_init_interrupt_scheme(adapter, true)) { 4936 netdev_err(netdev, "Unable to allocate memory for queues\n"); 4937 return -ENOMEM; 4938 } 4939 4940 /* Explicitly disable IRQ since the NIC can be in any state. */ 4941 igc_irq_disable(adapter); 4942 4943 set_bit(__IGC_DOWN, &adapter->state); 4944 4945 return 0; 4946 } 4947 4948 /** 4949 * igc_up - Open the interface and prepare it to handle traffic 4950 * @adapter: board private structure 4951 */ 4952 void igc_up(struct igc_adapter *adapter) 4953 { 4954 struct igc_hw *hw = &adapter->hw; 4955 int i = 0; 4956 4957 /* hardware has been reset, we need to reload some things */ 4958 igc_configure(adapter); 4959 4960 clear_bit(__IGC_DOWN, &adapter->state); 4961 4962 for (i = 0; i < adapter->num_q_vectors; i++) 4963 napi_enable(&adapter->q_vector[i]->napi); 4964 4965 if (adapter->msix_entries) 4966 igc_configure_msix(adapter); 4967 else 4968 igc_assign_vector(adapter->q_vector[0], 0); 4969 4970 /* Clear any pending interrupts. */ 4971 rd32(IGC_ICR); 4972 igc_irq_enable(adapter); 4973 4974 netif_tx_start_all_queues(adapter->netdev); 4975 4976 /* start the watchdog. */ 4977 hw->mac.get_link_status = true; 4978 schedule_work(&adapter->watchdog_task); 4979 4980 adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T | 4981 MDIO_EEE_2_5GT; 4982 } 4983 4984 /** 4985 * igc_update_stats - Update the board statistics counters 4986 * @adapter: board private structure 4987 */ 4988 void igc_update_stats(struct igc_adapter *adapter) 4989 { 4990 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 4991 struct pci_dev *pdev = adapter->pdev; 4992 struct igc_hw *hw = &adapter->hw; 4993 u64 _bytes, _packets; 4994 u64 bytes, packets; 4995 unsigned int start; 4996 u32 mpc; 4997 int i; 4998 4999 /* Prevent stats update while adapter is being reset, or if the pci 5000 * connection is down. 
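 *
 * Per-ring byte and packet counts are folded in under the u64_stats
 * seqcount retry loops, while most of the hardware counters read
 * further down are clear-on-read, which is why they are accumulated
 * with += rather than assigned.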
5001 */ 5002 if (adapter->link_speed == 0) 5003 return; 5004 if (pci_channel_offline(pdev)) 5005 return; 5006 5007 packets = 0; 5008 bytes = 0; 5009 5010 rcu_read_lock(); 5011 for (i = 0; i < adapter->num_rx_queues; i++) { 5012 struct igc_ring *ring = adapter->rx_ring[i]; 5013 u32 rqdpc = rd32(IGC_RQDPC(i)); 5014 5015 if (hw->mac.type >= igc_i225) 5016 wr32(IGC_RQDPC(i), 0); 5017 5018 if (rqdpc) { 5019 ring->rx_stats.drops += rqdpc; 5020 net_stats->rx_fifo_errors += rqdpc; 5021 } 5022 5023 do { 5024 start = u64_stats_fetch_begin(&ring->rx_syncp); 5025 _bytes = ring->rx_stats.bytes; 5026 _packets = ring->rx_stats.packets; 5027 } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); 5028 bytes += _bytes; 5029 packets += _packets; 5030 } 5031 5032 net_stats->rx_bytes = bytes; 5033 net_stats->rx_packets = packets; 5034 5035 packets = 0; 5036 bytes = 0; 5037 for (i = 0; i < adapter->num_tx_queues; i++) { 5038 struct igc_ring *ring = adapter->tx_ring[i]; 5039 5040 do { 5041 start = u64_stats_fetch_begin(&ring->tx_syncp); 5042 _bytes = ring->tx_stats.bytes; 5043 _packets = ring->tx_stats.packets; 5044 } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); 5045 bytes += _bytes; 5046 packets += _packets; 5047 } 5048 net_stats->tx_bytes = bytes; 5049 net_stats->tx_packets = packets; 5050 rcu_read_unlock(); 5051 5052 /* read stats registers */ 5053 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 5054 adapter->stats.gprc += rd32(IGC_GPRC); 5055 adapter->stats.gorc += rd32(IGC_GORCL); 5056 rd32(IGC_GORCH); /* clear GORCL */ 5057 adapter->stats.bprc += rd32(IGC_BPRC); 5058 adapter->stats.mprc += rd32(IGC_MPRC); 5059 adapter->stats.roc += rd32(IGC_ROC); 5060 5061 adapter->stats.prc64 += rd32(IGC_PRC64); 5062 adapter->stats.prc127 += rd32(IGC_PRC127); 5063 adapter->stats.prc255 += rd32(IGC_PRC255); 5064 adapter->stats.prc511 += rd32(IGC_PRC511); 5065 adapter->stats.prc1023 += rd32(IGC_PRC1023); 5066 adapter->stats.prc1522 += rd32(IGC_PRC1522); 5067 adapter->stats.tlpic += rd32(IGC_TLPIC); 5068 adapter->stats.rlpic += rd32(IGC_RLPIC); 5069 adapter->stats.hgptc += rd32(IGC_HGPTC); 5070 5071 mpc = rd32(IGC_MPC); 5072 adapter->stats.mpc += mpc; 5073 net_stats->rx_fifo_errors += mpc; 5074 adapter->stats.scc += rd32(IGC_SCC); 5075 adapter->stats.ecol += rd32(IGC_ECOL); 5076 adapter->stats.mcc += rd32(IGC_MCC); 5077 adapter->stats.latecol += rd32(IGC_LATECOL); 5078 adapter->stats.dc += rd32(IGC_DC); 5079 adapter->stats.rlec += rd32(IGC_RLEC); 5080 adapter->stats.xonrxc += rd32(IGC_XONRXC); 5081 adapter->stats.xontxc += rd32(IGC_XONTXC); 5082 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 5083 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 5084 adapter->stats.fcruc += rd32(IGC_FCRUC); 5085 adapter->stats.gptc += rd32(IGC_GPTC); 5086 adapter->stats.gotc += rd32(IGC_GOTCL); 5087 rd32(IGC_GOTCH); /* clear GOTCL */ 5088 adapter->stats.rnbc += rd32(IGC_RNBC); 5089 adapter->stats.ruc += rd32(IGC_RUC); 5090 adapter->stats.rfc += rd32(IGC_RFC); 5091 adapter->stats.rjc += rd32(IGC_RJC); 5092 adapter->stats.tor += rd32(IGC_TORH); 5093 adapter->stats.tot += rd32(IGC_TOTH); 5094 adapter->stats.tpr += rd32(IGC_TPR); 5095 5096 adapter->stats.ptc64 += rd32(IGC_PTC64); 5097 adapter->stats.ptc127 += rd32(IGC_PTC127); 5098 adapter->stats.ptc255 += rd32(IGC_PTC255); 5099 adapter->stats.ptc511 += rd32(IGC_PTC511); 5100 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 5101 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 5102 5103 adapter->stats.mptc += rd32(IGC_MPTC); 5104 adapter->stats.bptc += rd32(IGC_BPTC); 5105 5106 adapter->stats.tpt += 
rd32(IGC_TPT); 5107 adapter->stats.colc += rd32(IGC_COLC); 5108 adapter->stats.colc += rd32(IGC_RERC); 5109 5110 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 5111 5112 adapter->stats.tsctc += rd32(IGC_TSCTC); 5113 5114 adapter->stats.iac += rd32(IGC_IAC); 5115 5116 /* Fill out the OS statistics structure */ 5117 net_stats->multicast = adapter->stats.mprc; 5118 net_stats->collisions = adapter->stats.colc; 5119 5120 /* Rx Errors */ 5121 5122 /* RLEC on some newer hardware can be incorrect so build 5123 * our own version based on RUC and ROC 5124 */ 5125 net_stats->rx_errors = adapter->stats.rxerrc + 5126 adapter->stats.crcerrs + adapter->stats.algnerrc + 5127 adapter->stats.ruc + adapter->stats.roc + 5128 adapter->stats.cexterr; 5129 net_stats->rx_length_errors = adapter->stats.ruc + 5130 adapter->stats.roc; 5131 net_stats->rx_crc_errors = adapter->stats.crcerrs; 5132 net_stats->rx_frame_errors = adapter->stats.algnerrc; 5133 net_stats->rx_missed_errors = adapter->stats.mpc; 5134 5135 /* Tx Errors */ 5136 net_stats->tx_errors = adapter->stats.ecol + 5137 adapter->stats.latecol; 5138 net_stats->tx_aborted_errors = adapter->stats.ecol; 5139 net_stats->tx_window_errors = adapter->stats.latecol; 5140 net_stats->tx_carrier_errors = adapter->stats.tncrs; 5141 5142 /* Tx Dropped */ 5143 net_stats->tx_dropped = adapter->stats.txdrop; 5144 5145 /* Management Stats */ 5146 adapter->stats.mgptc += rd32(IGC_MGTPTC); 5147 adapter->stats.mgprc += rd32(IGC_MGTPRC); 5148 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 5149 } 5150 5151 /** 5152 * igc_down - Close the interface 5153 * @adapter: board private structure 5154 */ 5155 void igc_down(struct igc_adapter *adapter) 5156 { 5157 struct net_device *netdev = adapter->netdev; 5158 struct igc_hw *hw = &adapter->hw; 5159 u32 tctl, rctl; 5160 int i = 0; 5161 5162 set_bit(__IGC_DOWN, &adapter->state); 5163 5164 igc_ptp_suspend(adapter); 5165 5166 if (pci_device_is_present(adapter->pdev)) { 5167 /* disable receives in the hardware */ 5168 rctl = rd32(IGC_RCTL); 5169 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 5170 /* flush and sleep below */ 5171 } 5172 /* set trans_start so we don't get spurious watchdogs during reset */ 5173 netif_trans_update(netdev); 5174 5175 netif_carrier_off(netdev); 5176 netif_tx_stop_all_queues(netdev); 5177 5178 if (pci_device_is_present(adapter->pdev)) { 5179 /* disable transmits in the hardware */ 5180 tctl = rd32(IGC_TCTL); 5181 tctl &= ~IGC_TCTL_EN; 5182 wr32(IGC_TCTL, tctl); 5183 /* flush both disables and wait for them to finish */ 5184 wrfl(); 5185 usleep_range(10000, 20000); 5186 5187 igc_irq_disable(adapter); 5188 } 5189 5190 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5191 5192 for (i = 0; i < adapter->num_q_vectors; i++) { 5193 if (adapter->q_vector[i]) { 5194 napi_synchronize(&adapter->q_vector[i]->napi); 5195 napi_disable(&adapter->q_vector[i]->napi); 5196 } 5197 } 5198 5199 del_timer_sync(&adapter->watchdog_timer); 5200 del_timer_sync(&adapter->phy_info_timer); 5201 5202 /* record the stats before reset*/ 5203 spin_lock(&adapter->stats64_lock); 5204 igc_update_stats(adapter); 5205 spin_unlock(&adapter->stats64_lock); 5206 5207 adapter->link_speed = 0; 5208 adapter->link_duplex = 0; 5209 5210 if (!pci_channel_offline(adapter->pdev)) 5211 igc_reset(adapter); 5212 5213 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 5214 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 5215 5216 igc_disable_all_tx_rings_hw(adapter); 5217 igc_clean_all_tx_rings(adapter); 5218 igc_clean_all_rx_rings(adapter); 5219 } 5220 5221 void 
igc_reinit_locked(struct igc_adapter *adapter) 5222 { 5223 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5224 usleep_range(1000, 2000); 5225 igc_down(adapter); 5226 igc_up(adapter); 5227 clear_bit(__IGC_RESETTING, &adapter->state); 5228 } 5229 5230 static void igc_reset_task(struct work_struct *work) 5231 { 5232 struct igc_adapter *adapter; 5233 5234 adapter = container_of(work, struct igc_adapter, reset_task); 5235 5236 rtnl_lock(); 5237 /* If we're already down or resetting, just bail */ 5238 if (test_bit(__IGC_DOWN, &adapter->state) || 5239 test_bit(__IGC_RESETTING, &adapter->state)) { 5240 rtnl_unlock(); 5241 return; 5242 } 5243 5244 igc_rings_dump(adapter); 5245 igc_regs_dump(adapter); 5246 netdev_err(adapter->netdev, "Reset adapter\n"); 5247 igc_reinit_locked(adapter); 5248 rtnl_unlock(); 5249 } 5250 5251 /** 5252 * igc_change_mtu - Change the Maximum Transfer Unit 5253 * @netdev: network interface device structure 5254 * @new_mtu: new value for maximum frame size 5255 * 5256 * Returns 0 on success, negative on failure 5257 */ 5258 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 5259 { 5260 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 5261 struct igc_adapter *adapter = netdev_priv(netdev); 5262 5263 if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { 5264 netdev_dbg(netdev, "Jumbo frames not supported with XDP"); 5265 return -EINVAL; 5266 } 5267 5268 /* adjust max frame to be at least the size of a standard frame */ 5269 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 5270 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 5271 5272 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5273 usleep_range(1000, 2000); 5274 5275 /* igc_down has a dependency on max_frame_size */ 5276 adapter->max_frame_size = max_frame; 5277 5278 if (netif_running(netdev)) 5279 igc_down(adapter); 5280 5281 netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); 5282 WRITE_ONCE(netdev->mtu, new_mtu); 5283 5284 if (netif_running(netdev)) 5285 igc_up(adapter); 5286 else 5287 igc_reset(adapter); 5288 5289 clear_bit(__IGC_RESETTING, &adapter->state); 5290 5291 return 0; 5292 } 5293 5294 /** 5295 * igc_tx_timeout - Respond to a Tx Hang 5296 * @netdev: network interface device structure 5297 * @txqueue: queue number that timed out 5298 **/ 5299 static void igc_tx_timeout(struct net_device *netdev, 5300 unsigned int __always_unused txqueue) 5301 { 5302 struct igc_adapter *adapter = netdev_priv(netdev); 5303 struct igc_hw *hw = &adapter->hw; 5304 5305 /* Do the reset outside of interrupt context */ 5306 adapter->tx_timeout_count++; 5307 schedule_work(&adapter->reset_task); 5308 wr32(IGC_EICS, 5309 (adapter->eims_enable_mask & ~adapter->eims_other)); 5310 } 5311 5312 /** 5313 * igc_get_stats64 - Get System Network Statistics 5314 * @netdev: network interface device structure 5315 * @stats: rtnl_link_stats64 pointer 5316 * 5317 * Returns the address of the device statistics structure. 5318 * The statistics are updated here and also from the timer callback. 
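 *
 * adapter->stats64_lock serializes this path against the periodic
 * refresh performed from the watchdog task, and the refresh is skipped
 * while __IGC_RESETTING is set so that rings being torn down are not
 * walked.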
5319 */ 5320 static void igc_get_stats64(struct net_device *netdev, 5321 struct rtnl_link_stats64 *stats) 5322 { 5323 struct igc_adapter *adapter = netdev_priv(netdev); 5324 5325 spin_lock(&adapter->stats64_lock); 5326 if (!test_bit(__IGC_RESETTING, &adapter->state)) 5327 igc_update_stats(adapter); 5328 memcpy(stats, &adapter->stats64, sizeof(*stats)); 5329 spin_unlock(&adapter->stats64_lock); 5330 } 5331 5332 static netdev_features_t igc_fix_features(struct net_device *netdev, 5333 netdev_features_t features) 5334 { 5335 /* Since there is no support for separate Rx/Tx vlan accel 5336 * enable/disable make sure Tx flag is always in same state as Rx. 5337 */ 5338 if (features & NETIF_F_HW_VLAN_CTAG_RX) 5339 features |= NETIF_F_HW_VLAN_CTAG_TX; 5340 else 5341 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 5342 5343 return features; 5344 } 5345 5346 static int igc_set_features(struct net_device *netdev, 5347 netdev_features_t features) 5348 { 5349 netdev_features_t changed = netdev->features ^ features; 5350 struct igc_adapter *adapter = netdev_priv(netdev); 5351 5352 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 5353 igc_vlan_mode(netdev, features); 5354 5355 /* Add VLAN support */ 5356 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 5357 return 0; 5358 5359 if (!(features & NETIF_F_NTUPLE)) 5360 igc_flush_nfc_rules(adapter); 5361 5362 netdev->features = features; 5363 5364 if (netif_running(netdev)) 5365 igc_reinit_locked(adapter); 5366 else 5367 igc_reset(adapter); 5368 5369 return 1; 5370 } 5371 5372 static netdev_features_t 5373 igc_features_check(struct sk_buff *skb, struct net_device *dev, 5374 netdev_features_t features) 5375 { 5376 unsigned int network_hdr_len, mac_hdr_len; 5377 5378 /* Make certain the headers can be described by a context descriptor */ 5379 mac_hdr_len = skb_network_offset(skb); 5380 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 5381 return features & ~(NETIF_F_HW_CSUM | 5382 NETIF_F_SCTP_CRC | 5383 NETIF_F_HW_VLAN_CTAG_TX | 5384 NETIF_F_TSO | 5385 NETIF_F_TSO6); 5386 5387 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 5388 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 5389 return features & ~(NETIF_F_HW_CSUM | 5390 NETIF_F_SCTP_CRC | 5391 NETIF_F_TSO | 5392 NETIF_F_TSO6); 5393 5394 /* We can only support IPv4 TSO in tunnels if we can mangle the 5395 * inner IP ID field, so strip TSO if MANGLEID is not supported. 
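 *
 * In that case (e.g. a VXLAN encapsulated TCP stream when
 * NETIF_F_TSO_MANGLEID is not set) the stack falls back to software
 * GSO for the segmentation; the other offloads requested for the skb
 * are left untouched here.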
5396 */ 5397 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 5398 features &= ~NETIF_F_TSO; 5399 5400 return features; 5401 } 5402 5403 static void igc_tsync_interrupt(struct igc_adapter *adapter) 5404 { 5405 struct igc_hw *hw = &adapter->hw; 5406 u32 tsauxc, sec, nsec, tsicr; 5407 struct ptp_clock_event event; 5408 struct timespec64 ts; 5409 5410 tsicr = rd32(IGC_TSICR); 5411 5412 if (tsicr & IGC_TSICR_SYS_WRAP) { 5413 event.type = PTP_CLOCK_PPS; 5414 if (adapter->ptp_caps.pps) 5415 ptp_clock_event(adapter->ptp_clock, &event); 5416 } 5417 5418 if (tsicr & IGC_TSICR_TXTS) { 5419 /* retrieve hardware timestamp */ 5420 igc_ptp_tx_tstamp_event(adapter); 5421 } 5422 5423 if (tsicr & IGC_TSICR_TT0) { 5424 spin_lock(&adapter->tmreg_lock); 5425 ts = timespec64_add(adapter->perout[0].start, 5426 adapter->perout[0].period); 5427 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5428 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5429 tsauxc = rd32(IGC_TSAUXC); 5430 tsauxc |= IGC_TSAUXC_EN_TT0; 5431 wr32(IGC_TSAUXC, tsauxc); 5432 adapter->perout[0].start = ts; 5433 spin_unlock(&adapter->tmreg_lock); 5434 } 5435 5436 if (tsicr & IGC_TSICR_TT1) { 5437 spin_lock(&adapter->tmreg_lock); 5438 ts = timespec64_add(adapter->perout[1].start, 5439 adapter->perout[1].period); 5440 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5441 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5442 tsauxc = rd32(IGC_TSAUXC); 5443 tsauxc |= IGC_TSAUXC_EN_TT1; 5444 wr32(IGC_TSAUXC, tsauxc); 5445 adapter->perout[1].start = ts; 5446 spin_unlock(&adapter->tmreg_lock); 5447 } 5448 5449 if (tsicr & IGC_TSICR_AUTT0) { 5450 nsec = rd32(IGC_AUXSTMPL0); 5451 sec = rd32(IGC_AUXSTMPH0); 5452 event.type = PTP_CLOCK_EXTTS; 5453 event.index = 0; 5454 event.timestamp = sec * NSEC_PER_SEC + nsec; 5455 ptp_clock_event(adapter->ptp_clock, &event); 5456 } 5457 5458 if (tsicr & IGC_TSICR_AUTT1) { 5459 nsec = rd32(IGC_AUXSTMPL1); 5460 sec = rd32(IGC_AUXSTMPH1); 5461 event.type = PTP_CLOCK_EXTTS; 5462 event.index = 1; 5463 event.timestamp = sec * NSEC_PER_SEC + nsec; 5464 ptp_clock_event(adapter->ptp_clock, &event); 5465 } 5466 } 5467 5468 /** 5469 * igc_msix_other - msix other interrupt handler 5470 * @irq: interrupt number 5471 * @data: pointer to a q_vector 5472 */ 5473 static irqreturn_t igc_msix_other(int irq, void *data) 5474 { 5475 struct igc_adapter *adapter = data; 5476 struct igc_hw *hw = &adapter->hw; 5477 u32 icr = rd32(IGC_ICR); 5478 5479 /* reading ICR causes bit 31 of EICR to be cleared */ 5480 if (icr & IGC_ICR_DRSTA) 5481 schedule_work(&adapter->reset_task); 5482 5483 if (icr & IGC_ICR_DOUTSYNC) { 5484 /* HW is reporting DMA is out of sync */ 5485 adapter->stats.doosync++; 5486 } 5487 5488 if (icr & IGC_ICR_LSC) { 5489 hw->mac.get_link_status = true; 5490 /* guard against interrupt when we're going down */ 5491 if (!test_bit(__IGC_DOWN, &adapter->state)) 5492 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5493 } 5494 5495 if (icr & IGC_ICR_TS) 5496 igc_tsync_interrupt(adapter); 5497 5498 wr32(IGC_EIMS, adapter->eims_other); 5499 5500 return IRQ_HANDLED; 5501 } 5502 5503 static void igc_write_itr(struct igc_q_vector *q_vector) 5504 { 5505 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5506 5507 if (!q_vector->set_itr) 5508 return; 5509 5510 if (!itr_val) 5511 itr_val = IGC_ITR_VAL_MASK; 5512 5513 itr_val |= IGC_EITR_CNT_IGNR; 5514 5515 writel(itr_val, q_vector->itr_register); 5516 q_vector->set_itr = 0; 5517 } 5518 5519 static irqreturn_t igc_msix_ring(int irq, void *data) 5520 { 5521 struct igc_q_vector 
*q_vector = data; 5522 5523 /* Write the ITR value calculated from the previous interrupt. */ 5524 igc_write_itr(q_vector); 5525 5526 napi_schedule(&q_vector->napi); 5527 5528 return IRQ_HANDLED; 5529 } 5530 5531 /** 5532 * igc_request_msix - Initialize MSI-X interrupts 5533 * @adapter: Pointer to adapter structure 5534 * 5535 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5536 * kernel. 5537 */ 5538 static int igc_request_msix(struct igc_adapter *adapter) 5539 { 5540 unsigned int num_q_vectors = adapter->num_q_vectors; 5541 int i = 0, err = 0, vector = 0, free_vector = 0; 5542 struct net_device *netdev = adapter->netdev; 5543 5544 err = request_irq(adapter->msix_entries[vector].vector, 5545 &igc_msix_other, 0, netdev->name, adapter); 5546 if (err) 5547 goto err_out; 5548 5549 if (num_q_vectors > MAX_Q_VECTORS) { 5550 num_q_vectors = MAX_Q_VECTORS; 5551 dev_warn(&adapter->pdev->dev, 5552 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5553 adapter->num_q_vectors, MAX_Q_VECTORS); 5554 } 5555 for (i = 0; i < num_q_vectors; i++) { 5556 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5557 5558 vector++; 5559 5560 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5561 5562 if (q_vector->rx.ring && q_vector->tx.ring) 5563 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5564 q_vector->rx.ring->queue_index); 5565 else if (q_vector->tx.ring) 5566 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5567 q_vector->tx.ring->queue_index); 5568 else if (q_vector->rx.ring) 5569 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5570 q_vector->rx.ring->queue_index); 5571 else 5572 sprintf(q_vector->name, "%s-unused", netdev->name); 5573 5574 err = request_irq(adapter->msix_entries[vector].vector, 5575 igc_msix_ring, 0, q_vector->name, 5576 q_vector); 5577 if (err) 5578 goto err_free; 5579 } 5580 5581 igc_configure_msix(adapter); 5582 return 0; 5583 5584 err_free: 5585 /* free already assigned IRQs */ 5586 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5587 5588 vector--; 5589 for (i = 0; i < vector; i++) { 5590 free_irq(adapter->msix_entries[free_vector++].vector, 5591 adapter->q_vector[i]); 5592 } 5593 err_out: 5594 return err; 5595 } 5596 5597 /** 5598 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5599 * @adapter: Pointer to adapter structure 5600 * 5601 * This function resets the device so that it has 0 rx queues, tx queues, and 5602 * MSI-X interrupts allocated. 5603 */ 5604 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5605 { 5606 igc_free_q_vectors(adapter); 5607 igc_reset_interrupt_capability(adapter); 5608 } 5609 5610 /* Need to wait a few seconds after link up to get diagnostic information from 5611 * the phy 5612 */ 5613 static void igc_update_phy_info(struct timer_list *t) 5614 { 5615 struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); 5616 5617 igc_get_phy_info(&adapter->hw); 5618 } 5619 5620 /** 5621 * igc_has_link - check shared code for link and determine up/down 5622 * @adapter: pointer to driver private info 5623 */ 5624 bool igc_has_link(struct igc_adapter *adapter) 5625 { 5626 struct igc_hw *hw = &adapter->hw; 5627 bool link_active = false; 5628 5629 /* get_link_status is set on LSC (link status) interrupt or 5630 * rx sequence error interrupt. 
get_link_status will stay 5631 * false until the igc_check_for_link establishes link 5632 * for copper adapters ONLY 5633 */ 5634 if (!hw->mac.get_link_status) 5635 return true; 5636 hw->mac.ops.check_for_link(hw); 5637 link_active = !hw->mac.get_link_status; 5638 5639 if (hw->mac.type == igc_i225) { 5640 if (!netif_carrier_ok(adapter->netdev)) { 5641 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5642 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5643 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5644 adapter->link_check_timeout = jiffies; 5645 } 5646 } 5647 5648 return link_active; 5649 } 5650 5651 /** 5652 * igc_watchdog - Timer Call-back 5653 * @t: timer for the watchdog 5654 */ 5655 static void igc_watchdog(struct timer_list *t) 5656 { 5657 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); 5658 /* Do the rest outside of interrupt context */ 5659 schedule_work(&adapter->watchdog_task); 5660 } 5661 5662 static void igc_watchdog_task(struct work_struct *work) 5663 { 5664 struct igc_adapter *adapter = container_of(work, 5665 struct igc_adapter, 5666 watchdog_task); 5667 struct net_device *netdev = adapter->netdev; 5668 struct igc_hw *hw = &adapter->hw; 5669 struct igc_phy_info *phy = &hw->phy; 5670 u16 phy_data, retry_count = 20; 5671 u32 link; 5672 int i; 5673 5674 link = igc_has_link(adapter); 5675 5676 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5677 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5678 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5679 else 5680 link = false; 5681 } 5682 5683 if (link) { 5684 /* Cancel scheduled suspend requests. */ 5685 pm_runtime_resume(netdev->dev.parent); 5686 5687 if (!netif_carrier_ok(netdev)) { 5688 u32 ctrl; 5689 5690 hw->mac.ops.get_speed_and_duplex(hw, 5691 &adapter->link_speed, 5692 &adapter->link_duplex); 5693 5694 ctrl = rd32(IGC_CTRL); 5695 /* Link status message must follow this format */ 5696 netdev_info(netdev, 5697 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5698 adapter->link_speed, 5699 adapter->link_duplex == FULL_DUPLEX ? 5700 "Full" : "Half", 5701 (ctrl & IGC_CTRL_TFCE) && 5702 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5703 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5704 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5705 5706 /* disable EEE if enabled */ 5707 if ((adapter->flags & IGC_FLAG_EEE) && 5708 adapter->link_duplex == HALF_DUPLEX) { 5709 netdev_info(netdev, 5710 "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n"); 5711 adapter->hw.dev_spec._base.eee_enable = false; 5712 adapter->flags &= ~IGC_FLAG_EEE; 5713 } 5714 5715 /* check if SmartSpeed worked */ 5716 igc_check_downshift(hw); 5717 if (phy->speed_downgraded) 5718 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5719 5720 /* adjust timeout factor according to speed/duplex */ 5721 adapter->tx_timeout_factor = 1; 5722 switch (adapter->link_speed) { 5723 case SPEED_10: 5724 adapter->tx_timeout_factor = 14; 5725 break; 5726 case SPEED_100: 5727 case SPEED_1000: 5728 case SPEED_2500: 5729 adapter->tx_timeout_factor = 1; 5730 break; 5731 } 5732 5733 /* Once the launch time has been set on the wire, there 5734 * is a delay before the link speed can be determined 5735 * based on link-up activity. Write into the register 5736 * as soon as we know the correct link speed. 
5737 */ 5738 igc_tsn_adjust_txtime_offset(adapter); 5739 5740 if (adapter->link_speed != SPEED_1000) 5741 goto no_wait; 5742 5743 /* wait for Remote receiver status OK */ 5744 retry_read_status: 5745 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5746 &phy_data)) { 5747 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5748 retry_count) { 5749 msleep(100); 5750 retry_count--; 5751 goto retry_read_status; 5752 } else if (!retry_count) { 5753 netdev_err(netdev, "exceed max 2 second\n"); 5754 } 5755 } else { 5756 netdev_err(netdev, "read 1000Base-T Status Reg\n"); 5757 } 5758 no_wait: 5759 netif_carrier_on(netdev); 5760 5761 /* link state has changed, schedule phy info update */ 5762 if (!test_bit(__IGC_DOWN, &adapter->state)) 5763 mod_timer(&adapter->phy_info_timer, 5764 round_jiffies(jiffies + 2 * HZ)); 5765 } 5766 } else { 5767 if (netif_carrier_ok(netdev)) { 5768 adapter->link_speed = 0; 5769 adapter->link_duplex = 0; 5770 5771 /* Link status message must follow this format */ 5772 netdev_info(netdev, "NIC Link is Down\n"); 5773 netif_carrier_off(netdev); 5774 5775 /* link state has changed, schedule phy info update */ 5776 if (!test_bit(__IGC_DOWN, &adapter->state)) 5777 mod_timer(&adapter->phy_info_timer, 5778 round_jiffies(jiffies + 2 * HZ)); 5779 5780 pm_schedule_suspend(netdev->dev.parent, 5781 MSEC_PER_SEC * 5); 5782 } 5783 } 5784 5785 spin_lock(&adapter->stats64_lock); 5786 igc_update_stats(adapter); 5787 spin_unlock(&adapter->stats64_lock); 5788 5789 for (i = 0; i < adapter->num_tx_queues; i++) { 5790 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5791 5792 if (!netif_carrier_ok(netdev)) { 5793 /* We've lost link, so the controller stops DMA, 5794 * but we've got queued Tx work that's never going 5795 * to get done, so reset controller to flush Tx. 5796 * (Do the reset outside of interrupt context). 
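 *
 * A ring counts as non-empty here when igc_desc_unused() + 1 is still
 * smaller than the ring size, i.e. at least one descriptor is
 * outstanding.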
5797 */ 5798 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5799 adapter->tx_timeout_count++; 5800 schedule_work(&adapter->reset_task); 5801 /* return immediately since reset is imminent */ 5802 return; 5803 } 5804 } 5805 5806 /* Force detection of hung controller every watchdog period */ 5807 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5808 } 5809 5810 /* Cause software interrupt to ensure Rx ring is cleaned */ 5811 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5812 u32 eics = 0; 5813 5814 for (i = 0; i < adapter->num_q_vectors; i++) 5815 eics |= adapter->q_vector[i]->eims_value; 5816 wr32(IGC_EICS, eics); 5817 } else { 5818 wr32(IGC_ICS, IGC_ICS_RXDMT0); 5819 } 5820 5821 igc_ptp_tx_hang(adapter); 5822 5823 /* Reset the timer */ 5824 if (!test_bit(__IGC_DOWN, &adapter->state)) { 5825 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 5826 mod_timer(&adapter->watchdog_timer, 5827 round_jiffies(jiffies + HZ)); 5828 else 5829 mod_timer(&adapter->watchdog_timer, 5830 round_jiffies(jiffies + 2 * HZ)); 5831 } 5832 } 5833 5834 /** 5835 * igc_intr_msi - Interrupt Handler 5836 * @irq: interrupt number 5837 * @data: pointer to a network interface device structure 5838 */ 5839 static irqreturn_t igc_intr_msi(int irq, void *data) 5840 { 5841 struct igc_adapter *adapter = data; 5842 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5843 struct igc_hw *hw = &adapter->hw; 5844 /* read ICR disables interrupts using IAM */ 5845 u32 icr = rd32(IGC_ICR); 5846 5847 igc_write_itr(q_vector); 5848 5849 if (icr & IGC_ICR_DRSTA) 5850 schedule_work(&adapter->reset_task); 5851 5852 if (icr & IGC_ICR_DOUTSYNC) { 5853 /* HW is reporting DMA is out of sync */ 5854 adapter->stats.doosync++; 5855 } 5856 5857 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5858 hw->mac.get_link_status = true; 5859 if (!test_bit(__IGC_DOWN, &adapter->state)) 5860 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5861 } 5862 5863 if (icr & IGC_ICR_TS) 5864 igc_tsync_interrupt(adapter); 5865 5866 napi_schedule(&q_vector->napi); 5867 5868 return IRQ_HANDLED; 5869 } 5870 5871 /** 5872 * igc_intr - Legacy Interrupt Handler 5873 * @irq: interrupt number 5874 * @data: pointer to a network interface device structure 5875 */ 5876 static irqreturn_t igc_intr(int irq, void *data) 5877 { 5878 struct igc_adapter *adapter = data; 5879 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5880 struct igc_hw *hw = &adapter->hw; 5881 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No 5882 * need for the IMC write 5883 */ 5884 u32 icr = rd32(IGC_ICR); 5885 5886 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 5887 * not set, then the adapter didn't send an interrupt 5888 */ 5889 if (!(icr & IGC_ICR_INT_ASSERTED)) 5890 return IRQ_NONE; 5891 5892 igc_write_itr(q_vector); 5893 5894 if (icr & IGC_ICR_DRSTA) 5895 schedule_work(&adapter->reset_task); 5896 5897 if (icr & IGC_ICR_DOUTSYNC) { 5898 /* HW is reporting DMA is out of sync */ 5899 adapter->stats.doosync++; 5900 } 5901 5902 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5903 hw->mac.get_link_status = true; 5904 /* guard against interrupt when we're going down */ 5905 if (!test_bit(__IGC_DOWN, &adapter->state)) 5906 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5907 } 5908 5909 if (icr & IGC_ICR_TS) 5910 igc_tsync_interrupt(adapter); 5911 5912 napi_schedule(&q_vector->napi); 5913 5914 return IRQ_HANDLED; 5915 } 5916 5917 static void igc_free_irq(struct igc_adapter *adapter) 5918 { 5919 if (adapter->msix_entries) { 5920 int vector = 0, i; 5921 5922 free_irq(adapter->msix_entries[vector++].vector, adapter); 5923 5924 for (i = 0; i < adapter->num_q_vectors; i++) 5925 free_irq(adapter->msix_entries[vector++].vector, 5926 adapter->q_vector[i]); 5927 } else { 5928 free_irq(adapter->pdev->irq, adapter); 5929 } 5930 } 5931 5932 /** 5933 * igc_request_irq - initialize interrupts 5934 * @adapter: Pointer to adapter structure 5935 * 5936 * Attempts to configure interrupts using the best available 5937 * capabilities of the hardware and kernel. 5938 */ 5939 static int igc_request_irq(struct igc_adapter *adapter) 5940 { 5941 struct net_device *netdev = adapter->netdev; 5942 struct pci_dev *pdev = adapter->pdev; 5943 int err = 0; 5944 5945 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5946 err = igc_request_msix(adapter); 5947 if (!err) 5948 goto request_done; 5949 /* fall back to MSI */ 5950 igc_free_all_tx_resources(adapter); 5951 igc_free_all_rx_resources(adapter); 5952 5953 igc_clear_interrupt_scheme(adapter); 5954 err = igc_init_interrupt_scheme(adapter, false); 5955 if (err) 5956 goto request_done; 5957 igc_setup_all_tx_resources(adapter); 5958 igc_setup_all_rx_resources(adapter); 5959 igc_configure(adapter); 5960 } 5961 5962 igc_assign_vector(adapter->q_vector[0], 0); 5963 5964 if (adapter->flags & IGC_FLAG_HAS_MSI) { 5965 err = request_irq(pdev->irq, &igc_intr_msi, 0, 5966 netdev->name, adapter); 5967 if (!err) 5968 goto request_done; 5969 5970 /* fall back to legacy interrupts */ 5971 igc_reset_interrupt_capability(adapter); 5972 adapter->flags &= ~IGC_FLAG_HAS_MSI; 5973 } 5974 5975 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 5976 netdev->name, adapter); 5977 5978 if (err) 5979 netdev_err(netdev, "Error %d getting interrupt\n", err); 5980 5981 request_done: 5982 return err; 5983 } 5984 5985 /** 5986 * __igc_open - Called when a network interface is made active 5987 * @netdev: network interface device structure 5988 * @resuming: boolean indicating if the device is resuming 5989 * 5990 * Returns 0 on success, negative value on failure 5991 * 5992 * The open entry point is called when a network interface is made 5993 * active by the system (IFF_UP). At this point all resources needed 5994 * for transmit and receive operations are allocated, the interrupt 5995 * handler is registered with the OS, the watchdog timer is started, 5996 * and the stack is notified that the interface is ready. 
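 *
 * The ordering below matters: descriptor rings are allocated and the
 * PHY powered up before igc_configure(), the IRQs are requested before
 * __IGC_DOWN is cleared and NAPI is enabled, and the watchdog task is
 * kicked last so that link state is evaluated once the queues are
 * running.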
5997 */ 5998 static int __igc_open(struct net_device *netdev, bool resuming) 5999 { 6000 struct igc_adapter *adapter = netdev_priv(netdev); 6001 struct pci_dev *pdev = adapter->pdev; 6002 struct igc_hw *hw = &adapter->hw; 6003 int err = 0; 6004 int i = 0; 6005 6006 /* disallow open during test */ 6007 6008 if (test_bit(__IGC_TESTING, &adapter->state)) { 6009 WARN_ON(resuming); 6010 return -EBUSY; 6011 } 6012 6013 if (!resuming) 6014 pm_runtime_get_sync(&pdev->dev); 6015 6016 netif_carrier_off(netdev); 6017 6018 /* allocate transmit descriptors */ 6019 err = igc_setup_all_tx_resources(adapter); 6020 if (err) 6021 goto err_setup_tx; 6022 6023 /* allocate receive descriptors */ 6024 err = igc_setup_all_rx_resources(adapter); 6025 if (err) 6026 goto err_setup_rx; 6027 6028 igc_power_up_link(adapter); 6029 6030 igc_configure(adapter); 6031 6032 err = igc_request_irq(adapter); 6033 if (err) 6034 goto err_req_irq; 6035 6036 clear_bit(__IGC_DOWN, &adapter->state); 6037 6038 for (i = 0; i < adapter->num_q_vectors; i++) 6039 napi_enable(&adapter->q_vector[i]->napi); 6040 6041 /* Clear any pending interrupts. */ 6042 rd32(IGC_ICR); 6043 igc_irq_enable(adapter); 6044 6045 if (!resuming) 6046 pm_runtime_put(&pdev->dev); 6047 6048 netif_tx_start_all_queues(netdev); 6049 6050 /* start the watchdog. */ 6051 hw->mac.get_link_status = true; 6052 schedule_work(&adapter->watchdog_task); 6053 6054 return IGC_SUCCESS; 6055 6056 err_req_irq: 6057 igc_release_hw_control(adapter); 6058 igc_power_down_phy_copper_base(&adapter->hw); 6059 igc_free_all_rx_resources(adapter); 6060 err_setup_rx: 6061 igc_free_all_tx_resources(adapter); 6062 err_setup_tx: 6063 igc_reset(adapter); 6064 if (!resuming) 6065 pm_runtime_put(&pdev->dev); 6066 6067 return err; 6068 } 6069 6070 int igc_open(struct net_device *netdev) 6071 { 6072 struct igc_adapter *adapter = netdev_priv(netdev); 6073 int err; 6074 6075 /* Notify the stack of the actual queue counts. */ 6076 err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, 6077 adapter->num_rx_queues); 6078 if (err) { 6079 netdev_err(netdev, "error setting real queue count\n"); 6080 return err; 6081 } 6082 6083 return __igc_open(netdev, false); 6084 } 6085 6086 /** 6087 * __igc_close - Disables a network interface 6088 * @netdev: network interface device structure 6089 * @suspending: boolean indicating the device is suspending 6090 * 6091 * Returns 0, this is not allowed to fail 6092 * 6093 * The close entry point is called when an interface is de-activated 6094 * by the OS. The hardware is still under the driver's control, but 6095 * needs to be disabled. A global MAC reset is issued to stop the 6096 * hardware, and all transmit and receive resources are freed. 
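 *
 * As in __igc_open(), the runtime PM get/put pair is skipped when
 * @suspending is true, since the suspend path handles the device's power
 * state itself.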
6097 */ 6098 static int __igc_close(struct net_device *netdev, bool suspending) 6099 { 6100 struct igc_adapter *adapter = netdev_priv(netdev); 6101 struct pci_dev *pdev = adapter->pdev; 6102 6103 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 6104 6105 if (!suspending) 6106 pm_runtime_get_sync(&pdev->dev); 6107 6108 igc_down(adapter); 6109 6110 igc_release_hw_control(adapter); 6111 6112 igc_free_irq(adapter); 6113 6114 igc_free_all_tx_resources(adapter); 6115 igc_free_all_rx_resources(adapter); 6116 6117 if (!suspending) 6118 pm_runtime_put_sync(&pdev->dev); 6119 6120 return 0; 6121 } 6122 6123 int igc_close(struct net_device *netdev) 6124 { 6125 if (netif_device_present(netdev) || netdev->dismantle) 6126 return __igc_close(netdev, false); 6127 return 0; 6128 } 6129 6130 /** 6131 * igc_ioctl - Access the hwtstamp interface 6132 * @netdev: network interface device structure 6133 * @ifr: interface request data 6134 * @cmd: ioctl command 6135 **/ 6136 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6137 { 6138 switch (cmd) { 6139 case SIOCGHWTSTAMP: 6140 return igc_ptp_get_ts_config(netdev, ifr); 6141 case SIOCSHWTSTAMP: 6142 return igc_ptp_set_ts_config(netdev, ifr); 6143 default: 6144 return -EOPNOTSUPP; 6145 } 6146 } 6147 6148 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, 6149 bool enable) 6150 { 6151 struct igc_ring *ring; 6152 6153 if (queue < 0 || queue >= adapter->num_tx_queues) 6154 return -EINVAL; 6155 6156 ring = adapter->tx_ring[queue]; 6157 ring->launchtime_enable = enable; 6158 6159 return 0; 6160 } 6161 6162 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) 6163 { 6164 struct timespec64 b; 6165 6166 b = ktime_to_timespec64(base_time); 6167 6168 return timespec64_compare(now, &b) > 0; 6169 } 6170 6171 static bool validate_schedule(struct igc_adapter *adapter, 6172 const struct tc_taprio_qopt_offload *qopt) 6173 { 6174 int queue_uses[IGC_MAX_TX_QUEUES] = { }; 6175 struct igc_hw *hw = &adapter->hw; 6176 struct timespec64 now; 6177 size_t n; 6178 6179 if (qopt->cycle_time_extension) 6180 return false; 6181 6182 igc_ptp_read(adapter, &now); 6183 6184 /* If we program the controller's BASET registers with a time 6185 * in the future, it will hold all the packets until that 6186 * time, causing a lot of TX Hangs, so to avoid that, we 6187 * reject schedules that would start in the future. 6188 * Note: Limitation above is no longer in i226. 6189 */ 6190 if (!is_base_time_past(qopt->base_time, &now) && 6191 igc_is_device_id_i225(hw)) 6192 return false; 6193 6194 for (n = 0; n < qopt->num_entries; n++) { 6195 const struct tc_taprio_sched_entry *e, *prev; 6196 int i; 6197 6198 prev = n ? &qopt->entries[n - 1] : NULL; 6199 e = &qopt->entries[n]; 6200 6201 /* i225 only supports "global" frame preemption 6202 * settings. 6203 */ 6204 if (e->command != TC_TAPRIO_CMD_SET_GATES) 6205 return false; 6206 6207 for (i = 0; i < adapter->num_tx_queues; i++) 6208 if (e->gate_mask & BIT(i)) { 6209 queue_uses[i]++; 6210 6211 /* There are limitations: A single queue cannot 6212 * be opened and closed multiple times per cycle 6213 * unless the gate stays open. Check for it. 
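				 * In other words, a queue may appear in
				 * consecutive entries only when the previous
				 * entry also kept its gate open; otherwise
				 * the schedule is rejected.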
6214 */ 6215 if (queue_uses[i] > 1 && 6216 !(prev->gate_mask & BIT(i))) 6217 return false; 6218 } 6219 } 6220 6221 return true; 6222 } 6223 6224 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, 6225 struct tc_etf_qopt_offload *qopt) 6226 { 6227 struct igc_hw *hw = &adapter->hw; 6228 int err; 6229 6230 if (hw->mac.type != igc_i225) 6231 return -EOPNOTSUPP; 6232 6233 err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable); 6234 if (err) 6235 return err; 6236 6237 return igc_tsn_offload_apply(adapter); 6238 } 6239 6240 static int igc_qbv_clear_schedule(struct igc_adapter *adapter) 6241 { 6242 unsigned long flags; 6243 int i; 6244 6245 adapter->base_time = 0; 6246 adapter->cycle_time = NSEC_PER_SEC; 6247 adapter->taprio_offload_enable = false; 6248 adapter->qbv_config_change_errors = 0; 6249 adapter->qbv_count = 0; 6250 6251 for (i = 0; i < adapter->num_tx_queues; i++) { 6252 struct igc_ring *ring = adapter->tx_ring[i]; 6253 6254 ring->start_time = 0; 6255 ring->end_time = NSEC_PER_SEC; 6256 ring->max_sdu = 0; 6257 } 6258 6259 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6260 6261 adapter->qbv_transition = false; 6262 6263 for (i = 0; i < adapter->num_tx_queues; i++) { 6264 struct igc_ring *ring = adapter->tx_ring[i]; 6265 6266 ring->oper_gate_closed = false; 6267 ring->admin_gate_closed = false; 6268 } 6269 6270 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6271 6272 return 0; 6273 } 6274 6275 static int igc_tsn_clear_schedule(struct igc_adapter *adapter) 6276 { 6277 igc_qbv_clear_schedule(adapter); 6278 6279 return 0; 6280 } 6281 6282 static void igc_taprio_stats(struct net_device *dev, 6283 struct tc_taprio_qopt_stats *stats) 6284 { 6285 /* When Strict_End is enabled, the tx_overruns counter 6286 * will always be zero. 6287 */ 6288 stats->tx_overruns = 0; 6289 } 6290 6291 static void igc_taprio_queue_stats(struct net_device *dev, 6292 struct tc_taprio_qopt_queue_stats *queue_stats) 6293 { 6294 struct tc_taprio_qopt_stats *stats = &queue_stats->stats; 6295 6296 /* When Strict_End is enabled, the tx_overruns counter 6297 * will always be zero. 
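	 * It is reported as a constant zero rather than read back from a
	 * hardware counter.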
6298 */ 6299 stats->tx_overruns = 0; 6300 } 6301 6302 static int igc_save_qbv_schedule(struct igc_adapter *adapter, 6303 struct tc_taprio_qopt_offload *qopt) 6304 { 6305 bool queue_configured[IGC_MAX_TX_QUEUES] = { }; 6306 struct igc_hw *hw = &adapter->hw; 6307 u32 start_time = 0, end_time = 0; 6308 struct timespec64 now; 6309 unsigned long flags; 6310 size_t n; 6311 int i; 6312 6313 switch (qopt->cmd) { 6314 case TAPRIO_CMD_REPLACE: 6315 break; 6316 case TAPRIO_CMD_DESTROY: 6317 return igc_tsn_clear_schedule(adapter); 6318 case TAPRIO_CMD_STATS: 6319 igc_taprio_stats(adapter->netdev, &qopt->stats); 6320 return 0; 6321 case TAPRIO_CMD_QUEUE_STATS: 6322 igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); 6323 return 0; 6324 default: 6325 return -EOPNOTSUPP; 6326 } 6327 6328 if (qopt->base_time < 0) 6329 return -ERANGE; 6330 6331 if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) 6332 return -EALREADY; 6333 6334 if (!validate_schedule(adapter, qopt)) 6335 return -EINVAL; 6336 6337 adapter->cycle_time = qopt->cycle_time; 6338 adapter->base_time = qopt->base_time; 6339 adapter->taprio_offload_enable = true; 6340 6341 igc_ptp_read(adapter, &now); 6342 6343 for (n = 0; n < qopt->num_entries; n++) { 6344 struct tc_taprio_sched_entry *e = &qopt->entries[n]; 6345 6346 end_time += e->interval; 6347 6348 /* If any of the conditions below are true, we need to manually 6349 * control the end time of the cycle. 6350 * 1. Qbv users can specify a cycle time that is not equal 6351 * to the total GCL intervals. Hence, recalculation is 6352 * necessary here to exclude the time interval that 6353 * exceeds the cycle time. 6354 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, 6355 * once the end of the list is reached, it will switch 6356 * to the END_OF_CYCLE state and leave the gates in the 6357 * same state until the next cycle is started. 6358 */ 6359 if (end_time > adapter->cycle_time || 6360 n + 1 == qopt->num_entries) 6361 end_time = adapter->cycle_time; 6362 6363 for (i = 0; i < adapter->num_tx_queues; i++) { 6364 struct igc_ring *ring = adapter->tx_ring[i]; 6365 6366 if (!(e->gate_mask & BIT(i))) 6367 continue; 6368 6369 /* Check whether a queue stays open for more than one 6370 * entry. If so, keep the start and advance the end 6371 * time. 6372 */ 6373 if (!queue_configured[i]) 6374 ring->start_time = start_time; 6375 ring->end_time = end_time; 6376 6377 if (ring->start_time >= adapter->cycle_time) 6378 queue_configured[i] = false; 6379 else 6380 queue_configured[i] = true; 6381 } 6382 6383 start_time += e->interval; 6384 } 6385 6386 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6387 6388 /* Check whether a queue gets configured. 6389 * If not, set the start and end time to be end time. 
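	 * With start_time == end_time the queue gets no transmission window;
	 * the flags below record whether that closure belongs to the pending
	 * (admin, base_time still in the future) or to the currently
	 * operating schedule.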
6390 */ 6391 for (i = 0; i < adapter->num_tx_queues; i++) { 6392 struct igc_ring *ring = adapter->tx_ring[i]; 6393 6394 if (!is_base_time_past(qopt->base_time, &now)) { 6395 ring->admin_gate_closed = false; 6396 } else { 6397 ring->oper_gate_closed = false; 6398 ring->admin_gate_closed = false; 6399 } 6400 6401 if (!queue_configured[i]) { 6402 if (!is_base_time_past(qopt->base_time, &now)) 6403 ring->admin_gate_closed = true; 6404 else 6405 ring->oper_gate_closed = true; 6406 6407 ring->start_time = end_time; 6408 ring->end_time = end_time; 6409 } 6410 } 6411 6412 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6413 6414 for (i = 0; i < adapter->num_tx_queues; i++) { 6415 struct igc_ring *ring = adapter->tx_ring[i]; 6416 struct net_device *dev = adapter->netdev; 6417 6418 if (qopt->max_sdu[i]) 6419 ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; 6420 else 6421 ring->max_sdu = 0; 6422 } 6423 6424 return 0; 6425 } 6426 6427 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, 6428 struct tc_taprio_qopt_offload *qopt) 6429 { 6430 struct igc_hw *hw = &adapter->hw; 6431 int err; 6432 6433 if (hw->mac.type != igc_i225) 6434 return -EOPNOTSUPP; 6435 6436 err = igc_save_qbv_schedule(adapter, qopt); 6437 if (err) 6438 return err; 6439 6440 return igc_tsn_offload_apply(adapter); 6441 } 6442 6443 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue, 6444 bool enable, int idleslope, int sendslope, 6445 int hicredit, int locredit) 6446 { 6447 bool cbs_status[IGC_MAX_SR_QUEUES] = { false }; 6448 struct net_device *netdev = adapter->netdev; 6449 struct igc_ring *ring; 6450 int i; 6451 6452 /* i225 has two sets of credit-based shaper logic. 6453 * Supporting it only on the top two priority queues 6454 */ 6455 if (queue < 0 || queue > 1) 6456 return -EINVAL; 6457 6458 ring = adapter->tx_ring[queue]; 6459 6460 for (i = 0; i < IGC_MAX_SR_QUEUES; i++) 6461 if (adapter->tx_ring[i]) 6462 cbs_status[i] = adapter->tx_ring[i]->cbs_enable; 6463 6464 /* CBS should be enabled on the highest priority queue first in order 6465 * for the CBS algorithm to operate as intended. 
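	 * Hence the ordering enforced below: queue 1 can only be enabled
	 * while CBS is already active on queue 0, and CBS on queue 0 can
	 * only be disabled once it has been disabled on queue 1.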
6466 */ 6467 if (enable) { 6468 if (queue == 1 && !cbs_status[0]) { 6469 netdev_err(netdev, 6470 "Enabling CBS on queue1 before queue0\n"); 6471 return -EINVAL; 6472 } 6473 } else { 6474 if (queue == 0 && cbs_status[1]) { 6475 netdev_err(netdev, 6476 "Disabling CBS on queue0 before queue1\n"); 6477 return -EINVAL; 6478 } 6479 } 6480 6481 ring->cbs_enable = enable; 6482 ring->idleslope = idleslope; 6483 ring->sendslope = sendslope; 6484 ring->hicredit = hicredit; 6485 ring->locredit = locredit; 6486 6487 return 0; 6488 } 6489 6490 static int igc_tsn_enable_cbs(struct igc_adapter *adapter, 6491 struct tc_cbs_qopt_offload *qopt) 6492 { 6493 struct igc_hw *hw = &adapter->hw; 6494 int err; 6495 6496 if (hw->mac.type != igc_i225) 6497 return -EOPNOTSUPP; 6498 6499 if (qopt->queue < 0 || qopt->queue > 1) 6500 return -EINVAL; 6501 6502 err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, 6503 qopt->idleslope, qopt->sendslope, 6504 qopt->hicredit, qopt->locredit); 6505 if (err) 6506 return err; 6507 6508 return igc_tsn_offload_apply(adapter); 6509 } 6510 6511 static int igc_tc_query_caps(struct igc_adapter *adapter, 6512 struct tc_query_caps_base *base) 6513 { 6514 struct igc_hw *hw = &adapter->hw; 6515 6516 switch (base->type) { 6517 case TC_SETUP_QDISC_TAPRIO: { 6518 struct tc_taprio_caps *caps = base->caps; 6519 6520 caps->broken_mqprio = true; 6521 6522 if (hw->mac.type == igc_i225) { 6523 caps->supports_queue_max_sdu = true; 6524 caps->gate_mask_per_txq = true; 6525 } 6526 6527 return 0; 6528 } 6529 default: 6530 return -EOPNOTSUPP; 6531 } 6532 } 6533 6534 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 6535 void *type_data) 6536 { 6537 struct igc_adapter *adapter = netdev_priv(dev); 6538 6539 adapter->tc_setup_type = type; 6540 6541 switch (type) { 6542 case TC_QUERY_CAPS: 6543 return igc_tc_query_caps(adapter, type_data); 6544 case TC_SETUP_QDISC_TAPRIO: 6545 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 6546 6547 case TC_SETUP_QDISC_ETF: 6548 return igc_tsn_enable_launchtime(adapter, type_data); 6549 6550 case TC_SETUP_QDISC_CBS: 6551 return igc_tsn_enable_cbs(adapter, type_data); 6552 6553 default: 6554 return -EOPNOTSUPP; 6555 } 6556 } 6557 6558 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 6559 { 6560 struct igc_adapter *adapter = netdev_priv(dev); 6561 6562 switch (bpf->command) { 6563 case XDP_SETUP_PROG: 6564 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 6565 case XDP_SETUP_XSK_POOL: 6566 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 6567 bpf->xsk.queue_id); 6568 default: 6569 return -EOPNOTSUPP; 6570 } 6571 } 6572 6573 static int igc_xdp_xmit(struct net_device *dev, int num_frames, 6574 struct xdp_frame **frames, u32 flags) 6575 { 6576 struct igc_adapter *adapter = netdev_priv(dev); 6577 int cpu = smp_processor_id(); 6578 struct netdev_queue *nq; 6579 struct igc_ring *ring; 6580 int i, nxmit; 6581 6582 if (unlikely(!netif_carrier_ok(dev))) 6583 return -ENETDOWN; 6584 6585 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 6586 return -EINVAL; 6587 6588 ring = igc_xdp_get_tx_ring(adapter, cpu); 6589 nq = txring_txq(ring); 6590 6591 __netif_tx_lock(nq, cpu); 6592 6593 /* Avoid transmit queue timeout since we share it with the slow path */ 6594 txq_trans_cond_update(nq); 6595 6596 nxmit = 0; 6597 for (i = 0; i < num_frames; i++) { 6598 int err; 6599 struct xdp_frame *xdpf = frames[i]; 6600 6601 err = igc_xdp_init_tx_descriptor(ring, xdpf); 6602 if (err) 6603 break; 6604 nxmit++; 6605 } 6606 6607 if (flags & 
XDP_XMIT_FLUSH) 6608 igc_flush_tx_descriptors(ring); 6609 6610 __netif_tx_unlock(nq); 6611 6612 return nxmit; 6613 } 6614 6615 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6616 struct igc_q_vector *q_vector) 6617 { 6618 struct igc_hw *hw = &adapter->hw; 6619 u32 eics = 0; 6620 6621 eics |= q_vector->eims_value; 6622 wr32(IGC_EICS, eics); 6623 } 6624 6625 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6626 { 6627 struct igc_adapter *adapter = netdev_priv(dev); 6628 struct igc_q_vector *q_vector; 6629 struct igc_ring *ring; 6630 6631 if (test_bit(__IGC_DOWN, &adapter->state)) 6632 return -ENETDOWN; 6633 6634 if (!igc_xdp_is_enabled(adapter)) 6635 return -ENXIO; 6636 6637 if (queue_id >= adapter->num_rx_queues) 6638 return -EINVAL; 6639 6640 ring = adapter->rx_ring[queue_id]; 6641 6642 if (!ring->xsk_pool) 6643 return -ENXIO; 6644 6645 q_vector = adapter->q_vector[queue_id]; 6646 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6647 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6648 6649 return 0; 6650 } 6651 6652 static ktime_t igc_get_tstamp(struct net_device *dev, 6653 const struct skb_shared_hwtstamps *hwtstamps, 6654 bool cycles) 6655 { 6656 struct igc_adapter *adapter = netdev_priv(dev); 6657 struct igc_inline_rx_tstamps *tstamp; 6658 ktime_t timestamp; 6659 6660 tstamp = hwtstamps->netdev_data; 6661 6662 if (cycles) 6663 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); 6664 else 6665 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6666 6667 return timestamp; 6668 } 6669 6670 static const struct net_device_ops igc_netdev_ops = { 6671 .ndo_open = igc_open, 6672 .ndo_stop = igc_close, 6673 .ndo_start_xmit = igc_xmit_frame, 6674 .ndo_set_rx_mode = igc_set_rx_mode, 6675 .ndo_set_mac_address = igc_set_mac, 6676 .ndo_change_mtu = igc_change_mtu, 6677 .ndo_tx_timeout = igc_tx_timeout, 6678 .ndo_get_stats64 = igc_get_stats64, 6679 .ndo_fix_features = igc_fix_features, 6680 .ndo_set_features = igc_set_features, 6681 .ndo_features_check = igc_features_check, 6682 .ndo_eth_ioctl = igc_ioctl, 6683 .ndo_setup_tc = igc_setup_tc, 6684 .ndo_bpf = igc_bpf, 6685 .ndo_xdp_xmit = igc_xdp_xmit, 6686 .ndo_xsk_wakeup = igc_xsk_wakeup, 6687 .ndo_get_tstamp = igc_get_tstamp, 6688 }; 6689 6690 /* PCIe configuration access */ 6691 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 6692 { 6693 struct igc_adapter *adapter = hw->back; 6694 6695 pci_read_config_word(adapter->pdev, reg, value); 6696 } 6697 6698 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 6699 { 6700 struct igc_adapter *adapter = hw->back; 6701 6702 pci_write_config_word(adapter->pdev, reg, *value); 6703 } 6704 6705 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 6706 { 6707 struct igc_adapter *adapter = hw->back; 6708 6709 if (!pci_is_pcie(adapter->pdev)) 6710 return -IGC_ERR_CONFIG; 6711 6712 pcie_capability_read_word(adapter->pdev, reg, value); 6713 6714 return IGC_SUCCESS; 6715 } 6716 6717 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 6718 { 6719 struct igc_adapter *adapter = hw->back; 6720 6721 if (!pci_is_pcie(adapter->pdev)) 6722 return -IGC_ERR_CONFIG; 6723 6724 pcie_capability_write_word(adapter->pdev, reg, *value); 6725 6726 return IGC_SUCCESS; 6727 } 6728 6729 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6730 { 6731 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6732 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6733 u32 value = 0; 6734 6735 if (IGC_REMOVED(hw_addr)) 6736 return ~value; 
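	/* A value of all 1s may indicate that the device has dropped off the
	 * bus; the check below confirms this by also looking at the register
	 * at offset 0 before detaching the netdev.
	 */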
6737 6738 value = readl(&hw_addr[reg]); 6739 6740 /* reads should not return all F's */ 6741 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 6742 struct net_device *netdev = igc->netdev; 6743 6744 hw->hw_addr = NULL; 6745 netif_device_detach(netdev); 6746 netdev_err(netdev, "PCIe link lost, device now detached\n"); 6747 WARN(pci_device_is_present(igc->pdev), 6748 "igc: Failed to read reg 0x%x!\n", reg); 6749 } 6750 6751 return value; 6752 } 6753 6754 /* Mapping HW RSS Type to enum xdp_rss_hash_type */ 6755 static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { 6756 [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, 6757 [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6758 [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, 6759 [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6760 [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6761 [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, 6762 [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6763 [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6764 [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6765 [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, 6766 [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ 6767 [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ 6768 [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ 6769 [13] = XDP_RSS_TYPE_NONE, 6770 [14] = XDP_RSS_TYPE_NONE, 6771 [15] = XDP_RSS_TYPE_NONE, 6772 }; 6773 6774 static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6775 enum xdp_rss_hash_type *rss_type) 6776 { 6777 const struct igc_xdp_buff *ctx = (void *)_ctx; 6778 6779 if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) 6780 return -ENODATA; 6781 6782 *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); 6783 *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; 6784 6785 return 0; 6786 } 6787 6788 static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) 6789 { 6790 const struct igc_xdp_buff *ctx = (void *)_ctx; 6791 struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); 6792 struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; 6793 6794 if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { 6795 *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6796 6797 return 0; 6798 } 6799 6800 return -ENODATA; 6801 } 6802 6803 static const struct xdp_metadata_ops igc_xdp_metadata_ops = { 6804 .xmo_rx_hash = igc_xdp_rx_hash, 6805 .xmo_rx_timestamp = igc_xdp_rx_timestamp, 6806 }; 6807 6808 static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) 6809 { 6810 struct igc_adapter *adapter = container_of(timer, struct igc_adapter, 6811 hrtimer); 6812 unsigned long flags; 6813 unsigned int i; 6814 6815 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6816 6817 adapter->qbv_transition = true; 6818 for (i = 0; i < adapter->num_tx_queues; i++) { 6819 struct igc_ring *tx_ring = adapter->tx_ring[i]; 6820 6821 if (tx_ring->admin_gate_closed) { 6822 tx_ring->admin_gate_closed = false; 6823 tx_ring->oper_gate_closed = true; 6824 } else { 6825 tx_ring->oper_gate_closed = false; 6826 } 6827 } 6828 adapter->qbv_transition = false; 6829 6830 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6831 6832 return HRTIMER_NORESTART; 6833 } 6834 6835 /** 6836 * igc_probe - Device Initialization Routine 6837 * @pdev: PCI device information struct 6838 * @ent: entry in igc_pci_tbl 6839 * 6840 * Returns 0 on success, negative on failure 6841 * 6842 * igc_probe initializes an adapter 
identified by a pci_dev structure. 6843 * The OS initialization, configuring the adapter private structure, 6844 * and a hardware reset occur. 6845 */ 6846 static int igc_probe(struct pci_dev *pdev, 6847 const struct pci_device_id *ent) 6848 { 6849 struct igc_adapter *adapter; 6850 struct net_device *netdev; 6851 struct igc_hw *hw; 6852 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 6853 int err; 6854 6855 err = pci_enable_device_mem(pdev); 6856 if (err) 6857 return err; 6858 6859 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 6860 if (err) { 6861 dev_err(&pdev->dev, 6862 "No usable DMA configuration, aborting\n"); 6863 goto err_dma; 6864 } 6865 6866 err = pci_request_mem_regions(pdev, igc_driver_name); 6867 if (err) 6868 goto err_pci_reg; 6869 6870 err = pci_enable_ptm(pdev, NULL); 6871 if (err < 0) 6872 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); 6873 6874 pci_set_master(pdev); 6875 6876 err = -ENOMEM; 6877 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 6878 IGC_MAX_TX_QUEUES); 6879 6880 if (!netdev) 6881 goto err_alloc_etherdev; 6882 6883 SET_NETDEV_DEV(netdev, &pdev->dev); 6884 6885 pci_set_drvdata(pdev, netdev); 6886 adapter = netdev_priv(netdev); 6887 adapter->netdev = netdev; 6888 adapter->pdev = pdev; 6889 hw = &adapter->hw; 6890 hw->back = adapter; 6891 adapter->port_num = hw->bus.func; 6892 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 6893 6894 err = pci_save_state(pdev); 6895 if (err) 6896 goto err_ioremap; 6897 6898 err = -EIO; 6899 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 6900 pci_resource_len(pdev, 0)); 6901 if (!adapter->io_addr) 6902 goto err_ioremap; 6903 6904 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 6905 hw->hw_addr = adapter->io_addr; 6906 6907 netdev->netdev_ops = &igc_netdev_ops; 6908 netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; 6909 netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; 6910 igc_ethtool_set_ops(netdev); 6911 netdev->watchdog_timeo = 5 * HZ; 6912 6913 netdev->mem_start = pci_resource_start(pdev, 0); 6914 netdev->mem_end = pci_resource_end(pdev, 0); 6915 6916 /* PCI config space info */ 6917 hw->vendor_id = pdev->vendor; 6918 hw->device_id = pdev->device; 6919 hw->revision_id = pdev->revision; 6920 hw->subsystem_vendor_id = pdev->subsystem_vendor; 6921 hw->subsystem_device_id = pdev->subsystem_device; 6922 6923 /* Copy the default MAC and PHY function pointers */ 6924 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 6925 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 6926 6927 /* Initialize skew-specific constants */ 6928 err = ei->get_invariants(hw); 6929 if (err) 6930 goto err_sw_init; 6931 6932 /* Add supported features to the features list*/ 6933 netdev->features |= NETIF_F_SG; 6934 netdev->features |= NETIF_F_TSO; 6935 netdev->features |= NETIF_F_TSO6; 6936 netdev->features |= NETIF_F_TSO_ECN; 6937 netdev->features |= NETIF_F_RXHASH; 6938 netdev->features |= NETIF_F_RXCSUM; 6939 netdev->features |= NETIF_F_HW_CSUM; 6940 netdev->features |= NETIF_F_SCTP_CRC; 6941 netdev->features |= NETIF_F_HW_TC; 6942 6943 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 6944 NETIF_F_GSO_GRE_CSUM | \ 6945 NETIF_F_GSO_IPXIP4 | \ 6946 NETIF_F_GSO_IPXIP6 | \ 6947 NETIF_F_GSO_UDP_TUNNEL | \ 6948 NETIF_F_GSO_UDP_TUNNEL_CSUM) 6949 6950 netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES; 6951 netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES; 6952 6953 /* setup the private structure */ 6954 err = 
igc_sw_init(adapter); 6955 if (err) 6956 goto err_sw_init; 6957 6958 /* copy netdev features into list of user selectable features */ 6959 netdev->hw_features |= NETIF_F_NTUPLE; 6960 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 6961 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 6962 netdev->hw_features |= netdev->features; 6963 6964 netdev->features |= NETIF_F_HIGHDMA; 6965 6966 netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; 6967 netdev->mpls_features |= NETIF_F_HW_CSUM; 6968 netdev->hw_enc_features |= netdev->vlan_features; 6969 6970 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 6971 NETDEV_XDP_ACT_XSK_ZEROCOPY; 6972 6973 /* MTU range: 68 - 9216 */ 6974 netdev->min_mtu = ETH_MIN_MTU; 6975 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 6976 6977 /* before reading the NVM, reset the controller to put the device in a 6978 * known good starting state 6979 */ 6980 hw->mac.ops.reset_hw(hw); 6981 6982 if (igc_get_flash_presence_i225(hw)) { 6983 if (hw->nvm.ops.validate(hw) < 0) { 6984 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 6985 err = -EIO; 6986 goto err_eeprom; 6987 } 6988 } 6989 6990 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 6991 /* copy the MAC address out of the NVM */ 6992 if (hw->mac.ops.read_mac_addr(hw)) 6993 dev_err(&pdev->dev, "NVM Read Error\n"); 6994 } 6995 6996 eth_hw_addr_set(netdev, hw->mac.addr); 6997 6998 if (!is_valid_ether_addr(netdev->dev_addr)) { 6999 dev_err(&pdev->dev, "Invalid MAC Address\n"); 7000 err = -EIO; 7001 goto err_eeprom; 7002 } 7003 7004 /* configure RXPBSIZE and TXPBSIZE */ 7005 wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); 7006 wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); 7007 7008 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 7009 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 7010 7011 INIT_WORK(&adapter->reset_task, igc_reset_task); 7012 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 7013 7014 hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 7015 adapter->hrtimer.function = &igc_qbv_scheduling_timer; 7016 7017 /* Initialize link properties that are user-changeable */ 7018 adapter->fc_autoneg = true; 7019 hw->mac.autoneg = true; 7020 hw->phy.autoneg_advertised = 0xaf; 7021 7022 hw->fc.requested_mode = igc_fc_default; 7023 hw->fc.current_mode = igc_fc_default; 7024 7025 /* By default, support wake on port A */ 7026 adapter->flags |= IGC_FLAG_WOL_SUPPORTED; 7027 7028 /* initialize the wol settings based on the eeprom settings */ 7029 if (adapter->flags & IGC_FLAG_WOL_SUPPORTED) 7030 adapter->wol |= IGC_WUFC_MAG; 7031 7032 device_set_wakeup_enable(&adapter->pdev->dev, 7033 adapter->flags & IGC_FLAG_WOL_SUPPORTED); 7034 7035 igc_ptp_init(adapter); 7036 7037 igc_tsn_clear_schedule(adapter); 7038 7039 /* reset the hardware with the new settings */ 7040 igc_reset(adapter); 7041 7042 /* let the f/w know that the h/w is now under the control of the 7043 * driver. 
7044 */ 7045 igc_get_hw_control(adapter); 7046 7047 strscpy(netdev->name, "eth%d", sizeof(netdev->name)); 7048 err = register_netdev(netdev); 7049 if (err) 7050 goto err_register; 7051 7052 /* carrier off reporting is important to ethtool even BEFORE open */ 7053 netif_carrier_off(netdev); 7054 7055 /* Check if Media Autosense is enabled */ 7056 adapter->ei = *ei; 7057 7058 /* print pcie link status and MAC address */ 7059 pcie_print_link_status(pdev); 7060 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 7061 7062 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 7063 /* Disable EEE for internal PHY devices */ 7064 hw->dev_spec._base.eee_enable = false; 7065 adapter->flags &= ~IGC_FLAG_EEE; 7066 igc_set_eee_i225(hw, false, false, false); 7067 7068 pm_runtime_put_noidle(&pdev->dev); 7069 7070 if (IS_ENABLED(CONFIG_IGC_LEDS)) { 7071 err = igc_led_setup(adapter); 7072 if (err) 7073 goto err_register; 7074 } 7075 7076 return 0; 7077 7078 err_register: 7079 igc_release_hw_control(adapter); 7080 err_eeprom: 7081 if (!igc_check_reset_block(hw)) 7082 igc_reset_phy(hw); 7083 err_sw_init: 7084 igc_clear_interrupt_scheme(adapter); 7085 iounmap(adapter->io_addr); 7086 err_ioremap: 7087 free_netdev(netdev); 7088 err_alloc_etherdev: 7089 pci_release_mem_regions(pdev); 7090 err_pci_reg: 7091 err_dma: 7092 pci_disable_device(pdev); 7093 return err; 7094 } 7095 7096 /** 7097 * igc_remove - Device Removal Routine 7098 * @pdev: PCI device information struct 7099 * 7100 * igc_remove is called by the PCI subsystem to alert the driver 7101 * that it should release a PCI device. This could be caused by a 7102 * Hot-Plug event, or because the driver is going to be removed from 7103 * memory. 7104 */ 7105 static void igc_remove(struct pci_dev *pdev) 7106 { 7107 struct net_device *netdev = pci_get_drvdata(pdev); 7108 struct igc_adapter *adapter = netdev_priv(netdev); 7109 7110 pm_runtime_get_noresume(&pdev->dev); 7111 7112 igc_flush_nfc_rules(adapter); 7113 7114 igc_ptp_stop(adapter); 7115 7116 pci_disable_ptm(pdev); 7117 pci_clear_master(pdev); 7118 7119 set_bit(__IGC_DOWN, &adapter->state); 7120 7121 del_timer_sync(&adapter->watchdog_timer); 7122 del_timer_sync(&adapter->phy_info_timer); 7123 7124 cancel_work_sync(&adapter->reset_task); 7125 cancel_work_sync(&adapter->watchdog_task); 7126 hrtimer_cancel(&adapter->hrtimer); 7127 7128 if (IS_ENABLED(CONFIG_IGC_LEDS)) 7129 igc_led_free(adapter); 7130 7131 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7132 * would have already happened in close and is redundant. 7133 */ 7134 igc_release_hw_control(adapter); 7135 unregister_netdev(netdev); 7136 7137 igc_clear_interrupt_scheme(adapter); 7138 pci_iounmap(pdev, adapter->io_addr); 7139 pci_release_mem_regions(pdev); 7140 7141 free_netdev(netdev); 7142 7143 pci_disable_device(pdev); 7144 } 7145 7146 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 7147 bool runtime) 7148 { 7149 struct net_device *netdev = pci_get_drvdata(pdev); 7150 struct igc_adapter *adapter = netdev_priv(netdev); 7151 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 7152 struct igc_hw *hw = &adapter->hw; 7153 u32 ctrl, rctl, status; 7154 bool wake; 7155 7156 rtnl_lock(); 7157 netif_device_detach(netdev); 7158 7159 if (netif_running(netdev)) 7160 __igc_close(netdev, true); 7161 7162 igc_ptp_suspend(adapter); 7163 7164 igc_clear_interrupt_scheme(adapter); 7165 rtnl_unlock(); 7166 7167 status = rd32(IGC_STATUS); 7168 if (status & IGC_STATUS_LU) 7169 wufc &= ~IGC_WUFC_LNKC; 7170 7171 if (wufc) { 7172 igc_setup_rctl(adapter); 7173 igc_set_rx_mode(netdev); 7174 7175 /* turn on all-multi mode if wake on multicast is enabled */ 7176 if (wufc & IGC_WUFC_MC) { 7177 rctl = rd32(IGC_RCTL); 7178 rctl |= IGC_RCTL_MPE; 7179 wr32(IGC_RCTL, rctl); 7180 } 7181 7182 ctrl = rd32(IGC_CTRL); 7183 ctrl |= IGC_CTRL_ADVD3WUC; 7184 wr32(IGC_CTRL, ctrl); 7185 7186 /* Allow time for pending master requests to run */ 7187 igc_disable_pcie_master(hw); 7188 7189 wr32(IGC_WUC, IGC_WUC_PME_EN); 7190 wr32(IGC_WUFC, wufc); 7191 } else { 7192 wr32(IGC_WUC, 0); 7193 wr32(IGC_WUFC, 0); 7194 } 7195 7196 wake = wufc || adapter->en_mng_pt; 7197 if (!wake) 7198 igc_power_down_phy_copper_base(&adapter->hw); 7199 else 7200 igc_power_up_link(adapter); 7201 7202 if (enable_wake) 7203 *enable_wake = wake; 7204 7205 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7206 * would have already happened in close and is redundant. 7207 */ 7208 igc_release_hw_control(adapter); 7209 7210 pci_disable_device(pdev); 7211 7212 return 0; 7213 } 7214 7215 static int igc_runtime_suspend(struct device *dev) 7216 { 7217 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 7218 } 7219 7220 static void igc_deliver_wake_packet(struct net_device *netdev) 7221 { 7222 struct igc_adapter *adapter = netdev_priv(netdev); 7223 struct igc_hw *hw = &adapter->hw; 7224 struct sk_buff *skb; 7225 u32 wupl; 7226 7227 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 7228 7229 /* WUPM stores only the first 128 bytes of the wake packet. 7230 * Read the packet only if we have the whole thing. 7231 */ 7232 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 7233 return; 7234 7235 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 7236 if (!skb) 7237 return; 7238 7239 skb_put(skb, wupl); 7240 7241 /* Ensure reads are 32-bit aligned */ 7242 wupl = roundup(wupl, 4); 7243 7244 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 7245 7246 skb->protocol = eth_type_trans(skb, netdev); 7247 netif_rx(skb); 7248 } 7249 7250 static int igc_resume(struct device *dev) 7251 { 7252 struct pci_dev *pdev = to_pci_dev(dev); 7253 struct net_device *netdev = pci_get_drvdata(pdev); 7254 struct igc_adapter *adapter = netdev_priv(netdev); 7255 struct igc_hw *hw = &adapter->hw; 7256 u32 err, val; 7257 7258 pci_set_power_state(pdev, PCI_D0); 7259 pci_restore_state(pdev); 7260 pci_save_state(pdev); 7261 7262 if (!pci_device_is_present(pdev)) 7263 return -ENODEV; 7264 err = pci_enable_device_mem(pdev); 7265 if (err) { 7266 netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 7267 return err; 7268 } 7269 pci_set_master(pdev); 7270 7271 pci_enable_wake(pdev, PCI_D3hot, 0); 7272 pci_enable_wake(pdev, PCI_D3cold, 0); 7273 7274 if (igc_init_interrupt_scheme(adapter, true)) { 7275 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7276 return -ENOMEM; 7277 } 7278 7279 igc_reset(adapter); 7280 7281 /* let the f/w know that the h/w is now under the control of the 7282 * driver. 
7283 */ 7284 igc_get_hw_control(adapter); 7285 7286 val = rd32(IGC_WUS); 7287 if (val & WAKE_PKT_WUS) 7288 igc_deliver_wake_packet(netdev); 7289 7290 wr32(IGC_WUS, ~0); 7291 7292 if (netif_running(netdev)) { 7293 err = __igc_open(netdev, true); 7294 if (!err) 7295 netif_device_attach(netdev); 7296 } 7297 7298 return err; 7299 } 7300 7301 static int igc_runtime_resume(struct device *dev) 7302 { 7303 return igc_resume(dev); 7304 } 7305 7306 static int igc_suspend(struct device *dev) 7307 { 7308 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 7309 } 7310 7311 static int __maybe_unused igc_runtime_idle(struct device *dev) 7312 { 7313 struct net_device *netdev = dev_get_drvdata(dev); 7314 struct igc_adapter *adapter = netdev_priv(netdev); 7315 7316 if (!igc_has_link(adapter)) 7317 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 7318 7319 return -EBUSY; 7320 } 7321 7322 static void igc_shutdown(struct pci_dev *pdev) 7323 { 7324 bool wake; 7325 7326 __igc_shutdown(pdev, &wake, 0); 7327 7328 if (system_state == SYSTEM_POWER_OFF) { 7329 pci_wake_from_d3(pdev, wake); 7330 pci_set_power_state(pdev, PCI_D3hot); 7331 } 7332 } 7333 7334 /** 7335 * igc_io_error_detected - called when PCI error is detected 7336 * @pdev: Pointer to PCI device 7337 * @state: The current PCI connection state 7338 * 7339 * This function is called after a PCI bus error affecting 7340 * this device has been detected. 7341 **/ 7342 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, 7343 pci_channel_state_t state) 7344 { 7345 struct net_device *netdev = pci_get_drvdata(pdev); 7346 struct igc_adapter *adapter = netdev_priv(netdev); 7347 7348 netif_device_detach(netdev); 7349 7350 if (state == pci_channel_io_perm_failure) 7351 return PCI_ERS_RESULT_DISCONNECT; 7352 7353 if (netif_running(netdev)) 7354 igc_down(adapter); 7355 pci_disable_device(pdev); 7356 7357 /* Request a slot reset. */ 7358 return PCI_ERS_RESULT_NEED_RESET; 7359 } 7360 7361 /** 7362 * igc_io_slot_reset - called after the PCI bus has been reset. 7363 * @pdev: Pointer to PCI device 7364 * 7365 * Restart the card from scratch, as if from a cold-boot. Implementation 7366 * resembles the first-half of the igc_resume routine. 7367 **/ 7368 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) 7369 { 7370 struct net_device *netdev = pci_get_drvdata(pdev); 7371 struct igc_adapter *adapter = netdev_priv(netdev); 7372 struct igc_hw *hw = &adapter->hw; 7373 pci_ers_result_t result; 7374 7375 if (pci_enable_device_mem(pdev)) { 7376 netdev_err(netdev, "Could not re-enable PCI device after reset\n"); 7377 result = PCI_ERS_RESULT_DISCONNECT; 7378 } else { 7379 pci_set_master(pdev); 7380 pci_restore_state(pdev); 7381 pci_save_state(pdev); 7382 7383 pci_enable_wake(pdev, PCI_D3hot, 0); 7384 pci_enable_wake(pdev, PCI_D3cold, 0); 7385 7386 /* In case of PCI error, adapter loses its HW address 7387 * so we should re-assign it here. 7388 */ 7389 hw->hw_addr = adapter->io_addr; 7390 7391 igc_reset(adapter); 7392 wr32(IGC_WUS, ~0); 7393 result = PCI_ERS_RESULT_RECOVERED; 7394 } 7395 7396 return result; 7397 } 7398 7399 /** 7400 * igc_io_resume - called when traffic can start to flow again. 7401 * @pdev: Pointer to PCI device 7402 * 7403 * This callback is called when the error recovery driver tells us that 7404 * its OK to resume normal operation. Implementation resembles the 7405 * second-half of the igc_resume routine. 
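 *
 * The interface is reopened under the RTNL lock if it was running before
 * the error, and the netdev is then re-attached.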
7406 */ 7407 static void igc_io_resume(struct pci_dev *pdev) 7408 { 7409 struct net_device *netdev = pci_get_drvdata(pdev); 7410 struct igc_adapter *adapter = netdev_priv(netdev); 7411 7412 rtnl_lock(); 7413 if (netif_running(netdev)) { 7414 if (igc_open(netdev)) { 7415 netdev_err(netdev, "igc_open failed after reset\n"); 7416 return; 7417 } 7418 } 7419 7420 netif_device_attach(netdev); 7421 7422 /* let the f/w know that the h/w is now under the control of the 7423 * driver. 7424 */ 7425 igc_get_hw_control(adapter); 7426 rtnl_unlock(); 7427 } 7428 7429 static const struct pci_error_handlers igc_err_handler = { 7430 .error_detected = igc_io_error_detected, 7431 .slot_reset = igc_io_slot_reset, 7432 .resume = igc_io_resume, 7433 }; 7434 7435 static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, 7436 igc_runtime_suspend, igc_runtime_resume, 7437 igc_runtime_idle); 7438 7439 static struct pci_driver igc_driver = { 7440 .name = igc_driver_name, 7441 .id_table = igc_pci_tbl, 7442 .probe = igc_probe, 7443 .remove = igc_remove, 7444 .driver.pm = pm_ptr(&igc_pm_ops), 7445 .shutdown = igc_shutdown, 7446 .err_handler = &igc_err_handler, 7447 }; 7448 7449 /** 7450 * igc_reinit_queues - return error 7451 * @adapter: pointer to adapter structure 7452 */ 7453 int igc_reinit_queues(struct igc_adapter *adapter) 7454 { 7455 struct net_device *netdev = adapter->netdev; 7456 int err = 0; 7457 7458 if (netif_running(netdev)) 7459 igc_close(netdev); 7460 7461 igc_reset_interrupt_capability(adapter); 7462 7463 if (igc_init_interrupt_scheme(adapter, true)) { 7464 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7465 return -ENOMEM; 7466 } 7467 7468 if (netif_running(netdev)) 7469 err = igc_open(netdev); 7470 7471 return err; 7472 } 7473 7474 /** 7475 * igc_get_hw_dev - return device 7476 * @hw: pointer to hardware structure 7477 * 7478 * used by hardware layer to print debugging information 7479 */ 7480 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 7481 { 7482 struct igc_adapter *adapter = hw->back; 7483 7484 return adapter->netdev; 7485 } 7486 7487 static void igc_disable_rx_ring_hw(struct igc_ring *ring) 7488 { 7489 struct igc_hw *hw = &ring->q_vector->adapter->hw; 7490 u8 idx = ring->reg_idx; 7491 u32 rxdctl; 7492 7493 rxdctl = rd32(IGC_RXDCTL(idx)); 7494 rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; 7495 rxdctl |= IGC_RXDCTL_SWFLUSH; 7496 wr32(IGC_RXDCTL(idx), rxdctl); 7497 } 7498 7499 void igc_disable_rx_ring(struct igc_ring *ring) 7500 { 7501 igc_disable_rx_ring_hw(ring); 7502 igc_clean_rx_ring(ring); 7503 } 7504 7505 void igc_enable_rx_ring(struct igc_ring *ring) 7506 { 7507 struct igc_adapter *adapter = ring->q_vector->adapter; 7508 7509 igc_configure_rx_ring(adapter, ring); 7510 7511 if (ring->xsk_pool) 7512 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 7513 else 7514 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 7515 } 7516 7517 void igc_disable_tx_ring(struct igc_ring *ring) 7518 { 7519 igc_disable_tx_ring_hw(ring); 7520 igc_clean_tx_ring(ring); 7521 } 7522 7523 void igc_enable_tx_ring(struct igc_ring *ring) 7524 { 7525 struct igc_adapter *adapter = ring->q_vector->adapter; 7526 7527 igc_configure_tx_ring(adapter, ring); 7528 } 7529 7530 /** 7531 * igc_init_module - Driver Registration Routine 7532 * 7533 * igc_init_module is the first routine called when the driver is 7534 * loaded. All it does is register with the PCI subsystem. 
7535 */ 7536 static int __init igc_init_module(void) 7537 { 7538 int ret; 7539 7540 pr_info("%s\n", igc_driver_string); 7541 pr_info("%s\n", igc_copyright); 7542 7543 ret = pci_register_driver(&igc_driver); 7544 return ret; 7545 } 7546 7547 module_init(igc_init_module); 7548 7549 /** 7550 * igc_exit_module - Driver Exit Cleanup Routine 7551 * 7552 * igc_exit_module is called just before the driver is removed 7553 * from memory. 7554 */ 7555 static void __exit igc_exit_module(void) 7556 { 7557 pci_unregister_driver(&igc_driver); 7558 } 7559 7560 module_exit(igc_exit_module); 7561 /* igc_main.c */ 7562