1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 Intel Corporation */ 3 4 #include <linux/module.h> 5 #include <linux/types.h> 6 #include <linux/if_vlan.h> 7 #include <linux/tcp.h> 8 #include <linux/udp.h> 9 #include <linux/ip.h> 10 #include <linux/pm_runtime.h> 11 #include <net/pkt_sched.h> 12 #include <linux/bpf_trace.h> 13 #include <net/xdp_sock_drv.h> 14 #include <linux/pci.h> 15 #include <linux/mdio.h> 16 17 #include <net/ipv6.h> 18 19 #include "igc.h" 20 #include "igc_hw.h" 21 #include "igc_tsn.h" 22 #include "igc_xdp.h" 23 24 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" 25 26 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) 27 28 #define IGC_XDP_PASS 0 29 #define IGC_XDP_CONSUMED BIT(0) 30 #define IGC_XDP_TX BIT(1) 31 #define IGC_XDP_REDIRECT BIT(2) 32 33 static int debug = -1; 34 35 MODULE_DESCRIPTION(DRV_SUMMARY); 36 MODULE_LICENSE("GPL v2"); 37 module_param(debug, int, 0); 38 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 39 40 char igc_driver_name[] = "igc"; 41 static const char igc_driver_string[] = DRV_SUMMARY; 42 static const char igc_copyright[] = 43 "Copyright(c) 2018 Intel Corporation."; 44 45 static const struct igc_info *igc_info_tbl[] = { 46 [board_base] = &igc_base_info, 47 }; 48 49 static const struct pci_device_id igc_pci_tbl[] = { 50 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, 51 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, 52 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, 53 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, 54 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, 55 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, 56 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, 57 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, 58 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base }, 59 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, 60 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, 61 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, 62 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base }, 63 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base }, 64 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base }, 65 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, 66 /* required last entry */ 67 {0, } 68 }; 69 70 MODULE_DEVICE_TABLE(pci, igc_pci_tbl); 71 72 enum latency_range { 73 lowest_latency = 0, 74 low_latency = 1, 75 bulk_latency = 2, 76 latency_invalid = 255 77 }; 78 79 void igc_reset(struct igc_adapter *adapter) 80 { 81 struct net_device *dev = adapter->netdev; 82 struct igc_hw *hw = &adapter->hw; 83 struct igc_fc_info *fc = &hw->fc; 84 u32 pba, hwm; 85 86 /* Repartition PBA for greater than 9k MTU if required */ 87 pba = IGC_PBA_34K; 88 89 /* flow control settings 90 * The high water mark must be low enough to fit one full frame 91 * after transmitting the pause frame. As such we must have enough 92 * space to allow for us to complete our current transmit and then 93 * receive the frame that is in progress from the link partner. 
94 * Set it to: 95 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 96 */ 97 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 98 99 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 100 fc->low_water = fc->high_water - 16; 101 fc->pause_time = 0xFFFF; 102 fc->send_xon = 1; 103 fc->current_mode = fc->requested_mode; 104 105 hw->mac.ops.reset_hw(hw); 106 107 if (hw->mac.ops.init_hw(hw)) 108 netdev_err(dev, "Error on hardware initialization\n"); 109 110 /* Re-establish EEE setting */ 111 igc_set_eee_i225(hw, true, true, true); 112 113 if (!netif_running(adapter->netdev)) 114 igc_power_down_phy_copper_base(&adapter->hw); 115 116 /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ 117 wr32(IGC_VET, ETH_P_8021Q); 118 119 /* Re-enable PTP, where applicable. */ 120 igc_ptp_reset(adapter); 121 122 /* Re-enable TSN offloading, where applicable. */ 123 igc_tsn_reset(adapter); 124 125 igc_get_phy_info(hw); 126 } 127 128 /** 129 * igc_power_up_link - Power up the phy link 130 * @adapter: address of board private structure 131 */ 132 static void igc_power_up_link(struct igc_adapter *adapter) 133 { 134 igc_reset_phy(&adapter->hw); 135 136 igc_power_up_phy_copper(&adapter->hw); 137 138 igc_setup_link(&adapter->hw); 139 } 140 141 /** 142 * igc_release_hw_control - release control of the h/w to f/w 143 * @adapter: address of board private structure 144 * 145 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 146 * For ASF and Pass Through versions of f/w this means that the 147 * driver is no longer loaded. 148 */ 149 static void igc_release_hw_control(struct igc_adapter *adapter) 150 { 151 struct igc_hw *hw = &adapter->hw; 152 u32 ctrl_ext; 153 154 if (!pci_device_is_present(adapter->pdev)) 155 return; 156 157 /* Let firmware take over control of h/w */ 158 ctrl_ext = rd32(IGC_CTRL_EXT); 159 wr32(IGC_CTRL_EXT, 160 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 161 } 162 163 /** 164 * igc_get_hw_control - get control of the h/w from f/w 165 * @adapter: address of board private structure 166 * 167 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 168 * For ASF and Pass Through versions of f/w this means that 169 * the driver is loaded. 
170 */ 171 static void igc_get_hw_control(struct igc_adapter *adapter) 172 { 173 struct igc_hw *hw = &adapter->hw; 174 u32 ctrl_ext; 175 176 /* Let firmware know the driver has taken over */ 177 ctrl_ext = rd32(IGC_CTRL_EXT); 178 wr32(IGC_CTRL_EXT, 179 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 180 } 181 182 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 183 { 184 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 185 dma_unmap_len(buf, len), DMA_TO_DEVICE); 186 187 dma_unmap_len_set(buf, len, 0); 188 } 189 190 /** 191 * igc_clean_tx_ring - Free Tx Buffers 192 * @tx_ring: ring to be cleaned 193 */ 194 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 195 { 196 u16 i = tx_ring->next_to_clean; 197 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 198 u32 xsk_frames = 0; 199 200 while (i != tx_ring->next_to_use) { 201 union igc_adv_tx_desc *eop_desc, *tx_desc; 202 203 switch (tx_buffer->type) { 204 case IGC_TX_BUFFER_TYPE_XSK: 205 xsk_frames++; 206 break; 207 case IGC_TX_BUFFER_TYPE_XDP: 208 xdp_return_frame(tx_buffer->xdpf); 209 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 210 break; 211 case IGC_TX_BUFFER_TYPE_SKB: 212 dev_kfree_skb_any(tx_buffer->skb); 213 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 214 break; 215 default: 216 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 217 break; 218 } 219 220 /* check for eop_desc to determine the end of the packet */ 221 eop_desc = tx_buffer->next_to_watch; 222 tx_desc = IGC_TX_DESC(tx_ring, i); 223 224 /* unmap remaining buffers */ 225 while (tx_desc != eop_desc) { 226 tx_buffer++; 227 tx_desc++; 228 i++; 229 if (unlikely(i == tx_ring->count)) { 230 i = 0; 231 tx_buffer = tx_ring->tx_buffer_info; 232 tx_desc = IGC_TX_DESC(tx_ring, 0); 233 } 234 235 /* unmap any remaining paged data */ 236 if (dma_unmap_len(tx_buffer, len)) 237 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 238 } 239 240 tx_buffer->next_to_watch = NULL; 241 242 /* move us one more past the eop_desc for start of next pkt */ 243 tx_buffer++; 244 i++; 245 if (unlikely(i == tx_ring->count)) { 246 i = 0; 247 tx_buffer = tx_ring->tx_buffer_info; 248 } 249 } 250 251 if (tx_ring->xsk_pool && xsk_frames) 252 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 253 254 /* reset BQL for queue */ 255 netdev_tx_reset_queue(txring_txq(tx_ring)); 256 257 /* Zero out the buffer ring */ 258 memset(tx_ring->tx_buffer_info, 0, 259 sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); 260 261 /* Zero out the descriptor ring */ 262 memset(tx_ring->desc, 0, tx_ring->size); 263 264 /* reset next_to_use and next_to_clean */ 265 tx_ring->next_to_use = 0; 266 tx_ring->next_to_clean = 0; 267 } 268 269 /** 270 * igc_free_tx_resources - Free Tx Resources per Queue 271 * @tx_ring: Tx descriptor ring for a specific queue 272 * 273 * Free all transmit software resources 274 */ 275 void igc_free_tx_resources(struct igc_ring *tx_ring) 276 { 277 igc_disable_tx_ring(tx_ring); 278 279 vfree(tx_ring->tx_buffer_info); 280 tx_ring->tx_buffer_info = NULL; 281 282 /* if not set, then don't free */ 283 if (!tx_ring->desc) 284 return; 285 286 dma_free_coherent(tx_ring->dev, tx_ring->size, 287 tx_ring->desc, tx_ring->dma); 288 289 tx_ring->desc = NULL; 290 } 291 292 /** 293 * igc_free_all_tx_resources - Free Tx Resources for All Queues 294 * @adapter: board private structure 295 * 296 * Free all transmit software resources 297 */ 298 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 299 { 300 int i; 301 302 for (i = 0; i < adapter->num_tx_queues; i++) 303 
igc_free_tx_resources(adapter->tx_ring[i]); 304 } 305 306 /** 307 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 308 * @adapter: board private structure 309 */ 310 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 311 { 312 int i; 313 314 for (i = 0; i < adapter->num_tx_queues; i++) 315 if (adapter->tx_ring[i]) 316 igc_clean_tx_ring(adapter->tx_ring[i]); 317 } 318 319 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 320 { 321 struct igc_hw *hw = &ring->q_vector->adapter->hw; 322 u8 idx = ring->reg_idx; 323 u32 txdctl; 324 325 txdctl = rd32(IGC_TXDCTL(idx)); 326 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 327 txdctl |= IGC_TXDCTL_SWFLUSH; 328 wr32(IGC_TXDCTL(idx), txdctl); 329 } 330 331 /** 332 * igc_disable_all_tx_rings_hw - Disable all transmit queue operation 333 * @adapter: board private structure 334 */ 335 static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) 336 { 337 int i; 338 339 for (i = 0; i < adapter->num_tx_queues; i++) { 340 struct igc_ring *tx_ring = adapter->tx_ring[i]; 341 342 igc_disable_tx_ring_hw(tx_ring); 343 } 344 } 345 346 /** 347 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 348 * @tx_ring: tx descriptor ring (for a specific queue) to setup 349 * 350 * Return 0 on success, negative on failure 351 */ 352 int igc_setup_tx_resources(struct igc_ring *tx_ring) 353 { 354 struct net_device *ndev = tx_ring->netdev; 355 struct device *dev = tx_ring->dev; 356 int size = 0; 357 358 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 359 tx_ring->tx_buffer_info = vzalloc(size); 360 if (!tx_ring->tx_buffer_info) 361 goto err; 362 363 /* round up to nearest 4K */ 364 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 365 tx_ring->size = ALIGN(tx_ring->size, 4096); 366 367 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 368 &tx_ring->dma, GFP_KERNEL); 369 370 if (!tx_ring->desc) 371 goto err; 372 373 tx_ring->next_to_use = 0; 374 tx_ring->next_to_clean = 0; 375 376 return 0; 377 378 err: 379 vfree(tx_ring->tx_buffer_info); 380 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 381 return -ENOMEM; 382 } 383 384 /** 385 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 386 * @adapter: board private structure 387 * 388 * Return 0 on success, negative on failure 389 */ 390 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 391 { 392 struct net_device *dev = adapter->netdev; 393 int i, err = 0; 394 395 for (i = 0; i < adapter->num_tx_queues; i++) { 396 err = igc_setup_tx_resources(adapter->tx_ring[i]); 397 if (err) { 398 netdev_err(dev, "Error on Tx queue %u setup\n", i); 399 for (i--; i >= 0; i--) 400 igc_free_tx_resources(adapter->tx_ring[i]); 401 break; 402 } 403 } 404 405 return err; 406 } 407 408 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 409 { 410 u16 i = rx_ring->next_to_clean; 411 412 dev_kfree_skb(rx_ring->skb); 413 rx_ring->skb = NULL; 414 415 /* Free all the Rx ring sk_buffs */ 416 while (i != rx_ring->next_to_alloc) { 417 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 418 419 /* Invalidate cache lines that may have been written to by 420 * device so that we avoid corrupting memory. 
421 */ 422 dma_sync_single_range_for_cpu(rx_ring->dev, 423 buffer_info->dma, 424 buffer_info->page_offset, 425 igc_rx_bufsz(rx_ring), 426 DMA_FROM_DEVICE); 427 428 /* free resources associated with mapping */ 429 dma_unmap_page_attrs(rx_ring->dev, 430 buffer_info->dma, 431 igc_rx_pg_size(rx_ring), 432 DMA_FROM_DEVICE, 433 IGC_RX_DMA_ATTR); 434 __page_frag_cache_drain(buffer_info->page, 435 buffer_info->pagecnt_bias); 436 437 i++; 438 if (i == rx_ring->count) 439 i = 0; 440 } 441 } 442 443 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 444 { 445 struct igc_rx_buffer *bi; 446 u16 i; 447 448 for (i = 0; i < ring->count; i++) { 449 bi = &ring->rx_buffer_info[i]; 450 if (!bi->xdp) 451 continue; 452 453 xsk_buff_free(bi->xdp); 454 bi->xdp = NULL; 455 } 456 } 457 458 /** 459 * igc_clean_rx_ring - Free Rx Buffers per Queue 460 * @ring: ring to free buffers from 461 */ 462 static void igc_clean_rx_ring(struct igc_ring *ring) 463 { 464 if (ring->xsk_pool) 465 igc_clean_rx_ring_xsk_pool(ring); 466 else 467 igc_clean_rx_ring_page_shared(ring); 468 469 clear_ring_uses_large_buffer(ring); 470 471 ring->next_to_alloc = 0; 472 ring->next_to_clean = 0; 473 ring->next_to_use = 0; 474 } 475 476 /** 477 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 478 * @adapter: board private structure 479 */ 480 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 481 { 482 int i; 483 484 for (i = 0; i < adapter->num_rx_queues; i++) 485 if (adapter->rx_ring[i]) 486 igc_clean_rx_ring(adapter->rx_ring[i]); 487 } 488 489 /** 490 * igc_free_rx_resources - Free Rx Resources 491 * @rx_ring: ring to clean the resources from 492 * 493 * Free all receive software resources 494 */ 495 void igc_free_rx_resources(struct igc_ring *rx_ring) 496 { 497 igc_clean_rx_ring(rx_ring); 498 499 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 500 501 vfree(rx_ring->rx_buffer_info); 502 rx_ring->rx_buffer_info = NULL; 503 504 /* if not set, then don't free */ 505 if (!rx_ring->desc) 506 return; 507 508 dma_free_coherent(rx_ring->dev, rx_ring->size, 509 rx_ring->desc, rx_ring->dma); 510 511 rx_ring->desc = NULL; 512 } 513 514 /** 515 * igc_free_all_rx_resources - Free Rx Resources for All Queues 516 * @adapter: board private structure 517 * 518 * Free all receive software resources 519 */ 520 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 521 { 522 int i; 523 524 for (i = 0; i < adapter->num_rx_queues; i++) 525 igc_free_rx_resources(adapter->rx_ring[i]); 526 } 527 528 /** 529 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 530 * @rx_ring: rx descriptor ring (for a specific queue) to setup 531 * 532 * Returns 0 on success, negative on failure 533 */ 534 int igc_setup_rx_resources(struct igc_ring *rx_ring) 535 { 536 struct net_device *ndev = rx_ring->netdev; 537 struct device *dev = rx_ring->dev; 538 u8 index = rx_ring->queue_index; 539 int size, desc_len, res; 540 541 /* XDP RX-queue info */ 542 if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 543 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 544 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 545 rx_ring->q_vector->napi.napi_id); 546 if (res < 0) { 547 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 548 index); 549 return res; 550 } 551 552 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 553 rx_ring->rx_buffer_info = vzalloc(size); 554 if (!rx_ring->rx_buffer_info) 555 goto err; 556 557 desc_len = sizeof(union igc_adv_rx_desc); 558 559 /* Round up to nearest 4K */ 560 rx_ring->size = rx_ring->count * desc_len; 561 
rx_ring->size = ALIGN(rx_ring->size, 4096); 562 563 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 564 &rx_ring->dma, GFP_KERNEL); 565 566 if (!rx_ring->desc) 567 goto err; 568 569 rx_ring->next_to_alloc = 0; 570 rx_ring->next_to_clean = 0; 571 rx_ring->next_to_use = 0; 572 573 return 0; 574 575 err: 576 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 577 vfree(rx_ring->rx_buffer_info); 578 rx_ring->rx_buffer_info = NULL; 579 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 580 return -ENOMEM; 581 } 582 583 /** 584 * igc_setup_all_rx_resources - wrapper to allocate Rx resources 585 * (Descriptors) for all queues 586 * @adapter: board private structure 587 * 588 * Return 0 on success, negative on failure 589 */ 590 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 591 { 592 struct net_device *dev = adapter->netdev; 593 int i, err = 0; 594 595 for (i = 0; i < adapter->num_rx_queues; i++) { 596 err = igc_setup_rx_resources(adapter->rx_ring[i]); 597 if (err) { 598 netdev_err(dev, "Error on Rx queue %u setup\n", i); 599 for (i--; i >= 0; i--) 600 igc_free_rx_resources(adapter->rx_ring[i]); 601 break; 602 } 603 } 604 605 return err; 606 } 607 608 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 609 struct igc_ring *ring) 610 { 611 if (!igc_xdp_is_enabled(adapter) || 612 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 613 return NULL; 614 615 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 616 } 617 618 /** 619 * igc_configure_rx_ring - Configure a receive ring after Reset 620 * @adapter: board private structure 621 * @ring: receive ring to be configured 622 * 623 * Configure the Rx unit of the MAC after a reset. 624 */ 625 static void igc_configure_rx_ring(struct igc_adapter *adapter, 626 struct igc_ring *ring) 627 { 628 struct igc_hw *hw = &adapter->hw; 629 union igc_adv_rx_desc *rx_desc; 630 int reg_idx = ring->reg_idx; 631 u32 srrctl = 0, rxdctl = 0; 632 u64 rdba = ring->dma; 633 u32 buf_size; 634 635 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 636 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 637 if (ring->xsk_pool) { 638 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 639 MEM_TYPE_XSK_BUFF_POOL, 640 NULL)); 641 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 642 } else { 643 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 644 MEM_TYPE_PAGE_SHARED, 645 NULL)); 646 } 647 648 if (igc_xdp_is_enabled(adapter)) 649 set_ring_uses_large_buffer(ring); 650 651 /* disable the queue */ 652 wr32(IGC_RXDCTL(reg_idx), 0); 653 654 /* Set DMA base address registers */ 655 wr32(IGC_RDBAL(reg_idx), 656 rdba & 0x00000000ffffffffULL); 657 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 658 wr32(IGC_RDLEN(reg_idx), 659 ring->count * sizeof(union igc_adv_rx_desc)); 660 661 /* initialize head and tail */ 662 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 663 wr32(IGC_RDH(reg_idx), 0); 664 writel(0, ring->tail); 665 666 /* reset next-to- use/clean to place SW in sync with hardware */ 667 ring->next_to_clean = 0; 668 ring->next_to_use = 0; 669 670 if (ring->xsk_pool) 671 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); 672 else if (ring_uses_large_buffer(ring)) 673 buf_size = IGC_RXBUFFER_3072; 674 else 675 buf_size = IGC_RXBUFFER_2048; 676 677 srrctl = rd32(IGC_SRRCTL(reg_idx)); 678 srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | 679 IGC_SRRCTL_DESCTYPE_MASK); 680 srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); 681 srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); 682 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 
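	/* Program the queue's split and replication receive control register
	 * with the advanced one-buffer descriptor format and the buffer size
	 * selected above.
	 */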
683 684 wr32(IGC_SRRCTL(reg_idx), srrctl); 685 686 rxdctl |= IGC_RX_PTHRESH; 687 rxdctl |= IGC_RX_HTHRESH << 8; 688 rxdctl |= IGC_RX_WTHRESH << 16; 689 690 /* initialize rx_buffer_info */ 691 memset(ring->rx_buffer_info, 0, 692 sizeof(struct igc_rx_buffer) * ring->count); 693 694 /* initialize Rx descriptor 0 */ 695 rx_desc = IGC_RX_DESC(ring, 0); 696 rx_desc->wb.upper.length = 0; 697 698 /* enable receive descriptor fetching */ 699 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 700 701 wr32(IGC_RXDCTL(reg_idx), rxdctl); 702 } 703 704 /** 705 * igc_configure_rx - Configure receive Unit after Reset 706 * @adapter: board private structure 707 * 708 * Configure the Rx unit of the MAC after a reset. 709 */ 710 static void igc_configure_rx(struct igc_adapter *adapter) 711 { 712 int i; 713 714 /* Setup the HW Rx Head and Tail Descriptor Pointers and 715 * the Base and Length of the Rx Descriptor Ring 716 */ 717 for (i = 0; i < adapter->num_rx_queues; i++) 718 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 719 } 720 721 /** 722 * igc_configure_tx_ring - Configure transmit ring after Reset 723 * @adapter: board private structure 724 * @ring: tx ring to configure 725 * 726 * Configure a transmit ring after a reset. 727 */ 728 static void igc_configure_tx_ring(struct igc_adapter *adapter, 729 struct igc_ring *ring) 730 { 731 struct igc_hw *hw = &adapter->hw; 732 int reg_idx = ring->reg_idx; 733 u64 tdba = ring->dma; 734 u32 txdctl = 0; 735 736 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 737 738 /* disable the queue */ 739 wr32(IGC_TXDCTL(reg_idx), 0); 740 wrfl(); 741 742 wr32(IGC_TDLEN(reg_idx), 743 ring->count * sizeof(union igc_adv_tx_desc)); 744 wr32(IGC_TDBAL(reg_idx), 745 tdba & 0x00000000ffffffffULL); 746 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 747 748 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 749 wr32(IGC_TDH(reg_idx), 0); 750 writel(0, ring->tail); 751 752 txdctl |= IGC_TX_PTHRESH; 753 txdctl |= IGC_TX_HTHRESH << 8; 754 txdctl |= IGC_TX_WTHRESH << 16; 755 756 txdctl |= IGC_TXDCTL_QUEUE_ENABLE; 757 wr32(IGC_TXDCTL(reg_idx), txdctl); 758 } 759 760 /** 761 * igc_configure_tx - Configure transmit Unit after Reset 762 * @adapter: board private structure 763 * 764 * Configure the Tx unit of the MAC after a reset. 765 */ 766 static void igc_configure_tx(struct igc_adapter *adapter) 767 { 768 int i; 769 770 for (i = 0; i < adapter->num_tx_queues; i++) 771 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 772 } 773 774 /** 775 * igc_setup_mrqc - configure the multiple receive queue control registers 776 * @adapter: Board private structure 777 */ 778 static void igc_setup_mrqc(struct igc_adapter *adapter) 779 { 780 struct igc_hw *hw = &adapter->hw; 781 u32 j, num_rx_queues; 782 u32 mrqc, rxcsum; 783 u32 rss_key[10]; 784 785 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 786 for (j = 0; j < 10; j++) 787 wr32(IGC_RSSRK(j), rss_key[j]); 788 789 num_rx_queues = adapter->rss_queues; 790 791 if (adapter->rss_indir_tbl_init != num_rx_queues) { 792 for (j = 0; j < IGC_RETA_SIZE; j++) 793 adapter->rss_indir_tbl[j] = 794 (j * num_rx_queues) / IGC_RETA_SIZE; 795 adapter->rss_indir_tbl_init = num_rx_queues; 796 } 797 igc_write_rss_indir_tbl(adapter); 798 799 /* Disable raw packet checksumming so that RSS hash is placed in 800 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(IGC_RXCSUM);
	rxcsum |= IGC_RXCSUM_PCSD;

	/* Enable Receive Checksum Offload for SCTP */
	rxcsum |= IGC_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(IGC_RXCSUM, rxcsum);

	/* Generate RSS hash based on packet types, TCP/UDP
	 * port numbers and/or IPv4/v6 src and dst addresses
	 */
	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6 |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;

	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;

	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;

	wr32(IGC_MRQC, mrqc);
}

/**
 * igc_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 */
static void igc_setup_rctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(IGC_RCTL);

	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);

	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);

	/* enable stripping of CRC. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= IGC_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);

	/* enable LPE to allow for reception of jumbo frames */
	rctl |= IGC_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(IGC_RXDCTL(0), 0);

	/* This is useful for sniffing bad packets. */
	if (adapter->netdev->features & NETIF_F_RXALL) {
		/* UPE and MPE will be handled by normal PROMISC logic
		 * in set_rx_mode
		 */
		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */

		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
	}

	wr32(IGC_RCTL, rctl);
}

/**
 * igc_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 */
static void igc_setup_tctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which could be enabled by default */
	wr32(IGC_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(IGC_TCTL);
	tctl &= ~IGC_TCTL_CT;
	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);

	/* Enable transmits */
	tctl |= IGC_TCTL_EN;

	wr32(IGC_TCTL, tctl);
}

/**
 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 * @adapter: Pointer to adapter where the filter should be set
 * @index: Filter index
 * @type: MAC address filter type (source or destination)
 * @addr: MAC address
 * @queue: If non-negative, queue assignment feature is enabled and frames
 *	   matching the filter are enqueued onto 'queue'. Otherwise, queue
 *	   assignment is disabled.
911 */ 912 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 913 enum igc_mac_filter_type type, 914 const u8 *addr, int queue) 915 { 916 struct net_device *dev = adapter->netdev; 917 struct igc_hw *hw = &adapter->hw; 918 u32 ral, rah; 919 920 if (WARN_ON(index >= hw->mac.rar_entry_count)) 921 return; 922 923 ral = le32_to_cpup((__le32 *)(addr)); 924 rah = le16_to_cpup((__le16 *)(addr + 4)); 925 926 if (type == IGC_MAC_FILTER_TYPE_SRC) { 927 rah &= ~IGC_RAH_ASEL_MASK; 928 rah |= IGC_RAH_ASEL_SRC_ADDR; 929 } 930 931 if (queue >= 0) { 932 rah &= ~IGC_RAH_QSEL_MASK; 933 rah |= (queue << IGC_RAH_QSEL_SHIFT); 934 rah |= IGC_RAH_QSEL_ENABLE; 935 } 936 937 rah |= IGC_RAH_AV; 938 939 wr32(IGC_RAL(index), ral); 940 wr32(IGC_RAH(index), rah); 941 942 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 943 } 944 945 /** 946 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 947 * @adapter: Pointer to adapter where the filter should be cleared 948 * @index: Filter index 949 */ 950 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 951 { 952 struct net_device *dev = adapter->netdev; 953 struct igc_hw *hw = &adapter->hw; 954 955 if (WARN_ON(index >= hw->mac.rar_entry_count)) 956 return; 957 958 wr32(IGC_RAL(index), 0); 959 wr32(IGC_RAH(index), 0); 960 961 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 962 } 963 964 /* Set default MAC address for the PF in the first RAR entry */ 965 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 966 { 967 struct net_device *dev = adapter->netdev; 968 u8 *addr = adapter->hw.mac.addr; 969 970 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 971 972 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 973 } 974 975 /** 976 * igc_set_mac - Change the Ethernet Address of the NIC 977 * @netdev: network interface device structure 978 * @p: pointer to an address structure 979 * 980 * Returns 0 on success, negative on failure 981 */ 982 static int igc_set_mac(struct net_device *netdev, void *p) 983 { 984 struct igc_adapter *adapter = netdev_priv(netdev); 985 struct igc_hw *hw = &adapter->hw; 986 struct sockaddr *addr = p; 987 988 if (!is_valid_ether_addr(addr->sa_data)) 989 return -EADDRNOTAVAIL; 990 991 eth_hw_addr_set(netdev, addr->sa_data); 992 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 993 994 /* set the correct pool for the new PF MAC address in entry 0 */ 995 igc_set_default_mac_filter(adapter); 996 997 return 0; 998 } 999 1000 /** 1001 * igc_write_mc_addr_list - write multicast addresses to MTA 1002 * @netdev: network interface device structure 1003 * 1004 * Writes multicast address list to the MTA hash table. 1005 * Returns: -ENOMEM on failure 1006 * 0 on no addresses written 1007 * X on writing X addresses to MTA 1008 **/ 1009 static int igc_write_mc_addr_list(struct net_device *netdev) 1010 { 1011 struct igc_adapter *adapter = netdev_priv(netdev); 1012 struct igc_hw *hw = &adapter->hw; 1013 struct netdev_hw_addr *ha; 1014 u8 *mta_list; 1015 int i; 1016 1017 if (netdev_mc_empty(netdev)) { 1018 /* nothing to program, so clear mc list */ 1019 igc_update_mc_addr_list(hw, NULL, 0); 1020 return 0; 1021 } 1022 1023 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 1024 if (!mta_list) 1025 return -ENOMEM; 1026 1027 /* The shared function expects a packed array of only addresses. 
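	 * Each entry is ETH_ALEN (6) bytes with no padding, which is why the
	 * addresses are copied back to back below.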
	 */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	igc_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}

static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
				bool *first_flag, bool *insert_empty)
{
	struct igc_adapter *adapter = netdev_priv(ring->netdev);
	ktime_t cycle_time = adapter->cycle_time;
	ktime_t base_time = adapter->base_time;
	ktime_t now = ktime_get_clocktai();
	ktime_t baset_est, end_of_cycle;
	s32 launchtime;
	s64 n;

	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);

	baset_est = ktime_add_ns(base_time, cycle_time * (n));
	end_of_cycle = ktime_add_ns(baset_est, cycle_time);

	if (ktime_compare(txtime, end_of_cycle) >= 0) {
		if (baset_est != ring->last_ff_cycle) {
			*first_flag = true;
			ring->last_ff_cycle = baset_est;

			if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
				*insert_empty = true;
		}
	}

	/* There is a window at the end of the cycle in which packets may not
	 * honor their launchtime. A 5 usec window is used to account for the
	 * time it takes software to update the tail pointer and for the
	 * packet to be DMA'd into the packet buffer.
	 */
	if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
		netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
			    txtime);

	ring->last_tx_cycle = end_of_cycle;

	launchtime = ktime_sub_ns(txtime, baset_est);
	if (launchtime > 0)
		div_s64_rem(launchtime, cycle_time, &launchtime);
	else
		launchtime = 0;

	return cpu_to_le32(launchtime);
}

static int igc_init_empty_frame(struct igc_ring *ring,
				struct igc_tx_buffer *buffer,
				struct sk_buff *skb)
{
	unsigned int size;
	dma_addr_t dma;

	size = skb_headlen(skb);

	dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ring->dev, dma)) {
		net_err_ratelimited("%s: DMA mapping error for empty frame\n",
				    netdev_name(ring->netdev));
		return -ENOMEM;
	}

	buffer->type = IGC_TX_BUFFER_TYPE_SKB;
	buffer->skb = skb;
	buffer->protocol = 0;
	buffer->bytecount = skb->len;
	buffer->gso_segs = 1;
	buffer->time_stamp = jiffies;
	dma_unmap_len_set(buffer, len, skb->len);
	dma_unmap_addr_set(buffer, dma, dma);

	return 0;
}

static void igc_init_tx_empty_descriptor(struct igc_ring *ring,
					 struct sk_buff *skb,
					 struct igc_tx_buffer *first)
{
	union igc_adv_tx_desc *desc;
	u32 cmd_type, olinfo_status;

	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
		   first->bytecount;
	olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;

	desc = IGC_TX_DESC(ring, ring->next_to_use);
	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));

	netdev_tx_sent_queue(txring_txq(ring), skb->len);

	first->next_to_watch = desc;

	ring->next_to_use++;
	if (ring->next_to_use == ring->count)
		ring->next_to_use = 0;
}

#define IGC_EMPTY_FRAME_SIZE	60

static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
			    __le32 launch_time, bool first_flag,
			    u32 vlan_macip_lens, u32 type_tucmd,
			    u32 mss_l4len_idx)
{
	struct igc_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IGC_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

	/* For i225, context index must be unique per ring. */
	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	if (first_flag)
		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;

	context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
	context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->launch_time = launch_time;
}

static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
			__le32 launch_time, bool first_flag)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_failed:
		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
		    !tx_ring->launchtime_enable)
			return;
		goto no_csum;
	}

	switch (skb->csum_offset) {
	case offsetof(struct tcphdr, check):
		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
		fallthrough;
	case offsetof(struct udphdr, check):
		break;
	case offsetof(struct sctphdr, checksum):
		/* validate that this is actually an SCTP request */
		if (skb_csum_is_sctp(skb)) {
			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
			break;
		}
		fallthrough;
	default:
		skb_checksum_help(skb);
		goto csum_failed;
	}

	/* update TX checksum flag */
	first->tx_flags |= IGC_TX_FLAGS_CSUM;
	vlan_macip_lens = skb_checksum_start_offset(skb) -
			  skb_network_offset(skb);
no_csum:
	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;

	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
			vlan_macip_lens, type_tucmd, 0);
}

static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Memory barrier: make the stopped queue visible before re-checking
	 * how many descriptors the cleanup path has freed.
	 */
	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available.
	 */
	if (igc_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	if (igc_desc_unused(tx_ring) >= size)
		return 0;
	return __igc_maybe_stop_tx(tx_ring, size);
}

/* Translate the _flag bit in _input into the _result bit, scaling by
 * whichever of the two bit positions is larger.
 */
#define IGC_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ?
\ 1248 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1249 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1250 1251 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1252 { 1253 /* set type for advanced descriptor with frame checksum insertion */ 1254 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1255 IGC_ADVTXD_DCMD_DEXT | 1256 IGC_ADVTXD_DCMD_IFCS; 1257 1258 /* set HW vlan bit if vlan is present */ 1259 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1260 IGC_ADVTXD_DCMD_VLE); 1261 1262 /* set segmentation bits for TSO */ 1263 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1264 (IGC_ADVTXD_DCMD_TSE)); 1265 1266 /* set timestamp bit if present, will select the register set 1267 * based on the _TSTAMP(_X) bit. 1268 */ 1269 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1270 (IGC_ADVTXD_MAC_TSTAMP)); 1271 1272 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, 1273 (IGC_ADVTXD_TSTAMP_REG_1)); 1274 1275 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, 1276 (IGC_ADVTXD_TSTAMP_REG_2)); 1277 1278 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, 1279 (IGC_ADVTXD_TSTAMP_REG_3)); 1280 1281 /* insert frame checksum */ 1282 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1283 1284 return cmd_type; 1285 } 1286 1287 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1288 union igc_adv_tx_desc *tx_desc, 1289 u32 tx_flags, unsigned int paylen) 1290 { 1291 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1292 1293 /* insert L4 checksum */ 1294 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, 1295 (IGC_TXD_POPTS_TXSM << 8)); 1296 1297 /* insert IPv4 checksum */ 1298 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, 1299 (IGC_TXD_POPTS_IXSM << 8)); 1300 1301 /* Use the second timer (free running, in general) for the timestamp */ 1302 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, 1303 IGC_TXD_PTP2_TIMER_1); 1304 1305 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1306 } 1307 1308 static int igc_tx_map(struct igc_ring *tx_ring, 1309 struct igc_tx_buffer *first, 1310 const u8 hdr_len) 1311 { 1312 struct sk_buff *skb = first->skb; 1313 struct igc_tx_buffer *tx_buffer; 1314 union igc_adv_tx_desc *tx_desc; 1315 u32 tx_flags = first->tx_flags; 1316 skb_frag_t *frag; 1317 u16 i = tx_ring->next_to_use; 1318 unsigned int data_len, size; 1319 dma_addr_t dma; 1320 u32 cmd_type; 1321 1322 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1323 tx_desc = IGC_TX_DESC(tx_ring, i); 1324 1325 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1326 1327 size = skb_headlen(skb); 1328 data_len = skb->data_len; 1329 1330 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1331 1332 tx_buffer = first; 1333 1334 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1335 if (dma_mapping_error(tx_ring->dev, dma)) 1336 goto dma_error; 1337 1338 /* record length, and DMA address */ 1339 dma_unmap_len_set(tx_buffer, len, size); 1340 dma_unmap_addr_set(tx_buffer, dma, dma); 1341 1342 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1343 1344 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1345 tx_desc->read.cmd_type_len = 1346 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1347 1348 i++; 1349 tx_desc++; 1350 if (i == tx_ring->count) { 1351 tx_desc = IGC_TX_DESC(tx_ring, 0); 1352 i = 0; 1353 } 1354 tx_desc->read.olinfo_status = 0; 1355 1356 dma += IGC_MAX_DATA_PER_TXD; 1357 size -= IGC_MAX_DATA_PER_TXD; 1358 1359 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1360 } 1361 
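		/* No paged data left to map: this was the last buffer of the
		 * frame, so exit the mapping loop.
		 */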
1362 if (likely(!data_len)) 1363 break; 1364 1365 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1366 1367 i++; 1368 tx_desc++; 1369 if (i == tx_ring->count) { 1370 tx_desc = IGC_TX_DESC(tx_ring, 0); 1371 i = 0; 1372 } 1373 tx_desc->read.olinfo_status = 0; 1374 1375 size = skb_frag_size(frag); 1376 data_len -= size; 1377 1378 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 1379 size, DMA_TO_DEVICE); 1380 1381 tx_buffer = &tx_ring->tx_buffer_info[i]; 1382 } 1383 1384 /* write last descriptor with RS and EOP bits */ 1385 cmd_type |= size | IGC_TXD_DCMD; 1386 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1387 1388 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1389 1390 /* set the timestamp */ 1391 first->time_stamp = jiffies; 1392 1393 skb_tx_timestamp(skb); 1394 1395 /* Force memory writes to complete before letting h/w know there 1396 * are new descriptors to fetch. (Only applicable for weak-ordered 1397 * memory model archs, such as IA-64). 1398 * 1399 * We also need this memory barrier to make certain all of the 1400 * status bits have been updated before next_to_watch is written. 1401 */ 1402 wmb(); 1403 1404 /* set next_to_watch value indicating a packet is present */ 1405 first->next_to_watch = tx_desc; 1406 1407 i++; 1408 if (i == tx_ring->count) 1409 i = 0; 1410 1411 tx_ring->next_to_use = i; 1412 1413 /* Make sure there is space in the ring for the next send. */ 1414 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1415 1416 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1417 writel(i, tx_ring->tail); 1418 } 1419 1420 return 0; 1421 dma_error: 1422 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1423 tx_buffer = &tx_ring->tx_buffer_info[i]; 1424 1425 /* clear dma mappings for failed tx_buffer_info map */ 1426 while (tx_buffer != first) { 1427 if (dma_unmap_len(tx_buffer, len)) 1428 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1429 1430 if (i-- == 0) 1431 i += tx_ring->count; 1432 tx_buffer = &tx_ring->tx_buffer_info[i]; 1433 } 1434 1435 if (dma_unmap_len(tx_buffer, len)) 1436 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1437 1438 dev_kfree_skb_any(tx_buffer->skb); 1439 tx_buffer->skb = NULL; 1440 1441 tx_ring->next_to_use = i; 1442 1443 return -1; 1444 } 1445 1446 static int igc_tso(struct igc_ring *tx_ring, 1447 struct igc_tx_buffer *first, 1448 __le32 launch_time, bool first_flag, 1449 u8 *hdr_len) 1450 { 1451 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1452 struct sk_buff *skb = first->skb; 1453 union { 1454 struct iphdr *v4; 1455 struct ipv6hdr *v6; 1456 unsigned char *hdr; 1457 } ip; 1458 union { 1459 struct tcphdr *tcp; 1460 struct udphdr *udp; 1461 unsigned char *hdr; 1462 } l4; 1463 u32 paylen, l4_offset; 1464 int err; 1465 1466 if (skb->ip_summed != CHECKSUM_PARTIAL) 1467 return 0; 1468 1469 if (!skb_is_gso(skb)) 1470 return 0; 1471 1472 err = skb_cow_head(skb, 0); 1473 if (err < 0) 1474 return err; 1475 1476 ip.hdr = skb_network_header(skb); 1477 l4.hdr = skb_checksum_start(skb); 1478 1479 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1480 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1481 1482 /* initialize outer IP header fields */ 1483 if (ip.v4->version == 4) { 1484 unsigned char *csum_start = skb_checksum_start(skb); 1485 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1486 1487 /* IP header will have to cancel out any data that 1488 * is not a part of the outer IP header 1489 */ 1490 ip.v4->check = csum_fold(csum_partial(trans_start, 1491 csum_start - trans_start, 1492 0)); 1493 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 
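		/* Zero the length field; for TSO the hardware inserts the
		 * correct length into each segment it generates.
		 */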
1494 1495 ip.v4->tot_len = 0; 1496 first->tx_flags |= IGC_TX_FLAGS_TSO | 1497 IGC_TX_FLAGS_CSUM | 1498 IGC_TX_FLAGS_IPV4; 1499 } else { 1500 ip.v6->payload_len = 0; 1501 first->tx_flags |= IGC_TX_FLAGS_TSO | 1502 IGC_TX_FLAGS_CSUM; 1503 } 1504 1505 /* determine offset of inner transport header */ 1506 l4_offset = l4.hdr - skb->data; 1507 1508 /* remove payload length from inner checksum */ 1509 paylen = skb->len - l4_offset; 1510 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1511 /* compute length of segmentation header */ 1512 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1513 csum_replace_by_diff(&l4.tcp->check, 1514 (__force __wsum)htonl(paylen)); 1515 } else { 1516 /* compute length of segmentation header */ 1517 *hdr_len = sizeof(*l4.udp) + l4_offset; 1518 csum_replace_by_diff(&l4.udp->check, 1519 (__force __wsum)htonl(paylen)); 1520 } 1521 1522 /* update gso size and bytecount with header size */ 1523 first->gso_segs = skb_shinfo(skb)->gso_segs; 1524 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1525 1526 /* MSS L4LEN IDX */ 1527 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1528 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1529 1530 /* VLAN MACLEN IPLEN */ 1531 vlan_macip_lens = l4.hdr - ip.hdr; 1532 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1533 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1534 1535 igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, 1536 vlan_macip_lens, type_tucmd, mss_l4len_idx); 1537 1538 return 1; 1539 } 1540 1541 static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) 1542 { 1543 int i; 1544 1545 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 1546 struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; 1547 1548 if (tstamp->skb) 1549 continue; 1550 1551 tstamp->skb = skb_get(skb); 1552 tstamp->start = jiffies; 1553 *flags = tstamp->flags; 1554 1555 return true; 1556 } 1557 1558 return false; 1559 } 1560 1561 static int igc_insert_empty_frame(struct igc_ring *tx_ring) 1562 { 1563 struct igc_tx_buffer *empty_info; 1564 struct sk_buff *empty_skb; 1565 void *data; 1566 int ret; 1567 1568 empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1569 empty_skb = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); 1570 if (unlikely(!empty_skb)) { 1571 net_err_ratelimited("%s: skb alloc error for empty frame\n", 1572 netdev_name(tx_ring->netdev)); 1573 return -ENOMEM; 1574 } 1575 1576 data = skb_put(empty_skb, IGC_EMPTY_FRAME_SIZE); 1577 memset(data, 0, IGC_EMPTY_FRAME_SIZE); 1578 1579 /* Prepare DMA mapping and Tx buffer information */ 1580 ret = igc_init_empty_frame(tx_ring, empty_info, empty_skb); 1581 if (unlikely(ret)) { 1582 dev_kfree_skb_any(empty_skb); 1583 return ret; 1584 } 1585 1586 /* Prepare advanced context descriptor for empty packet */ 1587 igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); 1588 1589 /* Prepare advanced data descriptor for empty packet */ 1590 igc_init_tx_empty_descriptor(tx_ring, empty_skb, empty_info); 1591 1592 return 0; 1593 } 1594 1595 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1596 struct igc_ring *tx_ring) 1597 { 1598 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1599 bool first_flag = false, insert_empty = false; 1600 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1601 __be16 protocol = vlan_get_protocol(skb); 1602 struct igc_tx_buffer *first; 1603 __le32 launch_time = 0; 1604 u32 tx_flags = 0; 1605 unsigned short f; 1606 ktime_t txtime; 1607 u8 hdr_len = 0; 1608 int tso = 
0; 1609 1610 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1611 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1612 * + 2 desc gap to keep tail from touching head, 1613 * + 1 desc for context descriptor, 1614 * + 2 desc for inserting an empty packet for launch time, 1615 * otherwise try next time 1616 */ 1617 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1618 count += TXD_USE_COUNT(skb_frag_size( 1619 &skb_shinfo(skb)->frags[f])); 1620 1621 if (igc_maybe_stop_tx(tx_ring, count + 5)) { 1622 /* this is a hard error */ 1623 return NETDEV_TX_BUSY; 1624 } 1625 1626 if (!tx_ring->launchtime_enable) 1627 goto done; 1628 1629 txtime = skb->tstamp; 1630 skb->tstamp = ktime_set(0, 0); 1631 launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); 1632 1633 if (insert_empty) { 1634 /* Reset the launch time if the required empty frame fails to 1635 * be inserted. However, this packet is not dropped, so it 1636 * "dirties" the current Qbv cycle. This ensures that the 1637 * upcoming packet, which is scheduled in the next Qbv cycle, 1638 * does not require an empty frame. This way, the launch time 1639 * continues to function correctly despite the current failure 1640 * to insert the empty frame. 1641 */ 1642 if (igc_insert_empty_frame(tx_ring)) 1643 launch_time = 0; 1644 } 1645 1646 done: 1647 /* record the location of the first descriptor for this packet */ 1648 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1649 first->type = IGC_TX_BUFFER_TYPE_SKB; 1650 first->skb = skb; 1651 first->bytecount = skb->len; 1652 first->gso_segs = 1; 1653 1654 if (adapter->qbv_transition || tx_ring->oper_gate_closed) 1655 goto out_drop; 1656 1657 if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { 1658 adapter->stats.txdrop++; 1659 goto out_drop; 1660 } 1661 1662 if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && 1663 skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1664 unsigned long flags; 1665 u32 tstamp_flags; 1666 1667 spin_lock_irqsave(&adapter->ptp_tx_lock, flags); 1668 if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { 1669 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1670 tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; 1671 if (skb->sk && 1672 READ_ONCE(skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC) 1673 tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; 1674 } else { 1675 adapter->tx_hwtstamp_skipped++; 1676 } 1677 1678 spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); 1679 } 1680 1681 if (skb_vlan_tag_present(skb)) { 1682 tx_flags |= IGC_TX_FLAGS_VLAN; 1683 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1684 } 1685 1686 /* record initial flags and protocol */ 1687 first->tx_flags = tx_flags; 1688 first->protocol = protocol; 1689 1690 tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); 1691 if (tso < 0) 1692 goto out_drop; 1693 else if (!tso) 1694 igc_tx_csum(tx_ring, first, launch_time, first_flag); 1695 1696 igc_tx_map(tx_ring, first, hdr_len); 1697 1698 return NETDEV_TX_OK; 1699 1700 out_drop: 1701 dev_kfree_skb_any(first->skb); 1702 first->skb = NULL; 1703 1704 return NETDEV_TX_OK; 1705 } 1706 1707 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1708 struct sk_buff *skb) 1709 { 1710 unsigned int r_idx = skb->queue_mapping; 1711 1712 if (r_idx >= adapter->num_tx_queues) 1713 r_idx = r_idx % adapter->num_tx_queues; 1714 1715 return adapter->tx_ring[r_idx]; 1716 } 1717 1718 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 1719 struct net_device 
				  *netdev)
{
	struct igc_adapter *adapter = netdev_priv(netdev);

	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (skb->len < 17) {
		if (skb_padto(skb, 17))
			return NETDEV_TX_OK;
		skb->len = 17;
	}

	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
}

static void igc_rx_checksum(struct igc_ring *ring,
			    union igc_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Ignore Checksum bit is set */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
		return;

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (igc_test_staterr(rx_desc,
			     IGC_RXDEXT_STATERR_L4E |
			     IGC_RXDEXT_STATERR_IPE)) {
		/* work around an erratum with SCTP packets where the TCPE
		 * (aka L4E) bit is set incorrectly on 64 byte (60 byte w/o
		 * CRC) packets; let the stack check the crc32c instead
		 */
		if (!(skb->len == 60 &&
		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		}
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
				      IGC_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
		   le32_to_cpu(rx_desc->wb.upper.status_error));
}

/* Mapping HW RSS Type to enum pkt_hash_types */
static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX]	= PKT_HASH_TYPE_L4,
	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions */
	[13] = PKT_HASH_TYPE_NONE,
	[14] = PKT_HASH_TYPE_NONE,
	[15] = PKT_HASH_TYPE_NONE,
};

static inline void igc_rx_hash(struct igc_ring *ring,
			       union igc_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	if (ring->netdev->features & NETIF_F_RXHASH) {
		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
		u32 rss_type = igc_rss_type(rx_desc);

		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
	}
}

static void igc_rx_vlan(struct igc_ring *rx_ring,
			union igc_adv_rx_desc *rx_desc,
			struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;
	u16 vid;

	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
	    igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
		if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
		    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
			vid = be16_to_cpu((__force
__be16)rx_desc->wb.upper.vlan); 1819 else 1820 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 1821 1822 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 1823 } 1824 } 1825 1826 /** 1827 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1828 * @rx_ring: rx descriptor ring packet is being transacted on 1829 * @rx_desc: pointer to the EOP Rx descriptor 1830 * @skb: pointer to current skb being populated 1831 * 1832 * This function checks the ring, descriptor, and packet information in order 1833 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1834 * skb. 1835 */ 1836 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1837 union igc_adv_rx_desc *rx_desc, 1838 struct sk_buff *skb) 1839 { 1840 igc_rx_hash(rx_ring, rx_desc, skb); 1841 1842 igc_rx_checksum(rx_ring, rx_desc, skb); 1843 1844 igc_rx_vlan(rx_ring, rx_desc, skb); 1845 1846 skb_record_rx_queue(skb, rx_ring->queue_index); 1847 1848 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1849 } 1850 1851 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1852 { 1853 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1854 struct igc_adapter *adapter = netdev_priv(netdev); 1855 struct igc_hw *hw = &adapter->hw; 1856 u32 ctrl; 1857 1858 ctrl = rd32(IGC_CTRL); 1859 1860 if (enable) { 1861 /* enable VLAN tag insert/strip */ 1862 ctrl |= IGC_CTRL_VME; 1863 } else { 1864 /* disable VLAN tag insert/strip */ 1865 ctrl &= ~IGC_CTRL_VME; 1866 } 1867 wr32(IGC_CTRL, ctrl); 1868 } 1869 1870 static void igc_restore_vlan(struct igc_adapter *adapter) 1871 { 1872 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1873 } 1874 1875 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, 1876 const unsigned int size, 1877 int *rx_buffer_pgcnt) 1878 { 1879 struct igc_rx_buffer *rx_buffer; 1880 1881 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1882 *rx_buffer_pgcnt = 1883 #if (PAGE_SIZE < 8192) 1884 page_count(rx_buffer->page); 1885 #else 1886 0; 1887 #endif 1888 prefetchw(rx_buffer->page); 1889 1890 /* we are reusing so sync this buffer for CPU use */ 1891 dma_sync_single_range_for_cpu(rx_ring->dev, 1892 rx_buffer->dma, 1893 rx_buffer->page_offset, 1894 size, 1895 DMA_FROM_DEVICE); 1896 1897 rx_buffer->pagecnt_bias--; 1898 1899 return rx_buffer; 1900 } 1901 1902 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1903 unsigned int truesize) 1904 { 1905 #if (PAGE_SIZE < 8192) 1906 buffer->page_offset ^= truesize; 1907 #else 1908 buffer->page_offset += truesize; 1909 #endif 1910 } 1911 1912 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1913 unsigned int size) 1914 { 1915 unsigned int truesize; 1916 1917 #if (PAGE_SIZE < 8192) 1918 truesize = igc_rx_pg_size(ring) / 2; 1919 #else 1920 truesize = ring_uses_build_skb(ring) ? 1921 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1922 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1923 SKB_DATA_ALIGN(size); 1924 #endif 1925 return truesize; 1926 } 1927 1928 /** 1929 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1930 * @rx_ring: rx descriptor ring to transact packets on 1931 * @rx_buffer: buffer containing page to add 1932 * @skb: sk_buff to place the data into 1933 * @size: size of buffer to be added 1934 * 1935 * This function will add the data contained in rx_buffer->page to the skb. 
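 * The buffer's page offset is then flipped (or advanced) so that the unused
 * half of the page can be handed back to the hardware for reuse.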
1936 */ 1937 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1938 struct igc_rx_buffer *rx_buffer, 1939 struct sk_buff *skb, 1940 unsigned int size) 1941 { 1942 unsigned int truesize; 1943 1944 #if (PAGE_SIZE < 8192) 1945 truesize = igc_rx_pg_size(rx_ring) / 2; 1946 #else 1947 truesize = ring_uses_build_skb(rx_ring) ? 1948 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1949 SKB_DATA_ALIGN(size); 1950 #endif 1951 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1952 rx_buffer->page_offset, size, truesize); 1953 1954 igc_rx_buffer_flip(rx_buffer, truesize); 1955 } 1956 1957 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1958 struct igc_rx_buffer *rx_buffer, 1959 struct xdp_buff *xdp) 1960 { 1961 unsigned int size = xdp->data_end - xdp->data; 1962 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1963 unsigned int metasize = xdp->data - xdp->data_meta; 1964 struct sk_buff *skb; 1965 1966 /* prefetch first cache line of first page */ 1967 net_prefetch(xdp->data_meta); 1968 1969 /* build an skb around the page buffer */ 1970 skb = napi_build_skb(xdp->data_hard_start, truesize); 1971 if (unlikely(!skb)) 1972 return NULL; 1973 1974 /* update pointers within the skb to store the data */ 1975 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1976 __skb_put(skb, size); 1977 if (metasize) 1978 skb_metadata_set(skb, metasize); 1979 1980 igc_rx_buffer_flip(rx_buffer, truesize); 1981 return skb; 1982 } 1983 1984 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1985 struct igc_rx_buffer *rx_buffer, 1986 struct igc_xdp_buff *ctx) 1987 { 1988 struct xdp_buff *xdp = &ctx->xdp; 1989 unsigned int metasize = xdp->data - xdp->data_meta; 1990 unsigned int size = xdp->data_end - xdp->data; 1991 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1992 void *va = xdp->data; 1993 unsigned int headlen; 1994 struct sk_buff *skb; 1995 1996 /* prefetch first cache line of first page */ 1997 net_prefetch(xdp->data_meta); 1998 1999 /* allocate a skb to store the frags */ 2000 skb = napi_alloc_skb(&rx_ring->q_vector->napi, 2001 IGC_RX_HDR_LEN + metasize); 2002 if (unlikely(!skb)) 2003 return NULL; 2004 2005 if (ctx->rx_ts) { 2006 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2007 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2008 } 2009 2010 /* Determine available headroom for copy */ 2011 headlen = size; 2012 if (headlen > IGC_RX_HDR_LEN) 2013 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 2014 2015 /* align pull length to size of long to optimize memcpy performance */ 2016 memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 2017 ALIGN(headlen + metasize, sizeof(long))); 2018 2019 if (metasize) { 2020 skb_metadata_set(skb, metasize); 2021 __skb_pull(skb, metasize); 2022 } 2023 2024 /* update all of the pointers */ 2025 size -= headlen; 2026 if (size) { 2027 skb_add_rx_frag(skb, 0, rx_buffer->page, 2028 (va + headlen) - page_address(rx_buffer->page), 2029 size, truesize); 2030 igc_rx_buffer_flip(rx_buffer, truesize); 2031 } else { 2032 rx_buffer->pagecnt_bias++; 2033 } 2034 2035 return skb; 2036 } 2037 2038 /** 2039 * igc_reuse_rx_page - page flip buffer and store it back on the ring 2040 * @rx_ring: rx descriptor ring to store buffers on 2041 * @old_buff: donor buffer to have page reused 2042 * 2043 * Synchronizes page for reuse by the adapter 2044 */ 2045 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 2046 struct igc_rx_buffer *old_buff) 2047 { 2048 u16 nta = rx_ring->next_to_alloc; 2049 struct igc_rx_buffer 
*new_buff; 2050 2051 new_buff = &rx_ring->rx_buffer_info[nta]; 2052 2053 /* update, and store next to alloc */ 2054 nta++; 2055 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 2056 2057 /* Transfer page from old buffer to new buffer. 2058 * Move each member individually to avoid possible store 2059 * forwarding stalls. 2060 */ 2061 new_buff->dma = old_buff->dma; 2062 new_buff->page = old_buff->page; 2063 new_buff->page_offset = old_buff->page_offset; 2064 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 2065 } 2066 2067 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 2068 int rx_buffer_pgcnt) 2069 { 2070 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 2071 struct page *page = rx_buffer->page; 2072 2073 /* avoid re-using remote and pfmemalloc pages */ 2074 if (!dev_page_is_reusable(page)) 2075 return false; 2076 2077 #if (PAGE_SIZE < 8192) 2078 /* if we are only owner of page we can reuse it */ 2079 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 2080 return false; 2081 #else 2082 #define IGC_LAST_OFFSET \ 2083 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 2084 2085 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 2086 return false; 2087 #endif 2088 2089 /* If we have drained the page fragment pool we need to update 2090 * the pagecnt_bias and page count so that we fully restock the 2091 * number of references the driver holds. 2092 */ 2093 if (unlikely(pagecnt_bias == 1)) { 2094 page_ref_add(page, USHRT_MAX - 1); 2095 rx_buffer->pagecnt_bias = USHRT_MAX; 2096 } 2097 2098 return true; 2099 } 2100 2101 /** 2102 * igc_is_non_eop - process handling of non-EOP buffers 2103 * @rx_ring: Rx ring being processed 2104 * @rx_desc: Rx descriptor for current buffer 2105 * 2106 * This function updates next to clean. If the buffer is an EOP buffer 2107 * this function exits returning false, otherwise it will place the 2108 * sk_buff in the next buffer to be chained and return true indicating 2109 * that this is in fact a non-EOP buffer. 2110 */ 2111 static bool igc_is_non_eop(struct igc_ring *rx_ring, 2112 union igc_adv_rx_desc *rx_desc) 2113 { 2114 u32 ntc = rx_ring->next_to_clean + 1; 2115 2116 /* fetch, update, and store next to clean */ 2117 ntc = (ntc < rx_ring->count) ? ntc : 0; 2118 rx_ring->next_to_clean = ntc; 2119 2120 prefetch(IGC_RX_DESC(rx_ring, ntc)); 2121 2122 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 2123 return false; 2124 2125 return true; 2126 } 2127 2128 /** 2129 * igc_cleanup_headers - Correct corrupted or empty headers 2130 * @rx_ring: rx descriptor ring packet is being transacted on 2131 * @rx_desc: pointer to the EOP Rx descriptor 2132 * @skb: pointer to current skb being fixed 2133 * 2134 * Address the case where we are pulling data in on pages only 2135 * and as such no data is present in the skb header. 2136 * 2137 * In addition if skb is not at least 60 bytes we need to pad it so that 2138 * it is large enough to qualify as a valid Ethernet frame. 2139 * 2140 * Returns true if an error was encountered and skb was freed. 
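 * Frames flagged with RXE (receive errors) are freed and dropped here unless
 * the interface has NETIF_F_RXALL enabled, in which case they are passed up
 * unmodified.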
2141 */ 2142 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 2143 union igc_adv_rx_desc *rx_desc, 2144 struct sk_buff *skb) 2145 { 2146 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 2147 struct net_device *netdev = rx_ring->netdev; 2148 2149 if (!(netdev->features & NETIF_F_RXALL)) { 2150 dev_kfree_skb_any(skb); 2151 return true; 2152 } 2153 } 2154 2155 /* if eth_skb_pad returns an error the skb was freed */ 2156 if (eth_skb_pad(skb)) 2157 return true; 2158 2159 return false; 2160 } 2161 2162 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 2163 struct igc_rx_buffer *rx_buffer, 2164 int rx_buffer_pgcnt) 2165 { 2166 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2167 /* hand second half of page back to the ring */ 2168 igc_reuse_rx_page(rx_ring, rx_buffer); 2169 } else { 2170 /* We are not reusing the buffer so unmap it and free 2171 * any references we are holding to it 2172 */ 2173 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 2174 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 2175 IGC_RX_DMA_ATTR); 2176 __page_frag_cache_drain(rx_buffer->page, 2177 rx_buffer->pagecnt_bias); 2178 } 2179 2180 /* clear contents of rx_buffer */ 2181 rx_buffer->page = NULL; 2182 } 2183 2184 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 2185 { 2186 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 2187 2188 if (ring_uses_build_skb(rx_ring)) 2189 return IGC_SKB_PAD; 2190 if (igc_xdp_is_enabled(adapter)) 2191 return XDP_PACKET_HEADROOM; 2192 2193 return 0; 2194 } 2195 2196 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 2197 struct igc_rx_buffer *bi) 2198 { 2199 struct page *page = bi->page; 2200 dma_addr_t dma; 2201 2202 /* since we are recycling buffers we should seldom need to alloc */ 2203 if (likely(page)) 2204 return true; 2205 2206 /* alloc new page for storage */ 2207 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 2208 if (unlikely(!page)) { 2209 rx_ring->rx_stats.alloc_failed++; 2210 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2211 return false; 2212 } 2213 2214 /* map page for use */ 2215 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 2216 igc_rx_pg_size(rx_ring), 2217 DMA_FROM_DEVICE, 2218 IGC_RX_DMA_ATTR); 2219 2220 /* if mapping failed free memory back to system since 2221 * there isn't much point in holding memory we can't use 2222 */ 2223 if (dma_mapping_error(rx_ring->dev, dma)) { 2224 __free_page(page); 2225 2226 rx_ring->rx_stats.alloc_failed++; 2227 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2228 return false; 2229 } 2230 2231 bi->dma = dma; 2232 bi->page = page; 2233 bi->page_offset = igc_rx_offset(rx_ring); 2234 page_ref_add(page, USHRT_MAX - 1); 2235 bi->pagecnt_bias = USHRT_MAX; 2236 2237 return true; 2238 } 2239 2240 /** 2241 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 2242 * @rx_ring: rx descriptor ring 2243 * @cleaned_count: number of buffers to clean 2244 */ 2245 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 2246 { 2247 union igc_adv_rx_desc *rx_desc; 2248 u16 i = rx_ring->next_to_use; 2249 struct igc_rx_buffer *bi; 2250 u16 bufsz; 2251 2252 /* nothing to do */ 2253 if (!cleaned_count) 2254 return; 2255 2256 rx_desc = IGC_RX_DESC(rx_ring, i); 2257 bi = &rx_ring->rx_buffer_info[i]; 2258 i -= rx_ring->count; 2259 2260 bufsz = igc_rx_bufsz(rx_ring); 2261 2262 do { 2263 if (!igc_alloc_mapped_page(rx_ring, bi)) 2264 break; 2265 2266 /* sync the buffer for use by the device */ 2267 
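		/* Only the bufsz bytes at page_offset are synced back to the
		 * device; that is the region this descriptor allows the
		 * hardware to DMA into.
		 */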
dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 2268 bi->page_offset, bufsz, 2269 DMA_FROM_DEVICE); 2270 2271 /* Refresh the desc even if buffer_addrs didn't change 2272 * because each write-back erases this info. 2273 */ 2274 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2275 2276 rx_desc++; 2277 bi++; 2278 i++; 2279 if (unlikely(!i)) { 2280 rx_desc = IGC_RX_DESC(rx_ring, 0); 2281 bi = rx_ring->rx_buffer_info; 2282 i -= rx_ring->count; 2283 } 2284 2285 /* clear the length for the next_to_use descriptor */ 2286 rx_desc->wb.upper.length = 0; 2287 2288 cleaned_count--; 2289 } while (cleaned_count); 2290 2291 i += rx_ring->count; 2292 2293 if (rx_ring->next_to_use != i) { 2294 /* record the next descriptor to use */ 2295 rx_ring->next_to_use = i; 2296 2297 /* update next to alloc since we have filled the ring */ 2298 rx_ring->next_to_alloc = i; 2299 2300 /* Force memory writes to complete before letting h/w 2301 * know there are new descriptors to fetch. (Only 2302 * applicable for weak-ordered memory model archs, 2303 * such as IA-64). 2304 */ 2305 wmb(); 2306 writel(i, rx_ring->tail); 2307 } 2308 } 2309 2310 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2311 { 2312 union igc_adv_rx_desc *desc; 2313 u16 i = ring->next_to_use; 2314 struct igc_rx_buffer *bi; 2315 dma_addr_t dma; 2316 bool ok = true; 2317 2318 if (!count) 2319 return ok; 2320 2321 XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); 2322 2323 desc = IGC_RX_DESC(ring, i); 2324 bi = &ring->rx_buffer_info[i]; 2325 i -= ring->count; 2326 2327 do { 2328 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2329 if (!bi->xdp) { 2330 ok = false; 2331 break; 2332 } 2333 2334 dma = xsk_buff_xdp_get_dma(bi->xdp); 2335 desc->read.pkt_addr = cpu_to_le64(dma); 2336 2337 desc++; 2338 bi++; 2339 i++; 2340 if (unlikely(!i)) { 2341 desc = IGC_RX_DESC(ring, 0); 2342 bi = ring->rx_buffer_info; 2343 i -= ring->count; 2344 } 2345 2346 /* Clear the length for the next_to_use descriptor. */ 2347 desc->wb.upper.length = 0; 2348 2349 count--; 2350 } while (count); 2351 2352 i += ring->count; 2353 2354 if (ring->next_to_use != i) { 2355 ring->next_to_use = i; 2356 2357 /* Force memory writes to complete before letting h/w 2358 * know there are new descriptors to fetch. (Only 2359 * applicable for weak-ordered memory model archs, 2360 * such as IA-64). 2361 */ 2362 wmb(); 2363 writel(i, ring->tail); 2364 } 2365 2366 return ok; 2367 } 2368 2369 /* This function requires __netif_tx_lock is held by the caller. */ 2370 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2371 struct xdp_frame *xdpf) 2372 { 2373 struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 2374 u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? 
sinfo->nr_frags : 0; 2375 u16 count, index = ring->next_to_use; 2376 struct igc_tx_buffer *head = &ring->tx_buffer_info[index]; 2377 struct igc_tx_buffer *buffer = head; 2378 union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index); 2379 u32 olinfo_status, len = xdpf->len, cmd_type; 2380 void *data = xdpf->data; 2381 u16 i; 2382 2383 count = TXD_USE_COUNT(len); 2384 for (i = 0; i < nr_frags; i++) 2385 count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); 2386 2387 if (igc_maybe_stop_tx(ring, count + 3)) { 2388 /* this is a hard error */ 2389 return -EBUSY; 2390 } 2391 2392 i = 0; 2393 head->bytecount = xdp_get_frame_len(xdpf); 2394 head->type = IGC_TX_BUFFER_TYPE_XDP; 2395 head->gso_segs = 1; 2396 head->xdpf = xdpf; 2397 2398 olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2399 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2400 2401 for (;;) { 2402 dma_addr_t dma; 2403 2404 dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE); 2405 if (dma_mapping_error(ring->dev, dma)) { 2406 netdev_err_once(ring->netdev, 2407 "Failed to map DMA for TX\n"); 2408 goto unmap; 2409 } 2410 2411 dma_unmap_len_set(buffer, len, len); 2412 dma_unmap_addr_set(buffer, dma, dma); 2413 2414 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2415 IGC_ADVTXD_DCMD_IFCS | len; 2416 2417 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2418 desc->read.buffer_addr = cpu_to_le64(dma); 2419 2420 buffer->protocol = 0; 2421 2422 if (++index == ring->count) 2423 index = 0; 2424 2425 if (i == nr_frags) 2426 break; 2427 2428 buffer = &ring->tx_buffer_info[index]; 2429 desc = IGC_TX_DESC(ring, index); 2430 desc->read.olinfo_status = 0; 2431 2432 data = skb_frag_address(&sinfo->frags[i]); 2433 len = skb_frag_size(&sinfo->frags[i]); 2434 i++; 2435 } 2436 desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD); 2437 2438 netdev_tx_sent_queue(txring_txq(ring), head->bytecount); 2439 /* set the timestamp */ 2440 head->time_stamp = jiffies; 2441 /* set next_to_watch value indicating a packet is present */ 2442 head->next_to_watch = desc; 2443 ring->next_to_use = index; 2444 2445 return 0; 2446 2447 unmap: 2448 for (;;) { 2449 buffer = &ring->tx_buffer_info[index]; 2450 if (dma_unmap_len(buffer, len)) 2451 dma_unmap_page(ring->dev, 2452 dma_unmap_addr(buffer, dma), 2453 dma_unmap_len(buffer, len), 2454 DMA_TO_DEVICE); 2455 dma_unmap_len_set(buffer, len, 0); 2456 if (buffer == head) 2457 break; 2458 2459 if (!index) 2460 index += ring->count; 2461 index--; 2462 } 2463 2464 return -ENOMEM; 2465 } 2466 2467 struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu) 2468 { 2469 int index = cpu; 2470 2471 if (unlikely(index < 0)) 2472 index = 0; 2473 2474 while (index >= adapter->num_tx_queues) 2475 index -= adapter->num_tx_queues; 2476 2477 return adapter->tx_ring[index]; 2478 } 2479 2480 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2481 { 2482 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2483 int cpu = smp_processor_id(); 2484 struct netdev_queue *nq; 2485 struct igc_ring *ring; 2486 int res; 2487 2488 if (unlikely(!xdpf)) 2489 return -EFAULT; 2490 2491 ring = igc_get_tx_ring(adapter, cpu); 2492 nq = txring_txq(ring); 2493 2494 __netif_tx_lock(nq, cpu); 2495 /* Avoid transmit queue timeout since we share it with the slow path */ 2496 txq_trans_cond_update(nq); 2497 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2498 __netif_tx_unlock(nq); 2499 return res; 2500 } 2501 2502 /* This function assumes rcu_read_lock() is held by the caller. 
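 * It maps the bpf_prog_run_xdp() verdict onto the driver's IGC_XDP_* result
 * flags; failed XDP_TX sends and failed redirects are traced via
 * trace_xdp_exception() and treated as consumed.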
*/ 2503 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2504 struct bpf_prog *prog, 2505 struct xdp_buff *xdp) 2506 { 2507 u32 act = bpf_prog_run_xdp(prog, xdp); 2508 2509 switch (act) { 2510 case XDP_PASS: 2511 return IGC_XDP_PASS; 2512 case XDP_TX: 2513 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2514 goto out_failure; 2515 return IGC_XDP_TX; 2516 case XDP_REDIRECT: 2517 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2518 goto out_failure; 2519 return IGC_XDP_REDIRECT; 2520 break; 2521 default: 2522 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); 2523 fallthrough; 2524 case XDP_ABORTED: 2525 out_failure: 2526 trace_xdp_exception(adapter->netdev, prog, act); 2527 fallthrough; 2528 case XDP_DROP: 2529 return IGC_XDP_CONSUMED; 2530 } 2531 } 2532 2533 static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp) 2534 { 2535 struct bpf_prog *prog; 2536 int res; 2537 2538 prog = READ_ONCE(adapter->xdp_prog); 2539 if (!prog) { 2540 res = IGC_XDP_PASS; 2541 goto out; 2542 } 2543 2544 res = __igc_xdp_run_prog(adapter, prog, xdp); 2545 2546 out: 2547 return res; 2548 } 2549 2550 /* This function assumes __netif_tx_lock is held by the caller. */ 2551 void igc_flush_tx_descriptors(struct igc_ring *ring) 2552 { 2553 /* Once tail pointer is updated, hardware can fetch the descriptors 2554 * any time so we issue a write membar here to ensure all memory 2555 * writes are complete before the tail pointer is updated. 2556 */ 2557 wmb(); 2558 writel(ring->next_to_use, ring->tail); 2559 } 2560 2561 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2562 { 2563 int cpu = smp_processor_id(); 2564 struct netdev_queue *nq; 2565 struct igc_ring *ring; 2566 2567 if (status & IGC_XDP_TX) { 2568 ring = igc_get_tx_ring(adapter, cpu); 2569 nq = txring_txq(ring); 2570 2571 __netif_tx_lock(nq, cpu); 2572 igc_flush_tx_descriptors(ring); 2573 __netif_tx_unlock(nq); 2574 } 2575 2576 if (status & IGC_XDP_REDIRECT) 2577 xdp_do_flush(); 2578 } 2579 2580 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2581 unsigned int packets, unsigned int bytes) 2582 { 2583 struct igc_ring *ring = q_vector->rx.ring; 2584 2585 u64_stats_update_begin(&ring->rx_syncp); 2586 ring->rx_stats.packets += packets; 2587 ring->rx_stats.bytes += bytes; 2588 u64_stats_update_end(&ring->rx_syncp); 2589 2590 q_vector->rx.total_packets += packets; 2591 q_vector->rx.total_bytes += bytes; 2592 } 2593 2594 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2595 { 2596 unsigned int total_bytes = 0, total_packets = 0; 2597 struct igc_adapter *adapter = q_vector->adapter; 2598 struct igc_ring *rx_ring = q_vector->rx.ring; 2599 struct sk_buff *skb = rx_ring->skb; 2600 u16 cleaned_count = igc_desc_unused(rx_ring); 2601 int xdp_status = 0, rx_buffer_pgcnt; 2602 int xdp_res = 0; 2603 2604 while (likely(total_packets < budget)) { 2605 struct igc_xdp_buff ctx = { .rx_ts = NULL }; 2606 struct igc_rx_buffer *rx_buffer; 2607 union igc_adv_rx_desc *rx_desc; 2608 unsigned int size, truesize; 2609 int pkt_offset = 0; 2610 void *pktbuf; 2611 2612 /* return some buffers to hardware, one at a time is too slow */ 2613 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2614 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2615 cleaned_count = 0; 2616 } 2617 2618 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2619 size = le16_to_cpu(rx_desc->wb.upper.length); 2620 if (!size) 2621 break; 2622 2623 /* This memory barrier is needed to keep us from reading 2624 * any other fields out of 
the rx_desc until we know the 2625 * descriptor has been written back 2626 */ 2627 dma_rmb(); 2628 2629 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2630 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2631 2632 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2633 2634 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2635 ctx.rx_ts = pktbuf; 2636 pkt_offset = IGC_TS_HDR_LEN; 2637 size -= IGC_TS_HDR_LEN; 2638 } 2639 2640 if (igc_fpe_is_pmac_enabled(adapter) && 2641 igc_fpe_handle_mpacket(adapter, rx_desc, size, pktbuf)) { 2642 /* Advance the ring next-to-clean */ 2643 igc_is_non_eop(rx_ring, rx_desc); 2644 cleaned_count++; 2645 continue; 2646 } 2647 2648 if (!skb) { 2649 xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); 2650 xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), 2651 igc_rx_offset(rx_ring) + pkt_offset, 2652 size, true); 2653 xdp_buff_clear_frags_flag(&ctx.xdp); 2654 ctx.rx_desc = rx_desc; 2655 2656 xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp); 2657 } 2658 2659 if (xdp_res) { 2660 switch (xdp_res) { 2661 case IGC_XDP_CONSUMED: 2662 rx_buffer->pagecnt_bias++; 2663 break; 2664 case IGC_XDP_TX: 2665 case IGC_XDP_REDIRECT: 2666 igc_rx_buffer_flip(rx_buffer, truesize); 2667 xdp_status |= xdp_res; 2668 break; 2669 } 2670 2671 total_packets++; 2672 total_bytes += size; 2673 } else if (skb) 2674 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2675 else if (ring_uses_build_skb(rx_ring)) 2676 skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); 2677 else 2678 skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); 2679 2680 /* exit if we failed to retrieve a buffer */ 2681 if (!xdp_res && !skb) { 2682 rx_ring->rx_stats.alloc_failed++; 2683 rx_buffer->pagecnt_bias++; 2684 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2685 break; 2686 } 2687 2688 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2689 cleaned_count++; 2690 2691 /* fetch next buffer in frame if non-eop */ 2692 if (igc_is_non_eop(rx_ring, rx_desc)) 2693 continue; 2694 2695 /* verify the packet layout is correct */ 2696 if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2697 skb = NULL; 2698 continue; 2699 } 2700 2701 /* probably a little skewed due to removing CRC */ 2702 total_bytes += skb->len; 2703 2704 /* populate checksum, VLAN, and protocol */ 2705 igc_process_skb_fields(rx_ring, rx_desc, skb); 2706 2707 napi_gro_receive(&q_vector->napi, skb); 2708 2709 /* reset skb pointer */ 2710 skb = NULL; 2711 2712 /* update budget accounting */ 2713 total_packets++; 2714 } 2715 2716 if (xdp_status) 2717 igc_finalize_xdp(adapter, xdp_status); 2718 2719 /* place incomplete frames back on ring for completion */ 2720 rx_ring->skb = skb; 2721 2722 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2723 2724 if (cleaned_count) 2725 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2726 2727 return total_packets; 2728 } 2729 2730 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2731 struct igc_xdp_buff *ctx) 2732 { 2733 struct xdp_buff *xdp = &ctx->xdp; 2734 unsigned int totalsize = xdp->data_end - xdp->data_meta; 2735 unsigned int metasize = xdp->data - xdp->data_meta; 2736 struct sk_buff *skb; 2737 2738 net_prefetch(xdp->data_meta); 2739 2740 skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); 2741 if (unlikely(!skb)) 2742 return NULL; 2743 2744 memcpy(__skb_put(skb, totalsize), xdp->data_meta, 2745 ALIGN(totalsize, sizeof(long))); 2746 2747 if (metasize) { 2748 skb_metadata_set(skb, metasize); 2749 __skb_pull(skb, metasize); 2750 
} 2751 2752 if (ctx->rx_ts) { 2753 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2754 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2755 } 2756 2757 return skb; 2758 } 2759 2760 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, 2761 union igc_adv_rx_desc *desc, 2762 struct igc_xdp_buff *ctx) 2763 { 2764 struct igc_ring *ring = q_vector->rx.ring; 2765 struct sk_buff *skb; 2766 2767 skb = igc_construct_skb_zc(ring, ctx); 2768 if (!skb) { 2769 ring->rx_stats.alloc_failed++; 2770 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); 2771 return; 2772 } 2773 2774 if (igc_cleanup_headers(ring, desc, skb)) 2775 return; 2776 2777 igc_process_skb_fields(ring, desc, skb); 2778 napi_gro_receive(&q_vector->napi, skb); 2779 } 2780 2781 static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) 2782 { 2783 /* xdp_buff pointer used by ZC code path is allocated as xdp_buff_xsk. The 2784 * igc_xdp_buff shares its layout with xdp_buff_xsk and private 2785 * igc_xdp_buff fields fall into xdp_buff_xsk->cb 2786 */ 2787 return (struct igc_xdp_buff *)xdp; 2788 } 2789 2790 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) 2791 { 2792 struct igc_adapter *adapter = q_vector->adapter; 2793 struct igc_ring *ring = q_vector->rx.ring; 2794 u16 cleaned_count = igc_desc_unused(ring); 2795 int total_bytes = 0, total_packets = 0; 2796 u16 ntc = ring->next_to_clean; 2797 struct bpf_prog *prog; 2798 bool failure = false; 2799 int xdp_status = 0; 2800 2801 rcu_read_lock(); 2802 2803 prog = READ_ONCE(adapter->xdp_prog); 2804 2805 while (likely(total_packets < budget)) { 2806 union igc_adv_rx_desc *desc; 2807 struct igc_rx_buffer *bi; 2808 struct igc_xdp_buff *ctx; 2809 unsigned int size; 2810 int res; 2811 2812 desc = IGC_RX_DESC(ring, ntc); 2813 size = le16_to_cpu(desc->wb.upper.length); 2814 if (!size) 2815 break; 2816 2817 /* This memory barrier is needed to keep us from reading 2818 * any other fields out of the rx_desc until we know the 2819 * descriptor has been written back 2820 */ 2821 dma_rmb(); 2822 2823 bi = &ring->rx_buffer_info[ntc]; 2824 2825 ctx = xsk_buff_to_igc_ctx(bi->xdp); 2826 ctx->rx_desc = desc; 2827 2828 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { 2829 ctx->rx_ts = bi->xdp->data; 2830 2831 bi->xdp->data += IGC_TS_HDR_LEN; 2832 2833 /* The HW timestamp location has been saved in ctx->rx_ts above. The 2834 * metadata length seen by the XDP program should be 0. 
2835 */ 2836 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2837 size -= IGC_TS_HDR_LEN; 2838 } else { 2839 ctx->rx_ts = NULL; 2840 } 2841 2842 bi->xdp->data_end = bi->xdp->data + size; 2843 xsk_buff_dma_sync_for_cpu(bi->xdp); 2844 2845 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2846 switch (res) { 2847 case IGC_XDP_PASS: 2848 igc_dispatch_skb_zc(q_vector, desc, ctx); 2849 fallthrough; 2850 case IGC_XDP_CONSUMED: 2851 xsk_buff_free(bi->xdp); 2852 break; 2853 case IGC_XDP_TX: 2854 case IGC_XDP_REDIRECT: 2855 xdp_status |= res; 2856 break; 2857 } 2858 2859 bi->xdp = NULL; 2860 total_bytes += size; 2861 total_packets++; 2862 cleaned_count++; 2863 ntc++; 2864 if (ntc == ring->count) 2865 ntc = 0; 2866 } 2867 2868 ring->next_to_clean = ntc; 2869 rcu_read_unlock(); 2870 2871 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2872 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2873 2874 if (xdp_status) 2875 igc_finalize_xdp(adapter, xdp_status); 2876 2877 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2878 2879 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2880 if (failure || ring->next_to_clean == ring->next_to_use) 2881 xsk_set_rx_need_wakeup(ring->xsk_pool); 2882 else 2883 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2884 return total_packets; 2885 } 2886 2887 return failure ? budget : total_packets; 2888 } 2889 2890 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2891 unsigned int packets, unsigned int bytes) 2892 { 2893 struct igc_ring *ring = q_vector->tx.ring; 2894 2895 u64_stats_update_begin(&ring->tx_syncp); 2896 ring->tx_stats.bytes += bytes; 2897 ring->tx_stats.packets += packets; 2898 u64_stats_update_end(&ring->tx_syncp); 2899 2900 q_vector->tx.total_bytes += bytes; 2901 q_vector->tx.total_packets += packets; 2902 } 2903 2904 static void igc_xsk_request_timestamp(void *_priv) 2905 { 2906 struct igc_metadata_request *meta_req = _priv; 2907 struct igc_ring *tx_ring = meta_req->tx_ring; 2908 struct igc_tx_timestamp_request *tstamp; 2909 u32 tx_flags = IGC_TX_FLAGS_TSTAMP; 2910 struct igc_adapter *adapter; 2911 unsigned long lock_flags; 2912 bool found = false; 2913 int i; 2914 2915 if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { 2916 adapter = netdev_priv(tx_ring->netdev); 2917 2918 spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); 2919 2920 /* Search for available tstamp regs */ 2921 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 2922 tstamp = &adapter->tx_tstamp[i]; 2923 2924 /* tstamp->skb and tstamp->xsk_tx_buffer are in union. 2925 * When tstamp->skb is equal to NULL, 2926 * tstamp->xsk_tx_buffer is equal to NULL as well. 2927 * This condition means that the particular tstamp reg 2928 * is not occupied by other packet. 2929 */ 2930 if (!tstamp->skb) { 2931 found = true; 2932 break; 2933 } 2934 } 2935 2936 /* Return if no available tstamp regs */ 2937 if (!found) { 2938 adapter->tx_hwtstamp_skipped++; 2939 spin_unlock_irqrestore(&adapter->ptp_tx_lock, 2940 lock_flags); 2941 return; 2942 } 2943 2944 tstamp->start = jiffies; 2945 tstamp->xsk_queue_index = tx_ring->queue_index; 2946 tstamp->xsk_tx_buffer = meta_req->tx_buffer; 2947 tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; 2948 2949 /* Hold the transmit completion until timestamp is ready */ 2950 meta_req->tx_buffer->xsk_pending_ts = true; 2951 2952 /* Keep the pointer to tx_timestamp, which is located in XDP 2953 * metadata area. It is the location to store the value of 2954 * tx hardware timestamp. 
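 * xsk_tx_metadata_to_compl() below records that location in tstamp->xsk_meta
 * so the retrieved hardware timestamp can later be written back there on
 * completion (see igc_xsk_fill_timestamp()).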
2955 */ 2956 xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); 2957 2958 /* Set timestamp bit based on the _TSTAMP(_X) bit. */ 2959 tx_flags |= tstamp->flags; 2960 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2961 IGC_TX_FLAGS_TSTAMP, 2962 (IGC_ADVTXD_MAC_TSTAMP)); 2963 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2964 IGC_TX_FLAGS_TSTAMP_1, 2965 (IGC_ADVTXD_TSTAMP_REG_1)); 2966 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2967 IGC_TX_FLAGS_TSTAMP_2, 2968 (IGC_ADVTXD_TSTAMP_REG_2)); 2969 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2970 IGC_TX_FLAGS_TSTAMP_3, 2971 (IGC_ADVTXD_TSTAMP_REG_3)); 2972 2973 spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); 2974 } 2975 } 2976 2977 static u64 igc_xsk_fill_timestamp(void *_priv) 2978 { 2979 return *(u64 *)_priv; 2980 } 2981 2982 static void igc_xsk_request_launch_time(u64 launch_time, void *_priv) 2983 { 2984 struct igc_metadata_request *meta_req = _priv; 2985 struct igc_ring *tx_ring = meta_req->tx_ring; 2986 __le32 launch_time_offset; 2987 bool insert_empty = false; 2988 bool first_flag = false; 2989 u16 used_desc = 0; 2990 2991 if (!tx_ring->launchtime_enable) 2992 return; 2993 2994 launch_time_offset = igc_tx_launchtime(tx_ring, 2995 ns_to_ktime(launch_time), 2996 &first_flag, &insert_empty); 2997 if (insert_empty) { 2998 /* Disregard the launch time request if the required empty frame 2999 * fails to be inserted. 3000 */ 3001 if (igc_insert_empty_frame(tx_ring)) 3002 return; 3003 3004 meta_req->tx_buffer = 3005 &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 3006 /* Inserting an empty packet requires two descriptors: 3007 * one data descriptor and one context descriptor. 3008 */ 3009 used_desc += 2; 3010 } 3011 3012 /* Use one context descriptor to specify launch time and first flag. */ 3013 igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0); 3014 used_desc += 1; 3015 3016 /* Update the number of used descriptors in this request */ 3017 meta_req->used_desc += used_desc; 3018 } 3019 3020 const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { 3021 .tmo_request_timestamp = igc_xsk_request_timestamp, 3022 .tmo_fill_timestamp = igc_xsk_fill_timestamp, 3023 .tmo_request_launch_time = igc_xsk_request_launch_time, 3024 }; 3025 3026 static void igc_xdp_xmit_zc(struct igc_ring *ring) 3027 { 3028 struct xsk_buff_pool *pool = ring->xsk_pool; 3029 struct netdev_queue *nq = txring_txq(ring); 3030 union igc_adv_tx_desc *tx_desc = NULL; 3031 int cpu = smp_processor_id(); 3032 struct xdp_desc xdp_desc; 3033 u16 budget, ntu; 3034 3035 if (!netif_carrier_ok(ring->netdev)) 3036 return; 3037 3038 __netif_tx_lock(nq, cpu); 3039 3040 /* Avoid transmit queue timeout since we share it with the slow path */ 3041 txq_trans_cond_update(nq); 3042 3043 ntu = ring->next_to_use; 3044 budget = igc_desc_unused(ring); 3045 3046 /* Packets with launch time require one data descriptor and one context 3047 * descriptor. When the launch time falls into the next Qbv cycle, we 3048 * may need to insert an empty packet, which requires two more 3049 * descriptors. Therefore, to be safe, we always ensure we have at least 3050 * 4 descriptors available. 
3051 */ 3052 while (budget >= 4 && xsk_tx_peek_desc(pool, &xdp_desc)) { 3053 struct igc_metadata_request meta_req; 3054 struct xsk_tx_metadata *meta = NULL; 3055 struct igc_tx_buffer *bi; 3056 u32 olinfo_status; 3057 dma_addr_t dma; 3058 3059 meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | 3060 IGC_ADVTXD_DCMD_DEXT | 3061 IGC_ADVTXD_DCMD_IFCS | 3062 IGC_TXD_DCMD | xdp_desc.len; 3063 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 3064 3065 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 3066 meta = xsk_buff_get_metadata(pool, xdp_desc.addr); 3067 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 3068 bi = &ring->tx_buffer_info[ntu]; 3069 3070 meta_req.tx_ring = ring; 3071 meta_req.tx_buffer = bi; 3072 meta_req.meta = meta; 3073 meta_req.used_desc = 0; 3074 xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, 3075 &meta_req); 3076 3077 /* xsk_tx_metadata_request() may have updated next_to_use */ 3078 ntu = ring->next_to_use; 3079 3080 /* xsk_tx_metadata_request() may have updated Tx buffer info */ 3081 bi = meta_req.tx_buffer; 3082 3083 /* xsk_tx_metadata_request() may use a few descriptors */ 3084 budget -= meta_req.used_desc; 3085 3086 tx_desc = IGC_TX_DESC(ring, ntu); 3087 tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); 3088 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 3089 tx_desc->read.buffer_addr = cpu_to_le64(dma); 3090 3091 bi->type = IGC_TX_BUFFER_TYPE_XSK; 3092 bi->protocol = 0; 3093 bi->bytecount = xdp_desc.len; 3094 bi->gso_segs = 1; 3095 bi->time_stamp = jiffies; 3096 bi->next_to_watch = tx_desc; 3097 3098 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 3099 3100 ntu++; 3101 if (ntu == ring->count) 3102 ntu = 0; 3103 3104 ring->next_to_use = ntu; 3105 budget--; 3106 } 3107 3108 if (tx_desc) { 3109 igc_flush_tx_descriptors(ring); 3110 xsk_tx_release(pool); 3111 } 3112 3113 __netif_tx_unlock(nq); 3114 } 3115 3116 /** 3117 * igc_clean_tx_irq - Reclaim resources after transmit completes 3118 * @q_vector: pointer to q_vector containing needed info 3119 * @napi_budget: Used to determine if we are in netpoll 3120 * 3121 * returns true if ring is completely cleaned 3122 */ 3123 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 3124 { 3125 struct igc_adapter *adapter = q_vector->adapter; 3126 unsigned int total_bytes = 0, total_packets = 0; 3127 unsigned int budget = q_vector->tx.work_limit; 3128 struct igc_ring *tx_ring = q_vector->tx.ring; 3129 unsigned int i = tx_ring->next_to_clean; 3130 struct igc_tx_buffer *tx_buffer; 3131 union igc_adv_tx_desc *tx_desc; 3132 u32 xsk_frames = 0; 3133 3134 if (test_bit(__IGC_DOWN, &adapter->state)) 3135 return true; 3136 3137 tx_buffer = &tx_ring->tx_buffer_info[i]; 3138 tx_desc = IGC_TX_DESC(tx_ring, i); 3139 i -= tx_ring->count; 3140 3141 do { 3142 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 3143 3144 /* if next_to_watch is not set then there is no work pending */ 3145 if (!eop_desc) 3146 break; 3147 3148 /* prevent any other reads prior to eop_desc */ 3149 smp_rmb(); 3150 3151 /* if DD is not set pending work has not been completed */ 3152 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 3153 break; 3154 3155 if (igc_fpe_is_pmac_enabled(adapter) && 3156 igc_fpe_transmitted_smd_v(tx_desc)) 3157 ethtool_mmsv_event_handle(&adapter->fpe.mmsv, 3158 ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); 3159 3160 /* Hold the completions while there's a pending tx hardware 3161 * timestamp request from XDP Tx metadata. 
3162 */ 3163 if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && 3164 tx_buffer->xsk_pending_ts) 3165 break; 3166 3167 /* clear next_to_watch to prevent false hangs */ 3168 tx_buffer->next_to_watch = NULL; 3169 3170 /* update the statistics for this packet */ 3171 total_bytes += tx_buffer->bytecount; 3172 total_packets += tx_buffer->gso_segs; 3173 3174 switch (tx_buffer->type) { 3175 case IGC_TX_BUFFER_TYPE_XSK: 3176 xsk_frames++; 3177 break; 3178 case IGC_TX_BUFFER_TYPE_XDP: 3179 xdp_return_frame(tx_buffer->xdpf); 3180 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3181 break; 3182 case IGC_TX_BUFFER_TYPE_SKB: 3183 napi_consume_skb(tx_buffer->skb, napi_budget); 3184 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3185 break; 3186 default: 3187 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 3188 break; 3189 } 3190 3191 /* clear last DMA location and unmap remaining buffers */ 3192 while (tx_desc != eop_desc) { 3193 tx_buffer++; 3194 tx_desc++; 3195 i++; 3196 if (unlikely(!i)) { 3197 i -= tx_ring->count; 3198 tx_buffer = tx_ring->tx_buffer_info; 3199 tx_desc = IGC_TX_DESC(tx_ring, 0); 3200 } 3201 3202 /* unmap any remaining paged data */ 3203 if (dma_unmap_len(tx_buffer, len)) 3204 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3205 } 3206 3207 /* move us one more past the eop_desc for start of next pkt */ 3208 tx_buffer++; 3209 tx_desc++; 3210 i++; 3211 if (unlikely(!i)) { 3212 i -= tx_ring->count; 3213 tx_buffer = tx_ring->tx_buffer_info; 3214 tx_desc = IGC_TX_DESC(tx_ring, 0); 3215 } 3216 3217 /* issue prefetch for next Tx descriptor */ 3218 prefetch(tx_desc); 3219 3220 /* update budget accounting */ 3221 budget--; 3222 } while (likely(budget)); 3223 3224 netdev_tx_completed_queue(txring_txq(tx_ring), 3225 total_packets, total_bytes); 3226 3227 i += tx_ring->count; 3228 tx_ring->next_to_clean = i; 3229 3230 igc_update_tx_stats(q_vector, total_packets, total_bytes); 3231 3232 if (tx_ring->xsk_pool) { 3233 if (xsk_frames) 3234 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 3235 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 3236 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 3237 igc_xdp_xmit_zc(tx_ring); 3238 } 3239 3240 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 3241 struct igc_hw *hw = &adapter->hw; 3242 3243 /* Detect a transmit hang in hardware, this serializes the 3244 * check with the clearing of time_stamp and movement of i 3245 */ 3246 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3247 if (tx_buffer->next_to_watch && 3248 time_after(jiffies, tx_buffer->time_stamp + 3249 (adapter->tx_timeout_factor * HZ)) && 3250 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && 3251 (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && 3252 !tx_ring->oper_gate_closed) { 3253 /* detected Tx unit hang */ 3254 netdev_err(tx_ring->netdev, 3255 "Detected Tx Unit Hang\n" 3256 " Tx Queue <%d>\n" 3257 " TDH <%x>\n" 3258 " TDT <%x>\n" 3259 " next_to_use <%x>\n" 3260 " next_to_clean <%x>\n" 3261 "buffer_info[next_to_clean]\n" 3262 " time_stamp <%lx>\n" 3263 " next_to_watch <%p>\n" 3264 " jiffies <%lx>\n" 3265 " desc.status <%x>\n", 3266 tx_ring->queue_index, 3267 rd32(IGC_TDH(tx_ring->reg_idx)), 3268 readl(tx_ring->tail), 3269 tx_ring->next_to_use, 3270 tx_ring->next_to_clean, 3271 tx_buffer->time_stamp, 3272 tx_buffer->next_to_watch, 3273 jiffies, 3274 tx_buffer->next_to_watch->wb.status); 3275 netif_stop_subqueue(tx_ring->netdev, 3276 tx_ring->queue_index); 3277 3278 /* we are about to reset, no point in enabling stuff */ 3279 return true; 3280 } 3281 } 3282 3283 
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 3284 if (unlikely(total_packets && 3285 netif_carrier_ok(tx_ring->netdev) && 3286 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 3287 /* Make sure that anybody stopping the queue after this 3288 * sees the new next_to_clean. 3289 */ 3290 smp_mb(); 3291 if (__netif_subqueue_stopped(tx_ring->netdev, 3292 tx_ring->queue_index) && 3293 !(test_bit(__IGC_DOWN, &adapter->state))) { 3294 netif_wake_subqueue(tx_ring->netdev, 3295 tx_ring->queue_index); 3296 3297 u64_stats_update_begin(&tx_ring->tx_syncp); 3298 tx_ring->tx_stats.restart_queue++; 3299 u64_stats_update_end(&tx_ring->tx_syncp); 3300 } 3301 } 3302 3303 return !!budget; 3304 } 3305 3306 static int igc_find_mac_filter(struct igc_adapter *adapter, 3307 enum igc_mac_filter_type type, const u8 *addr) 3308 { 3309 struct igc_hw *hw = &adapter->hw; 3310 int max_entries = hw->mac.rar_entry_count; 3311 u32 ral, rah; 3312 int i; 3313 3314 for (i = 0; i < max_entries; i++) { 3315 ral = rd32(IGC_RAL(i)); 3316 rah = rd32(IGC_RAH(i)); 3317 3318 if (!(rah & IGC_RAH_AV)) 3319 continue; 3320 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 3321 continue; 3322 if ((rah & IGC_RAH_RAH_MASK) != 3323 le16_to_cpup((__le16 *)(addr + 4))) 3324 continue; 3325 if (ral != le32_to_cpup((__le32 *)(addr))) 3326 continue; 3327 3328 return i; 3329 } 3330 3331 return -1; 3332 } 3333 3334 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 3335 { 3336 struct igc_hw *hw = &adapter->hw; 3337 int max_entries = hw->mac.rar_entry_count; 3338 u32 rah; 3339 int i; 3340 3341 for (i = 0; i < max_entries; i++) { 3342 rah = rd32(IGC_RAH(i)); 3343 3344 if (!(rah & IGC_RAH_AV)) 3345 return i; 3346 } 3347 3348 return -1; 3349 } 3350 3351 /** 3352 * igc_add_mac_filter() - Add MAC address filter 3353 * @adapter: Pointer to adapter where the filter should be added 3354 * @type: MAC address filter type (source or destination) 3355 * @addr: MAC address 3356 * @queue: If non-negative, queue assignment feature is enabled and frames 3357 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3358 * assignment is disabled. 3359 * 3360 * Return: 0 in case of success, negative errno code otherwise. 3361 */ 3362 static int igc_add_mac_filter(struct igc_adapter *adapter, 3363 enum igc_mac_filter_type type, const u8 *addr, 3364 int queue) 3365 { 3366 struct net_device *dev = adapter->netdev; 3367 int index; 3368 3369 index = igc_find_mac_filter(adapter, type, addr); 3370 if (index >= 0) 3371 goto update_filter; 3372 3373 index = igc_get_avail_mac_filter_slot(adapter); 3374 if (index < 0) 3375 return -ENOSPC; 3376 3377 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 3378 index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3379 addr, queue); 3380 3381 update_filter: 3382 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 3383 return 0; 3384 } 3385 3386 /** 3387 * igc_del_mac_filter() - Delete MAC address filter 3388 * @adapter: Pointer to adapter where the filter should be deleted from 3389 * @type: MAC address filter type (source or destination) 3390 * @addr: MAC address 3391 */ 3392 static void igc_del_mac_filter(struct igc_adapter *adapter, 3393 enum igc_mac_filter_type type, const u8 *addr) 3394 { 3395 struct net_device *dev = adapter->netdev; 3396 int index; 3397 3398 index = igc_find_mac_filter(adapter, type, addr); 3399 if (index < 0) 3400 return; 3401 3402 if (index == 0) { 3403 /* If this is the default filter, we don't actually delete it. 
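 * (Index 0 is the default filter, programmed with the netdev's own MAC
 * address by igc_set_default_mac_filter().)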
3404 * We just reset to its default value i.e. disable queue 3405 * assignment. 3406 */ 3407 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 3408 3409 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 3410 } else { 3411 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 3412 index, 3413 type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3414 addr); 3415 3416 igc_clear_mac_filter_hw(adapter, index); 3417 } 3418 } 3419 3420 /** 3421 * igc_add_vlan_prio_filter() - Add VLAN priority filter 3422 * @adapter: Pointer to adapter where the filter should be added 3423 * @prio: VLAN priority value 3424 * @queue: Queue number which matching frames are assigned to 3425 * 3426 * Return: 0 in case of success, negative errno code otherwise. 3427 */ 3428 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 3429 int queue) 3430 { 3431 struct net_device *dev = adapter->netdev; 3432 struct igc_hw *hw = &adapter->hw; 3433 u32 vlanpqf; 3434 3435 vlanpqf = rd32(IGC_VLANPQF); 3436 3437 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 3438 netdev_dbg(dev, "VLAN priority filter already in use\n"); 3439 return -EEXIST; 3440 } 3441 3442 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 3443 vlanpqf |= IGC_VLANPQF_VALID(prio); 3444 3445 wr32(IGC_VLANPQF, vlanpqf); 3446 3447 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 3448 prio, queue); 3449 return 0; 3450 } 3451 3452 /** 3453 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 3454 * @adapter: Pointer to adapter where the filter should be deleted from 3455 * @prio: VLAN priority value 3456 */ 3457 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 3458 { 3459 struct igc_hw *hw = &adapter->hw; 3460 u32 vlanpqf; 3461 3462 vlanpqf = rd32(IGC_VLANPQF); 3463 3464 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 3465 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 3466 3467 wr32(IGC_VLANPQF, vlanpqf); 3468 3469 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 3470 prio); 3471 } 3472 3473 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 3474 { 3475 struct igc_hw *hw = &adapter->hw; 3476 int i; 3477 3478 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3479 u32 etqf = rd32(IGC_ETQF(i)); 3480 3481 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 3482 return i; 3483 } 3484 3485 return -1; 3486 } 3487 3488 /** 3489 * igc_add_etype_filter() - Add ethertype filter 3490 * @adapter: Pointer to adapter where the filter should be added 3491 * @etype: Ethertype value 3492 * @queue: If non-negative, queue assignment feature is enabled and frames 3493 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3494 * assignment is disabled. 3495 * 3496 * Return: 0 in case of success, negative errno code otherwise. 
3497 */ 3498 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3499 int queue) 3500 { 3501 struct igc_hw *hw = &adapter->hw; 3502 int index; 3503 u32 etqf; 3504 3505 index = igc_get_avail_etype_filter_slot(adapter); 3506 if (index < 0) 3507 return -ENOSPC; 3508 3509 etqf = rd32(IGC_ETQF(index)); 3510 3511 etqf &= ~IGC_ETQF_ETYPE_MASK; 3512 etqf |= etype; 3513 3514 if (queue >= 0) { 3515 etqf &= ~IGC_ETQF_QUEUE_MASK; 3516 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3517 etqf |= IGC_ETQF_QUEUE_ENABLE; 3518 } 3519 3520 etqf |= IGC_ETQF_FILTER_ENABLE; 3521 3522 wr32(IGC_ETQF(index), etqf); 3523 3524 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3525 etype, queue); 3526 return 0; 3527 } 3528 3529 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3530 { 3531 struct igc_hw *hw = &adapter->hw; 3532 int i; 3533 3534 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3535 u32 etqf = rd32(IGC_ETQF(i)); 3536 3537 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3538 return i; 3539 } 3540 3541 return -1; 3542 } 3543 3544 /** 3545 * igc_del_etype_filter() - Delete ethertype filter 3546 * @adapter: Pointer to adapter where the filter should be deleted from 3547 * @etype: Ethertype value 3548 */ 3549 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3550 { 3551 struct igc_hw *hw = &adapter->hw; 3552 int index; 3553 3554 index = igc_find_etype_filter(adapter, etype); 3555 if (index < 0) 3556 return; 3557 3558 wr32(IGC_ETQF(index), 0); 3559 3560 netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n", 3561 etype); 3562 } 3563 3564 static int igc_flex_filter_select(struct igc_adapter *adapter, 3565 struct igc_flex_filter *input, 3566 u32 *fhft) 3567 { 3568 struct igc_hw *hw = &adapter->hw; 3569 u8 fhft_index; 3570 u32 fhftsl; 3571 3572 if (input->index >= MAX_FLEX_FILTER) { 3573 netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n"); 3574 return -EINVAL; 3575 } 3576 3577 /* Indirect table select register */ 3578 fhftsl = rd32(IGC_FHFTSL); 3579 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3580 switch (input->index) { 3581 case 0 ... 7: 3582 fhftsl |= 0x00; 3583 break; 3584 case 8 ... 15: 3585 fhftsl |= 0x01; 3586 break; 3587 case 16 ... 23: 3588 fhftsl |= 0x02; 3589 break; 3590 case 24 ... 31: 3591 fhftsl |= 0x03; 3592 break; 3593 } 3594 wr32(IGC_FHFTSL, fhftsl); 3595 3596 /* Normalize index down to host table register */ 3597 fhft_index = input->index % 8; 3598 3599 *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : 3600 IGC_FHFT_EXT(fhft_index - 4); 3601 3602 return 0; 3603 } 3604 3605 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3606 struct igc_flex_filter *input) 3607 { 3608 struct igc_hw *hw = &adapter->hw; 3609 u8 *data = input->data; 3610 u8 *mask = input->mask; 3611 u32 queuing; 3612 u32 fhft; 3613 u32 wufc; 3614 int ret; 3615 int i; 3616 3617 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3618 * out early to avoid surprises later. 3619 */ 3620 if (input->length % 8 != 0) { 3621 netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n"); 3622 return -EINVAL; 3623 } 3624 3625 /* Select corresponding flex filter register and get base for host table. */ 3626 ret = igc_flex_filter_select(adapter, input, &fhft); 3627 if (ret) 3628 return ret; 3629 3630 /* When adding a filter globally disable flex filter feature. That is 3631 * recommended within the datasheet. 
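 * The feature is re-enabled (IGC_WUFC_FLEX_HQ) further down, once the filter
 * data and mask have been written to the host filter table.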
3632 */ 3633 wufc = rd32(IGC_WUFC); 3634 wufc &= ~IGC_WUFC_FLEX_HQ; 3635 wr32(IGC_WUFC, wufc); 3636 3637 /* Configure filter */ 3638 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3639 queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); 3640 queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); 3641 3642 if (input->immediate_irq) 3643 queuing |= IGC_FHFT_IMM_INT; 3644 3645 if (input->drop) 3646 queuing |= IGC_FHFT_DROP; 3647 3648 wr32(fhft + 0xFC, queuing); 3649 3650 /* Write data (128 byte) and mask (128 bit) */ 3651 for (i = 0; i < 16; ++i) { 3652 const size_t data_idx = i * 8; 3653 const size_t row_idx = i * 16; 3654 u32 dw0 = 3655 (data[data_idx + 0] << 0) | 3656 (data[data_idx + 1] << 8) | 3657 (data[data_idx + 2] << 16) | 3658 (data[data_idx + 3] << 24); 3659 u32 dw1 = 3660 (data[data_idx + 4] << 0) | 3661 (data[data_idx + 5] << 8) | 3662 (data[data_idx + 6] << 16) | 3663 (data[data_idx + 7] << 24); 3664 u32 tmp; 3665 3666 /* Write row: dw0, dw1 and mask */ 3667 wr32(fhft + row_idx, dw0); 3668 wr32(fhft + row_idx + 4, dw1); 3669 3670 /* mask is only valid for MASK(7, 0) */ 3671 tmp = rd32(fhft + row_idx + 8); 3672 tmp &= ~GENMASK(7, 0); 3673 tmp |= mask[i]; 3674 wr32(fhft + row_idx + 8, tmp); 3675 } 3676 3677 /* Enable filter. */ 3678 wufc |= IGC_WUFC_FLEX_HQ; 3679 if (input->index > 8) { 3680 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */ 3681 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3682 3683 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3684 3685 wr32(IGC_WUFC_EXT, wufc_ext); 3686 } else { 3687 wufc |= (IGC_WUFC_FLX0 << input->index); 3688 } 3689 wr32(IGC_WUFC, wufc); 3690 3691 netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n", 3692 input->index); 3693 3694 return 0; 3695 } 3696 3697 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3698 const void *src, unsigned int offset, 3699 size_t len, const void *mask) 3700 { 3701 int i; 3702 3703 /* data */ 3704 memcpy(&flex->data[offset], src, len); 3705 3706 /* mask */ 3707 for (i = 0; i < len; ++i) { 3708 const unsigned int idx = i + offset; 3709 const u8 *ptr = mask; 3710 3711 if (mask) { 3712 if (ptr[i] & 0xff) 3713 flex->mask[idx / 8] |= BIT(idx % 8); 3714 3715 continue; 3716 } 3717 3718 flex->mask[idx / 8] |= BIT(idx % 8); 3719 } 3720 } 3721 3722 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3723 { 3724 struct igc_hw *hw = &adapter->hw; 3725 u32 wufc, wufc_ext; 3726 int i; 3727 3728 wufc = rd32(IGC_WUFC); 3729 wufc_ext = rd32(IGC_WUFC_EXT); 3730 3731 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3732 if (i < 8) { 3733 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3734 return i; 3735 } else { 3736 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3737 return i; 3738 } 3739 } 3740 3741 return -ENOSPC; 3742 } 3743 3744 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3745 { 3746 struct igc_hw *hw = &adapter->hw; 3747 u32 wufc, wufc_ext; 3748 3749 wufc = rd32(IGC_WUFC); 3750 wufc_ext = rd32(IGC_WUFC_EXT); 3751 3752 if (wufc & IGC_WUFC_FILTER_MASK) 3753 return true; 3754 3755 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3756 return true; 3757 3758 return false; 3759 } 3760 3761 static int igc_add_flex_filter(struct igc_adapter *adapter, 3762 struct igc_nfc_rule *rule) 3763 { 3764 struct igc_nfc_filter *filter = &rule->filter; 3765 unsigned int eth_offset, user_offset; 3766 struct igc_flex_filter flex = { }; 3767 int ret, index; 3768 bool vlan; 3769 3770 index = igc_find_avail_flex_filter_slot(adapter); 3771 if (index < 0) 3772 return -ENOSPC; 3773 3774 
/* Construct the flex filter: 3775 * -> dest_mac [6] 3776 * -> src_mac [6] 3777 * -> tpid [2] 3778 * -> vlan tci [2] 3779 * -> ether type [2] 3780 * -> user data [8] 3781 * -> = 26 bytes => 32 length 3782 */ 3783 flex.index = index; 3784 flex.length = 32; 3785 flex.rx_queue = rule->action; 3786 3787 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3788 eth_offset = vlan ? 16 : 12; 3789 user_offset = vlan ? 18 : 14; 3790 3791 /* Add destination MAC */ 3792 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3793 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3794 ETH_ALEN, NULL); 3795 3796 /* Add source MAC */ 3797 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3798 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3799 ETH_ALEN, NULL); 3800 3801 /* Add VLAN etype */ 3802 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { 3803 __be16 vlan_etype = cpu_to_be16(filter->vlan_etype); 3804 3805 igc_flex_filter_add_field(&flex, &vlan_etype, 12, 3806 sizeof(vlan_etype), NULL); 3807 } 3808 3809 /* Add VLAN TCI */ 3810 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3811 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3812 sizeof(filter->vlan_tci), NULL); 3813 3814 /* Add Ether type */ 3815 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3816 __be16 etype = cpu_to_be16(filter->etype); 3817 3818 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3819 sizeof(etype), NULL); 3820 } 3821 3822 /* Add user data */ 3823 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3824 igc_flex_filter_add_field(&flex, &filter->user_data, 3825 user_offset, 3826 sizeof(filter->user_data), 3827 filter->user_mask); 3828 3829 /* Add it down to the hardware and enable it. */ 3830 ret = igc_write_flex_filter_ll(adapter, &flex); 3831 if (ret) 3832 return ret; 3833 3834 filter->flex_index = index; 3835 3836 return 0; 3837 } 3838 3839 static void igc_del_flex_filter(struct igc_adapter *adapter, 3840 u16 reg_index) 3841 { 3842 struct igc_hw *hw = &adapter->hw; 3843 u32 wufc; 3844 3845 /* Just disable the filter. The filter table itself is kept 3846 * intact. Another flex_filter_add() should override the "old" data 3847 * then. 
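 * Once no flex filter is left in use, the global IGC_WUFC_FLEX_HQ enable bit
 * is cleared as well (see igc_flex_filter_in_use()).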
3848 */ 3849 if (reg_index > 8) { 3850 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3851 3852 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3853 wr32(IGC_WUFC_EXT, wufc_ext); 3854 } else { 3855 wufc = rd32(IGC_WUFC); 3856 3857 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3858 wr32(IGC_WUFC, wufc); 3859 } 3860 3861 if (igc_flex_filter_in_use(adapter)) 3862 return; 3863 3864 /* No filters are in use, we may disable flex filters */ 3865 wufc = rd32(IGC_WUFC); 3866 wufc &= ~IGC_WUFC_FLEX_HQ; 3867 wr32(IGC_WUFC, wufc); 3868 } 3869 3870 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3871 struct igc_nfc_rule *rule) 3872 { 3873 int err; 3874 3875 if (rule->flex) { 3876 return igc_add_flex_filter(adapter, rule); 3877 } 3878 3879 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3880 err = igc_add_etype_filter(adapter, rule->filter.etype, 3881 rule->action); 3882 if (err) 3883 return err; 3884 } 3885 3886 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3887 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3888 rule->filter.src_addr, rule->action); 3889 if (err) 3890 return err; 3891 } 3892 3893 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3894 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3895 rule->filter.dst_addr, rule->action); 3896 if (err) 3897 return err; 3898 } 3899 3900 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3901 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3902 3903 err = igc_add_vlan_prio_filter(adapter, prio, rule->action); 3904 if (err) 3905 return err; 3906 } 3907 3908 return 0; 3909 } 3910 3911 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3912 const struct igc_nfc_rule *rule) 3913 { 3914 if (rule->flex) { 3915 igc_del_flex_filter(adapter, rule->filter.flex_index); 3916 return; 3917 } 3918 3919 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3920 igc_del_etype_filter(adapter, rule->filter.etype); 3921 3922 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3923 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3924 3925 igc_del_vlan_prio_filter(adapter, prio); 3926 } 3927 3928 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3929 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3930 rule->filter.src_addr); 3931 3932 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3933 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3934 rule->filter.dst_addr); 3935 } 3936 3937 /** 3938 * igc_get_nfc_rule() - Get NFC rule 3939 * @adapter: Pointer to adapter 3940 * @location: Rule location 3941 * 3942 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3943 * 3944 * Return: Pointer to NFC rule at @location. If not found, NULL. 3945 */ 3946 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3947 u32 location) 3948 { 3949 struct igc_nfc_rule *rule; 3950 3951 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3952 if (rule->location == location) 3953 return rule; 3954 if (rule->location > location) 3955 break; 3956 } 3957 3958 return NULL; 3959 } 3960 3961 /** 3962 * igc_del_nfc_rule() - Delete NFC rule 3963 * @adapter: Pointer to adapter 3964 * @rule: Pointer to rule to be deleted 3965 * 3966 * Disable NFC rule in hardware and delete it from adapter. 3967 * 3968 * Context: Expects adapter->nfc_rule_lock to be held by caller. 
3969 */ 3970 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3971 { 3972 igc_disable_nfc_rule(adapter, rule); 3973 3974 list_del(&rule->list); 3975 adapter->nfc_rule_count--; 3976 3977 kfree(rule); 3978 } 3979 3980 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 3981 { 3982 struct igc_nfc_rule *rule, *tmp; 3983 3984 mutex_lock(&adapter->nfc_rule_lock); 3985 3986 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 3987 igc_del_nfc_rule(adapter, rule); 3988 3989 mutex_unlock(&adapter->nfc_rule_lock); 3990 } 3991 3992 /** 3993 * igc_add_nfc_rule() - Add NFC rule 3994 * @adapter: Pointer to adapter 3995 * @rule: Pointer to rule to be added 3996 * 3997 * Enable NFC rule in hardware and add it to adapter. 3998 * 3999 * Context: Expects adapter->nfc_rule_lock to be held by caller. 4000 * 4001 * Return: 0 on success, negative errno on failure. 4002 */ 4003 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 4004 { 4005 struct igc_nfc_rule *pred, *cur; 4006 int err; 4007 4008 err = igc_enable_nfc_rule(adapter, rule); 4009 if (err) 4010 return err; 4011 4012 pred = NULL; 4013 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 4014 if (cur->location >= rule->location) 4015 break; 4016 pred = cur; 4017 } 4018 4019 list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list); 4020 adapter->nfc_rule_count++; 4021 return 0; 4022 } 4023 4024 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 4025 { 4026 struct igc_nfc_rule *rule; 4027 4028 mutex_lock(&adapter->nfc_rule_lock); 4029 4030 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 4031 igc_enable_nfc_rule(adapter, rule); 4032 4033 mutex_unlock(&adapter->nfc_rule_lock); 4034 } 4035 4036 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 4037 { 4038 struct igc_adapter *adapter = netdev_priv(netdev); 4039 4040 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 4041 } 4042 4043 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 4044 { 4045 struct igc_adapter *adapter = netdev_priv(netdev); 4046 4047 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 4048 return 0; 4049 } 4050 4051 /** 4052 * igc_enable_empty_addr_recv - Enable Rx of packets with all-zeroes MAC address 4053 * @adapter: Pointer to the igc_adapter structure. 4054 * 4055 * Frame preemption verification requires that packets with the all-zeroes 4056 * MAC address are allowed to be received by the driver. This function adds the 4057 * all-zeroes destination address to the list of acceptable addresses. 4058 * 4059 * Return: 0 on success, negative value otherwise. 4060 */ 4061 int igc_enable_empty_addr_recv(struct igc_adapter *adapter) 4062 { 4063 u8 empty[ETH_ALEN] = {}; 4064 4065 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty, -1); 4066 } 4067 4068 void igc_disable_empty_addr_recv(struct igc_adapter *adapter) 4069 { 4070 u8 empty[ETH_ALEN] = {}; 4071 4072 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty); 4073 } 4074 4075 /** 4076 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 4077 * @netdev: network interface device structure 4078 * 4079 * The set_rx_mode entry point is called whenever the unicast or multicast 4080 * address lists or the network interface flags are updated. This routine is 4081 * responsible for configuring the hardware for proper unicast, multicast, 4082 * promiscuous mode, and all-multi behavior. 
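 *
 * For example, IFF_PROMISC sets both IGC_RCTL_UPE and IGC_RCTL_MPE below; if
 * the multicast list cannot be written to the MTA, or the unicast addresses
 * do not all fit in the available RAR slots, the matching promiscuous bit is
 * set as a fallback so traffic is not silently dropped.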
4083 */ 4084 static void igc_set_rx_mode(struct net_device *netdev) 4085 { 4086 struct igc_adapter *adapter = netdev_priv(netdev); 4087 struct igc_hw *hw = &adapter->hw; 4088 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 4089 int count; 4090 4091 /* Check for Promiscuous and All Multicast modes */ 4092 if (netdev->flags & IFF_PROMISC) { 4093 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 4094 } else { 4095 if (netdev->flags & IFF_ALLMULTI) { 4096 rctl |= IGC_RCTL_MPE; 4097 } else { 4098 /* Write addresses to the MTA, if the attempt fails 4099 * then we should just turn on promiscuous mode so 4100 * that we can at least receive multicast traffic 4101 */ 4102 count = igc_write_mc_addr_list(netdev); 4103 if (count < 0) 4104 rctl |= IGC_RCTL_MPE; 4105 } 4106 } 4107 4108 /* Write addresses to available RAR registers, if there is not 4109 * sufficient space to store all the addresses then enable 4110 * unicast promiscuous mode 4111 */ 4112 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 4113 rctl |= IGC_RCTL_UPE; 4114 4115 /* update state of unicast and multicast */ 4116 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 4117 wr32(IGC_RCTL, rctl); 4118 4119 #if (PAGE_SIZE < 8192) 4120 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 4121 rlpml = IGC_MAX_FRAME_BUILD_SKB; 4122 #endif 4123 wr32(IGC_RLPML, rlpml); 4124 } 4125 4126 /** 4127 * igc_configure - configure the hardware for RX and TX 4128 * @adapter: private board structure 4129 */ 4130 static void igc_configure(struct igc_adapter *adapter) 4131 { 4132 struct net_device *netdev = adapter->netdev; 4133 int i = 0; 4134 4135 igc_get_hw_control(adapter); 4136 igc_set_rx_mode(netdev); 4137 4138 igc_restore_vlan(adapter); 4139 4140 igc_setup_tctl(adapter); 4141 igc_setup_mrqc(adapter); 4142 igc_setup_rctl(adapter); 4143 4144 igc_set_default_mac_filter(adapter); 4145 igc_restore_nfc_rules(adapter); 4146 4147 igc_configure_tx(adapter); 4148 igc_configure_rx(adapter); 4149 4150 igc_rx_fifo_flush_base(&adapter->hw); 4151 4152 /* call igc_desc_unused which always leaves 4153 * at least 1 descriptor unused to make sure 4154 * next_to_use != next_to_clean 4155 */ 4156 for (i = 0; i < adapter->num_rx_queues; i++) { 4157 struct igc_ring *ring = adapter->rx_ring[i]; 4158 4159 if (ring->xsk_pool) 4160 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 4161 else 4162 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 4163 } 4164 } 4165 4166 /** 4167 * igc_write_ivar - configure ivar for given MSI-X vector 4168 * @hw: pointer to the HW structure 4169 * @msix_vector: vector number we are allocating to a given ring 4170 * @index: row index of IVAR register to write within IVAR table 4171 * @offset: column offset of in IVAR, should be multiple of 8 4172 * 4173 * The IVAR table consists of 2 columns, 4174 * each containing an cause allocation for an Rx and Tx ring, and a 4175 * variable number of rows depending on the number of queues supported. 
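 *
 * For example, with the mapping used by igc_assign_vector() below, Rx queue 5
 * lands in IVAR row 2 (5 >> 1) at bit offset 16 ((5 & 0x1) << 4), while the
 * corresponding Tx queue uses the same row at offset 24, since the Tx field
 * sits 8 bits above the Rx field.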
4176 */ 4177 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 4178 int index, int offset) 4179 { 4180 u32 ivar = array_rd32(IGC_IVAR0, index); 4181 4182 /* clear any bits that are currently set */ 4183 ivar &= ~((u32)0xFF << offset); 4184 4185 /* write vector and valid bit */ 4186 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 4187 4188 array_wr32(IGC_IVAR0, index, ivar); 4189 } 4190 4191 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 4192 { 4193 struct igc_adapter *adapter = q_vector->adapter; 4194 struct igc_hw *hw = &adapter->hw; 4195 int rx_queue = IGC_N0_QUEUE; 4196 int tx_queue = IGC_N0_QUEUE; 4197 4198 if (q_vector->rx.ring) 4199 rx_queue = q_vector->rx.ring->reg_idx; 4200 if (q_vector->tx.ring) 4201 tx_queue = q_vector->tx.ring->reg_idx; 4202 4203 switch (hw->mac.type) { 4204 case igc_i225: 4205 if (rx_queue > IGC_N0_QUEUE) 4206 igc_write_ivar(hw, msix_vector, 4207 rx_queue >> 1, 4208 (rx_queue & 0x1) << 4); 4209 if (tx_queue > IGC_N0_QUEUE) 4210 igc_write_ivar(hw, msix_vector, 4211 tx_queue >> 1, 4212 ((tx_queue & 0x1) << 4) + 8); 4213 q_vector->eims_value = BIT(msix_vector); 4214 break; 4215 default: 4216 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 4217 break; 4218 } 4219 4220 /* add q_vector eims value to global eims_enable_mask */ 4221 adapter->eims_enable_mask |= q_vector->eims_value; 4222 4223 /* configure q_vector to set itr on first interrupt */ 4224 q_vector->set_itr = 1; 4225 } 4226 4227 /** 4228 * igc_configure_msix - Configure MSI-X hardware 4229 * @adapter: Pointer to adapter structure 4230 * 4231 * igc_configure_msix sets up the hardware to properly 4232 * generate MSI-X interrupts. 4233 */ 4234 static void igc_configure_msix(struct igc_adapter *adapter) 4235 { 4236 struct igc_hw *hw = &adapter->hw; 4237 int i, vector = 0; 4238 u32 tmp; 4239 4240 adapter->eims_enable_mask = 0; 4241 4242 /* set vector for other causes, i.e. link changes */ 4243 switch (hw->mac.type) { 4244 case igc_i225: 4245 /* Turn on MSI-X capability first, or our settings 4246 * won't stick. And it will take days to debug. 
4247 */ 4248 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 4249 IGC_GPIE_PBA | IGC_GPIE_EIAME | 4250 IGC_GPIE_NSICR); 4251 4252 /* enable msix_other interrupt */ 4253 adapter->eims_other = BIT(vector); 4254 tmp = (vector++ | IGC_IVAR_VALID) << 8; 4255 4256 wr32(IGC_IVAR_MISC, tmp); 4257 break; 4258 default: 4259 /* do nothing, since nothing else supports MSI-X */ 4260 break; 4261 } /* switch (hw->mac.type) */ 4262 4263 adapter->eims_enable_mask |= adapter->eims_other; 4264 4265 for (i = 0; i < adapter->num_q_vectors; i++) 4266 igc_assign_vector(adapter->q_vector[i], vector++); 4267 4268 wrfl(); 4269 } 4270 4271 /** 4272 * igc_irq_enable - Enable default interrupt generation settings 4273 * @adapter: board private structure 4274 */ 4275 static void igc_irq_enable(struct igc_adapter *adapter) 4276 { 4277 struct igc_hw *hw = &adapter->hw; 4278 4279 if (adapter->msix_entries) { 4280 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 4281 u32 regval = rd32(IGC_EIAC); 4282 4283 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 4284 regval = rd32(IGC_EIAM); 4285 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 4286 wr32(IGC_EIMS, adapter->eims_enable_mask); 4287 wr32(IGC_IMS, ims); 4288 } else { 4289 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4290 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4291 } 4292 } 4293 4294 /** 4295 * igc_irq_disable - Mask off interrupt generation on the NIC 4296 * @adapter: board private structure 4297 */ 4298 static void igc_irq_disable(struct igc_adapter *adapter) 4299 { 4300 struct igc_hw *hw = &adapter->hw; 4301 4302 if (adapter->msix_entries) { 4303 u32 regval = rd32(IGC_EIAM); 4304 4305 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 4306 wr32(IGC_EIMC, adapter->eims_enable_mask); 4307 regval = rd32(IGC_EIAC); 4308 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 4309 } 4310 4311 wr32(IGC_IAM, 0); 4312 wr32(IGC_IMC, ~0); 4313 wrfl(); 4314 4315 if (adapter->msix_entries) { 4316 int vector = 0, i; 4317 4318 synchronize_irq(adapter->msix_entries[vector++].vector); 4319 4320 for (i = 0; i < adapter->num_q_vectors; i++) 4321 synchronize_irq(adapter->msix_entries[vector++].vector); 4322 } else { 4323 synchronize_irq(adapter->pdev->irq); 4324 } 4325 } 4326 4327 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 4328 const u32 max_rss_queues) 4329 { 4330 /* Determine if we need to pair queues. */ 4331 /* If rss_queues > half of max_rss_queues, pair the queues in 4332 * order to conserve interrupts due to limited supply. 4333 */ 4334 if (adapter->rss_queues > (max_rss_queues / 2)) 4335 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4336 else 4337 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 4338 } 4339 4340 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 4341 { 4342 return IGC_MAX_RX_QUEUES; 4343 } 4344 4345 static void igc_init_queue_configuration(struct igc_adapter *adapter) 4346 { 4347 u32 max_rss_queues; 4348 4349 max_rss_queues = igc_get_max_rss_queues(adapter); 4350 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 4351 4352 igc_set_flag_queue_pairs(adapter, max_rss_queues); 4353 } 4354 4355 /** 4356 * igc_reset_q_vector - Reset config for interrupt vector 4357 * @adapter: board private structure to initialize 4358 * @v_idx: Index of vector to be reset 4359 * 4360 * If NAPI is enabled it will delete any references to the 4361 * NAPI struct. This is preparation for igc_free_q_vector. 
4362 */ 4363 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 4364 { 4365 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4366 4367 /* if we're coming from igc_set_interrupt_capability, the vectors are 4368 * not yet allocated 4369 */ 4370 if (!q_vector) 4371 return; 4372 4373 if (q_vector->tx.ring) 4374 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 4375 4376 if (q_vector->rx.ring) 4377 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 4378 4379 netif_napi_del(&q_vector->napi); 4380 } 4381 4382 /** 4383 * igc_free_q_vector - Free memory allocated for specific interrupt vector 4384 * @adapter: board private structure to initialize 4385 * @v_idx: Index of vector to be freed 4386 * 4387 * This function frees the memory allocated to the q_vector. 4388 */ 4389 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 4390 { 4391 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4392 4393 adapter->q_vector[v_idx] = NULL; 4394 4395 /* igc_get_stats64() might access the rings on this vector, 4396 * we must wait a grace period before freeing it. 4397 */ 4398 if (q_vector) 4399 kfree_rcu(q_vector, rcu); 4400 } 4401 4402 /** 4403 * igc_free_q_vectors - Free memory allocated for interrupt vectors 4404 * @adapter: board private structure to initialize 4405 * 4406 * This function frees the memory allocated to the q_vectors. In addition if 4407 * NAPI is enabled it will delete any references to the NAPI struct prior 4408 * to freeing the q_vector. 4409 */ 4410 static void igc_free_q_vectors(struct igc_adapter *adapter) 4411 { 4412 int v_idx = adapter->num_q_vectors; 4413 4414 adapter->num_tx_queues = 0; 4415 adapter->num_rx_queues = 0; 4416 adapter->num_q_vectors = 0; 4417 4418 while (v_idx--) { 4419 igc_reset_q_vector(adapter, v_idx); 4420 igc_free_q_vector(adapter, v_idx); 4421 } 4422 } 4423 4424 /** 4425 * igc_update_itr - update the dynamic ITR value based on statistics 4426 * @q_vector: pointer to q_vector 4427 * @ring_container: ring info to update the itr for 4428 * 4429 * Stores a new ITR value based on packets and byte 4430 * counts during the last interrupt. The advantage of per interrupt 4431 * computation is faster updates and more accurate ITR for the current 4432 * traffic pattern. Constants in this function were computed 4433 * based on theoretical maximum wire speed and thresholds were set based 4434 * on testing data as well as attempting to minimize response time 4435 * while increasing bulk throughput. 4436 * NOTE: These calculations are only valid when operating in a single- 4437 * queue environment. 
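 *
 * As an illustration of the thresholds below: while in low_latency, a window
 * of 50 packets totalling 12800 bytes (256 bytes/packet) moves the ring to
 * lowest_latency, whereas a window dominated by ~9000-byte TSO/jumbo bursts
 * (over 8000 bytes per packet) moves it to bulk_latency.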
4438 */ 4439 static void igc_update_itr(struct igc_q_vector *q_vector, 4440 struct igc_ring_container *ring_container) 4441 { 4442 unsigned int packets = ring_container->total_packets; 4443 unsigned int bytes = ring_container->total_bytes; 4444 u8 itrval = ring_container->itr; 4445 4446 /* no packets, exit with status unchanged */ 4447 if (packets == 0) 4448 return; 4449 4450 switch (itrval) { 4451 case lowest_latency: 4452 /* handle TSO and jumbo frames */ 4453 if (bytes / packets > 8000) 4454 itrval = bulk_latency; 4455 else if ((packets < 5) && (bytes > 512)) 4456 itrval = low_latency; 4457 break; 4458 case low_latency: /* 50 usec aka 20000 ints/s */ 4459 if (bytes > 10000) { 4460 /* this if handles the TSO accounting */ 4461 if (bytes / packets > 8000) 4462 itrval = bulk_latency; 4463 else if ((packets < 10) || ((bytes / packets) > 1200)) 4464 itrval = bulk_latency; 4465 else if ((packets > 35)) 4466 itrval = lowest_latency; 4467 } else if (bytes / packets > 2000) { 4468 itrval = bulk_latency; 4469 } else if (packets <= 2 && bytes < 512) { 4470 itrval = lowest_latency; 4471 } 4472 break; 4473 case bulk_latency: /* 250 usec aka 4000 ints/s */ 4474 if (bytes > 25000) { 4475 if (packets > 35) 4476 itrval = low_latency; 4477 } else if (bytes < 1500) { 4478 itrval = low_latency; 4479 } 4480 break; 4481 } 4482 4483 /* clear work counters since we have the values we need */ 4484 ring_container->total_bytes = 0; 4485 ring_container->total_packets = 0; 4486 4487 /* write updated itr to ring container */ 4488 ring_container->itr = itrval; 4489 } 4490 4491 static void igc_set_itr(struct igc_q_vector *q_vector) 4492 { 4493 struct igc_adapter *adapter = q_vector->adapter; 4494 u32 new_itr = q_vector->itr_val; 4495 u8 current_itr = 0; 4496 4497 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 4498 switch (adapter->link_speed) { 4499 case SPEED_10: 4500 case SPEED_100: 4501 current_itr = 0; 4502 new_itr = IGC_4K_ITR; 4503 goto set_itr_now; 4504 default: 4505 break; 4506 } 4507 4508 igc_update_itr(q_vector, &q_vector->tx); 4509 igc_update_itr(q_vector, &q_vector->rx); 4510 4511 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4512 4513 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4514 if (current_itr == lowest_latency && 4515 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4516 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4517 current_itr = low_latency; 4518 4519 switch (current_itr) { 4520 /* counts and packets in update_itr are dependent on these numbers */ 4521 case lowest_latency: 4522 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4523 break; 4524 case low_latency: 4525 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4526 break; 4527 case bulk_latency: 4528 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4529 break; 4530 default: 4531 break; 4532 } 4533 4534 set_itr_now: 4535 if (new_itr != q_vector->itr_val) { 4536 /* this attempts to bias the interrupt rate towards Bulk 4537 * by adding intermediate steps when interrupt rate is 4538 * increasing 4539 */ 4540 new_itr = new_itr > q_vector->itr_val ? 4541 max((new_itr * q_vector->itr_val) / 4542 (new_itr + (q_vector->itr_val >> 2)), 4543 new_itr) : new_itr; 4544 /* Don't write the value here; it resets the adapter's 4545 * internal timer, and causes us to delay far longer than 4546 * we should between interrupts. Instead, we write the ITR 4547 * value at the beginning of the next interrupt so the timing 4548 * ends up being correct. 
4549 */ 4550 q_vector->itr_val = new_itr; 4551 q_vector->set_itr = 1; 4552 } 4553 } 4554 4555 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 4556 { 4557 int v_idx = adapter->num_q_vectors; 4558 4559 if (adapter->msix_entries) { 4560 pci_disable_msix(adapter->pdev); 4561 kfree(adapter->msix_entries); 4562 adapter->msix_entries = NULL; 4563 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 4564 pci_disable_msi(adapter->pdev); 4565 } 4566 4567 while (v_idx--) 4568 igc_reset_q_vector(adapter, v_idx); 4569 } 4570 4571 /** 4572 * igc_set_interrupt_capability - set MSI or MSI-X if supported 4573 * @adapter: Pointer to adapter structure 4574 * @msix: boolean value for MSI-X capability 4575 * 4576 * Attempt to configure interrupts using the best available 4577 * capabilities of the hardware and kernel. 4578 */ 4579 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 4580 bool msix) 4581 { 4582 int numvecs, i; 4583 int err; 4584 4585 if (!msix) 4586 goto msi_only; 4587 adapter->flags |= IGC_FLAG_HAS_MSIX; 4588 4589 /* Number of supported queues. */ 4590 adapter->num_rx_queues = adapter->rss_queues; 4591 4592 adapter->num_tx_queues = adapter->rss_queues; 4593 4594 /* start with one vector for every Rx queue */ 4595 numvecs = adapter->num_rx_queues; 4596 4597 /* if Tx handler is separate add 1 for every Tx queue */ 4598 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 4599 numvecs += adapter->num_tx_queues; 4600 4601 /* store the number of vectors reserved for queues */ 4602 adapter->num_q_vectors = numvecs; 4603 4604 /* add 1 vector for link status interrupts */ 4605 numvecs++; 4606 4607 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 4608 GFP_KERNEL); 4609 4610 if (!adapter->msix_entries) 4611 return; 4612 4613 /* populate entry values */ 4614 for (i = 0; i < numvecs; i++) 4615 adapter->msix_entries[i].entry = i; 4616 4617 err = pci_enable_msix_range(adapter->pdev, 4618 adapter->msix_entries, 4619 numvecs, 4620 numvecs); 4621 if (err > 0) 4622 return; 4623 4624 kfree(adapter->msix_entries); 4625 adapter->msix_entries = NULL; 4626 4627 igc_reset_interrupt_capability(adapter); 4628 4629 msi_only: 4630 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 4631 4632 adapter->rss_queues = 1; 4633 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4634 adapter->num_rx_queues = 1; 4635 adapter->num_tx_queues = 1; 4636 adapter->num_q_vectors = 1; 4637 if (!pci_enable_msi(adapter->pdev)) 4638 adapter->flags |= IGC_FLAG_HAS_MSI; 4639 } 4640 4641 /** 4642 * igc_update_ring_itr - update the dynamic ITR value based on packet size 4643 * @q_vector: pointer to q_vector 4644 * 4645 * Stores a new ITR value based on strictly on packet size. This 4646 * algorithm is less sophisticated than that used in igc_update_itr, 4647 * due to the difficulty of synchronizing statistics across multiple 4648 * receive rings. The divisors and thresholds used by this function 4649 * were determined based on theoretical maximum wire speed and testing 4650 * data, in order to minimize response time while increasing bulk 4651 * throughput. 4652 * NOTE: This function is called only when operating in a multiqueue 4653 * receive environment. 4654 */ 4655 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4656 { 4657 struct igc_adapter *adapter = q_vector->adapter; 4658 int new_val = q_vector->itr_val; 4659 int avg_wire_size = 0; 4660 unsigned int packets; 4661 4662 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4663 * ints/sec - ITR timer value of 120 ticks. 
4664 */ 4665 switch (adapter->link_speed) { 4666 case SPEED_10: 4667 case SPEED_100: 4668 new_val = IGC_4K_ITR; 4669 goto set_itr_val; 4670 default: 4671 break; 4672 } 4673 4674 packets = q_vector->rx.total_packets; 4675 if (packets) 4676 avg_wire_size = q_vector->rx.total_bytes / packets; 4677 4678 packets = q_vector->tx.total_packets; 4679 if (packets) 4680 avg_wire_size = max_t(u32, avg_wire_size, 4681 q_vector->tx.total_bytes / packets); 4682 4683 /* if avg_wire_size isn't set no work was done */ 4684 if (!avg_wire_size) 4685 goto clear_counts; 4686 4687 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4688 avg_wire_size += 24; 4689 4690 /* Don't starve jumbo frames */ 4691 avg_wire_size = min(avg_wire_size, 3000); 4692 4693 /* Give a little boost to mid-size frames */ 4694 if (avg_wire_size > 300 && avg_wire_size < 1200) 4695 new_val = avg_wire_size / 3; 4696 else 4697 new_val = avg_wire_size / 2; 4698 4699 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4700 if (new_val < IGC_20K_ITR && 4701 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4702 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4703 new_val = IGC_20K_ITR; 4704 4705 set_itr_val: 4706 if (new_val != q_vector->itr_val) { 4707 q_vector->itr_val = new_val; 4708 q_vector->set_itr = 1; 4709 } 4710 clear_counts: 4711 q_vector->rx.total_bytes = 0; 4712 q_vector->rx.total_packets = 0; 4713 q_vector->tx.total_bytes = 0; 4714 q_vector->tx.total_packets = 0; 4715 } 4716 4717 static void igc_ring_irq_enable(struct igc_q_vector *q_vector) 4718 { 4719 struct igc_adapter *adapter = q_vector->adapter; 4720 struct igc_hw *hw = &adapter->hw; 4721 4722 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4723 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4724 if (adapter->num_q_vectors == 1) 4725 igc_set_itr(q_vector); 4726 else 4727 igc_update_ring_itr(q_vector); 4728 } 4729 4730 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4731 if (adapter->msix_entries) 4732 wr32(IGC_EIMS, q_vector->eims_value); 4733 else 4734 igc_irq_enable(adapter); 4735 } 4736 } 4737 4738 static void igc_add_ring(struct igc_ring *ring, 4739 struct igc_ring_container *head) 4740 { 4741 head->ring = ring; 4742 head->count++; 4743 } 4744 4745 /** 4746 * igc_cache_ring_register - Descriptor ring to register mapping 4747 * @adapter: board private structure to initialize 4748 * 4749 * Once we know the feature-set enabled for the device, we'll cache 4750 * the register offset the descriptor ring is assigned to. 4751 */ 4752 static void igc_cache_ring_register(struct igc_adapter *adapter) 4753 { 4754 int i = 0, j = 0; 4755 4756 switch (adapter->hw.mac.type) { 4757 case igc_i225: 4758 default: 4759 for (; i < adapter->num_rx_queues; i++) 4760 adapter->rx_ring[i]->reg_idx = i; 4761 for (; j < adapter->num_tx_queues; j++) 4762 adapter->tx_ring[j]->reg_idx = j; 4763 break; 4764 } 4765 } 4766 4767 /** 4768 * igc_poll - NAPI Rx polling callback 4769 * @napi: napi polling structure 4770 * @budget: count of how many packets we should handle 4771 */ 4772 static int igc_poll(struct napi_struct *napi, int budget) 4773 { 4774 struct igc_q_vector *q_vector = container_of(napi, 4775 struct igc_q_vector, 4776 napi); 4777 struct igc_ring *rx_ring = q_vector->rx.ring; 4778 bool clean_complete = true; 4779 int work_done = 0; 4780 4781 if (q_vector->tx.ring) 4782 clean_complete = igc_clean_tx_irq(q_vector, budget); 4783 4784 if (rx_ring) { 4785 int cleaned = rx_ring->xsk_pool ? 
4786 igc_clean_rx_irq_zc(q_vector, budget) : 4787 igc_clean_rx_irq(q_vector, budget); 4788 4789 work_done += cleaned; 4790 if (cleaned >= budget) 4791 clean_complete = false; 4792 } 4793 4794 /* If all work not completed, return budget and keep polling */ 4795 if (!clean_complete) 4796 return budget; 4797 4798 /* Exit the polling mode, but don't re-enable interrupts if stack might 4799 * poll us due to busy-polling 4800 */ 4801 if (likely(napi_complete_done(napi, work_done))) 4802 igc_ring_irq_enable(q_vector); 4803 4804 return min(work_done, budget - 1); 4805 } 4806 4807 /** 4808 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4809 * @adapter: board private structure to initialize 4810 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4811 * @v_idx: index of vector in adapter struct 4812 * @txr_count: total number of Tx rings to allocate 4813 * @txr_idx: index of first Tx ring to allocate 4814 * @rxr_count: total number of Rx rings to allocate 4815 * @rxr_idx: index of first Rx ring to allocate 4816 * 4817 * We allocate one q_vector. If allocation fails we return -ENOMEM. 4818 */ 4819 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4820 unsigned int v_count, unsigned int v_idx, 4821 unsigned int txr_count, unsigned int txr_idx, 4822 unsigned int rxr_count, unsigned int rxr_idx) 4823 { 4824 struct igc_q_vector *q_vector; 4825 struct igc_ring *ring; 4826 int ring_count; 4827 4828 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4829 if (txr_count > 1 || rxr_count > 1) 4830 return -ENOMEM; 4831 4832 ring_count = txr_count + rxr_count; 4833 4834 /* allocate q_vector and rings */ 4835 q_vector = adapter->q_vector[v_idx]; 4836 if (!q_vector) 4837 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4838 GFP_KERNEL); 4839 else 4840 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4841 if (!q_vector) 4842 return -ENOMEM; 4843 4844 /* initialize NAPI */ 4845 netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll); 4846 4847 /* tie q_vector and adapter together */ 4848 adapter->q_vector[v_idx] = q_vector; 4849 q_vector->adapter = adapter; 4850 4851 /* initialize work limits */ 4852 q_vector->tx.work_limit = adapter->tx_work_limit; 4853 4854 /* initialize ITR configuration */ 4855 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4856 q_vector->itr_val = IGC_START_ITR; 4857 4858 /* initialize pointer to rings */ 4859 ring = q_vector->ring; 4860 4861 /* initialize ITR */ 4862 if (rxr_count) { 4863 /* rx or rx/tx vector */ 4864 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4865 q_vector->itr_val = adapter->rx_itr_setting; 4866 } else { 4867 /* tx only vector */ 4868 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4869 q_vector->itr_val = adapter->tx_itr_setting; 4870 } 4871 4872 if (txr_count) { 4873 /* assign generic ring traits */ 4874 ring->dev = &adapter->pdev->dev; 4875 ring->netdev = adapter->netdev; 4876 4877 /* configure backlink on ring */ 4878 ring->q_vector = q_vector; 4879 4880 /* update q_vector Tx values */ 4881 igc_add_ring(ring, &q_vector->tx); 4882 4883 /* apply Tx specific ring traits */ 4884 ring->count = adapter->tx_ring_count; 4885 ring->queue_index = txr_idx; 4886 4887 /* assign ring to adapter */ 4888 adapter->tx_ring[txr_idx] = ring; 4889 4890 /* push pointer to next ring */ 4891 ring++; 4892 } 4893 4894 if (rxr_count) { 4895 /* assign generic ring traits */ 4896 ring->dev = &adapter->pdev->dev; 4897 ring->netdev = adapter->netdev; 4898 4899 /* configure 
backlink on ring */ 4900 ring->q_vector = q_vector; 4901 4902 /* update q_vector Rx values */ 4903 igc_add_ring(ring, &q_vector->rx); 4904 4905 /* apply Rx specific ring traits */ 4906 ring->count = adapter->rx_ring_count; 4907 ring->queue_index = rxr_idx; 4908 4909 /* assign ring to adapter */ 4910 adapter->rx_ring[rxr_idx] = ring; 4911 } 4912 4913 return 0; 4914 } 4915 4916 /** 4917 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4918 * @adapter: board private structure to initialize 4919 * 4920 * We allocate one q_vector per queue interrupt. If allocation fails we 4921 * return -ENOMEM. 4922 */ 4923 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4924 { 4925 int rxr_remaining = adapter->num_rx_queues; 4926 int txr_remaining = adapter->num_tx_queues; 4927 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4928 int q_vectors = adapter->num_q_vectors; 4929 int err; 4930 4931 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4932 for (; rxr_remaining; v_idx++) { 4933 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4934 0, 0, 1, rxr_idx); 4935 4936 if (err) 4937 goto err_out; 4938 4939 /* update counts and index */ 4940 rxr_remaining--; 4941 rxr_idx++; 4942 } 4943 } 4944 4945 for (; v_idx < q_vectors; v_idx++) { 4946 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4947 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4948 4949 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4950 tqpv, txr_idx, rqpv, rxr_idx); 4951 4952 if (err) 4953 goto err_out; 4954 4955 /* update counts and index */ 4956 rxr_remaining -= rqpv; 4957 txr_remaining -= tqpv; 4958 rxr_idx++; 4959 txr_idx++; 4960 } 4961 4962 return 0; 4963 4964 err_out: 4965 adapter->num_tx_queues = 0; 4966 adapter->num_rx_queues = 0; 4967 adapter->num_q_vectors = 0; 4968 4969 while (v_idx--) 4970 igc_free_q_vector(adapter, v_idx); 4971 4972 return -ENOMEM; 4973 } 4974 4975 /** 4976 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 4977 * @adapter: Pointer to adapter structure 4978 * @msix: boolean for MSI-X capability 4979 * 4980 * This function initializes the interrupts and allocates all of the queues. 4981 */ 4982 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 4983 { 4984 struct net_device *dev = adapter->netdev; 4985 int err = 0; 4986 4987 igc_set_interrupt_capability(adapter, msix); 4988 4989 err = igc_alloc_q_vectors(adapter); 4990 if (err) { 4991 netdev_err(dev, "Unable to allocate memory for vectors\n"); 4992 goto err_alloc_q_vectors; 4993 } 4994 4995 igc_cache_ring_register(adapter); 4996 4997 return 0; 4998 4999 err_alloc_q_vectors: 5000 igc_reset_interrupt_capability(adapter); 5001 return err; 5002 } 5003 5004 /** 5005 * igc_sw_init - Initialize general software structures (struct igc_adapter) 5006 * @adapter: board private structure to initialize 5007 * 5008 * igc_sw_init initializes the Adapter private data structure. 5009 * Fields are initialized based on PCI device information and 5010 * OS network device settings (MTU size). 
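 *
 * For example, with the default 1500-byte MTU the adjusted max_frame_size
 * below works out to 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4)
 * = 1522 bytes.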
5011 */ 5012 static int igc_sw_init(struct igc_adapter *adapter) 5013 { 5014 struct net_device *netdev = adapter->netdev; 5015 struct pci_dev *pdev = adapter->pdev; 5016 struct igc_hw *hw = &adapter->hw; 5017 5018 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 5019 5020 /* set default ring sizes */ 5021 adapter->tx_ring_count = IGC_DEFAULT_TXD; 5022 adapter->rx_ring_count = IGC_DEFAULT_RXD; 5023 5024 /* set default ITR values */ 5025 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 5026 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 5027 5028 /* set default work limits */ 5029 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 5030 5031 /* adjust max frame to be at least the size of a standard frame */ 5032 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 5033 VLAN_HLEN; 5034 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 5035 5036 mutex_init(&adapter->nfc_rule_lock); 5037 INIT_LIST_HEAD(&adapter->nfc_rule_list); 5038 adapter->nfc_rule_count = 0; 5039 5040 spin_lock_init(&adapter->stats64_lock); 5041 spin_lock_init(&adapter->qbv_tx_lock); 5042 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 5043 adapter->flags |= IGC_FLAG_HAS_MSIX; 5044 5045 igc_init_queue_configuration(adapter); 5046 5047 /* This call may decrease the number of queues */ 5048 if (igc_init_interrupt_scheme(adapter, true)) { 5049 netdev_err(netdev, "Unable to allocate memory for queues\n"); 5050 return -ENOMEM; 5051 } 5052 5053 /* Explicitly disable IRQ since the NIC can be in any state. */ 5054 igc_irq_disable(adapter); 5055 5056 set_bit(__IGC_DOWN, &adapter->state); 5057 5058 return 0; 5059 } 5060 5061 static void igc_set_queue_napi(struct igc_adapter *adapter, int vector, 5062 struct napi_struct *napi) 5063 { 5064 struct igc_q_vector *q_vector = adapter->q_vector[vector]; 5065 5066 if (q_vector->rx.ring) 5067 netif_queue_set_napi(adapter->netdev, 5068 q_vector->rx.ring->queue_index, 5069 NETDEV_QUEUE_TYPE_RX, napi); 5070 5071 if (q_vector->tx.ring) 5072 netif_queue_set_napi(adapter->netdev, 5073 q_vector->tx.ring->queue_index, 5074 NETDEV_QUEUE_TYPE_TX, napi); 5075 } 5076 5077 /** 5078 * igc_up - Open the interface and prepare it to handle traffic 5079 * @adapter: board private structure 5080 */ 5081 void igc_up(struct igc_adapter *adapter) 5082 { 5083 struct igc_hw *hw = &adapter->hw; 5084 struct napi_struct *napi; 5085 int i = 0; 5086 5087 /* hardware has been reset, we need to reload some things */ 5088 igc_configure(adapter); 5089 5090 clear_bit(__IGC_DOWN, &adapter->state); 5091 5092 for (i = 0; i < adapter->num_q_vectors; i++) { 5093 napi = &adapter->q_vector[i]->napi; 5094 napi_enable(napi); 5095 igc_set_queue_napi(adapter, i, napi); 5096 } 5097 5098 if (adapter->msix_entries) 5099 igc_configure_msix(adapter); 5100 else 5101 igc_assign_vector(adapter->q_vector[0], 0); 5102 5103 /* Clear any pending interrupts. */ 5104 rd32(IGC_ICR); 5105 igc_irq_enable(adapter); 5106 5107 netif_tx_start_all_queues(adapter->netdev); 5108 5109 /* start the watchdog. 
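 * Setting get_link_status forces the next watchdog run to query the PHY, so
 * the carrier state is re-evaluated once the interface is back up.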
*/ 5110 hw->mac.get_link_status = true; 5111 schedule_work(&adapter->watchdog_task); 5112 } 5113 5114 /** 5115 * igc_update_stats - Update the board statistics counters 5116 * @adapter: board private structure 5117 */ 5118 void igc_update_stats(struct igc_adapter *adapter) 5119 { 5120 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 5121 struct pci_dev *pdev = adapter->pdev; 5122 struct igc_hw *hw = &adapter->hw; 5123 u64 _bytes, _packets; 5124 u64 bytes, packets; 5125 unsigned int start; 5126 u32 mpc; 5127 int i; 5128 5129 /* Prevent stats update while adapter is being reset, or if the pci 5130 * connection is down. 5131 */ 5132 if (adapter->link_speed == 0) 5133 return; 5134 if (pci_channel_offline(pdev)) 5135 return; 5136 5137 packets = 0; 5138 bytes = 0; 5139 5140 rcu_read_lock(); 5141 for (i = 0; i < adapter->num_rx_queues; i++) { 5142 struct igc_ring *ring = adapter->rx_ring[i]; 5143 u32 rqdpc = rd32(IGC_RQDPC(i)); 5144 5145 if (hw->mac.type >= igc_i225) 5146 wr32(IGC_RQDPC(i), 0); 5147 5148 if (rqdpc) { 5149 ring->rx_stats.drops += rqdpc; 5150 net_stats->rx_fifo_errors += rqdpc; 5151 } 5152 5153 do { 5154 start = u64_stats_fetch_begin(&ring->rx_syncp); 5155 _bytes = ring->rx_stats.bytes; 5156 _packets = ring->rx_stats.packets; 5157 } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); 5158 bytes += _bytes; 5159 packets += _packets; 5160 } 5161 5162 net_stats->rx_bytes = bytes; 5163 net_stats->rx_packets = packets; 5164 5165 packets = 0; 5166 bytes = 0; 5167 for (i = 0; i < adapter->num_tx_queues; i++) { 5168 struct igc_ring *ring = adapter->tx_ring[i]; 5169 5170 do { 5171 start = u64_stats_fetch_begin(&ring->tx_syncp); 5172 _bytes = ring->tx_stats.bytes; 5173 _packets = ring->tx_stats.packets; 5174 } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); 5175 bytes += _bytes; 5176 packets += _packets; 5177 } 5178 net_stats->tx_bytes = bytes; 5179 net_stats->tx_packets = packets; 5180 rcu_read_unlock(); 5181 5182 /* read stats registers */ 5183 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 5184 adapter->stats.gprc += rd32(IGC_GPRC); 5185 adapter->stats.gorc += rd32(IGC_GORCL); 5186 rd32(IGC_GORCH); /* clear GORCL */ 5187 adapter->stats.bprc += rd32(IGC_BPRC); 5188 adapter->stats.mprc += rd32(IGC_MPRC); 5189 adapter->stats.roc += rd32(IGC_ROC); 5190 5191 adapter->stats.prc64 += rd32(IGC_PRC64); 5192 adapter->stats.prc127 += rd32(IGC_PRC127); 5193 adapter->stats.prc255 += rd32(IGC_PRC255); 5194 adapter->stats.prc511 += rd32(IGC_PRC511); 5195 adapter->stats.prc1023 += rd32(IGC_PRC1023); 5196 adapter->stats.prc1522 += rd32(IGC_PRC1522); 5197 adapter->stats.tlpic += rd32(IGC_TLPIC); 5198 adapter->stats.rlpic += rd32(IGC_RLPIC); 5199 adapter->stats.hgptc += rd32(IGC_HGPTC); 5200 5201 mpc = rd32(IGC_MPC); 5202 adapter->stats.mpc += mpc; 5203 net_stats->rx_fifo_errors += mpc; 5204 adapter->stats.scc += rd32(IGC_SCC); 5205 adapter->stats.ecol += rd32(IGC_ECOL); 5206 adapter->stats.mcc += rd32(IGC_MCC); 5207 adapter->stats.latecol += rd32(IGC_LATECOL); 5208 adapter->stats.dc += rd32(IGC_DC); 5209 adapter->stats.rlec += rd32(IGC_RLEC); 5210 adapter->stats.xonrxc += rd32(IGC_XONRXC); 5211 adapter->stats.xontxc += rd32(IGC_XONTXC); 5212 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 5213 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 5214 adapter->stats.fcruc += rd32(IGC_FCRUC); 5215 adapter->stats.gptc += rd32(IGC_GPTC); 5216 adapter->stats.gotc += rd32(IGC_GOTCL); 5217 rd32(IGC_GOTCH); /* clear GOTCL */ 5218 adapter->stats.rnbc += rd32(IGC_RNBC); 5219 adapter->stats.ruc += rd32(IGC_RUC); 
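	/* Note: these MAC statistics counters are read-to-clear (see the
	 * GORCL/GORCH handling above), so each pass accumulates only the
	 * delta since the previous read, hence the += throughout.
	 */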
5220 adapter->stats.rfc += rd32(IGC_RFC); 5221 adapter->stats.rjc += rd32(IGC_RJC); 5222 adapter->stats.tor += rd32(IGC_TORH); 5223 adapter->stats.tot += rd32(IGC_TOTH); 5224 adapter->stats.tpr += rd32(IGC_TPR); 5225 5226 adapter->stats.ptc64 += rd32(IGC_PTC64); 5227 adapter->stats.ptc127 += rd32(IGC_PTC127); 5228 adapter->stats.ptc255 += rd32(IGC_PTC255); 5229 adapter->stats.ptc511 += rd32(IGC_PTC511); 5230 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 5231 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 5232 5233 adapter->stats.mptc += rd32(IGC_MPTC); 5234 adapter->stats.bptc += rd32(IGC_BPTC); 5235 5236 adapter->stats.tpt += rd32(IGC_TPT); 5237 adapter->stats.colc += rd32(IGC_COLC); 5238 adapter->stats.colc += rd32(IGC_RERC); 5239 5240 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 5241 5242 adapter->stats.tsctc += rd32(IGC_TSCTC); 5243 5244 adapter->stats.iac += rd32(IGC_IAC); 5245 5246 /* Fill out the OS statistics structure */ 5247 net_stats->multicast = adapter->stats.mprc; 5248 net_stats->collisions = adapter->stats.colc; 5249 5250 /* Rx Errors */ 5251 5252 /* RLEC on some newer hardware can be incorrect so build 5253 * our own version based on RUC and ROC 5254 */ 5255 net_stats->rx_errors = adapter->stats.rxerrc + 5256 adapter->stats.crcerrs + adapter->stats.algnerrc + 5257 adapter->stats.ruc + adapter->stats.roc + 5258 adapter->stats.cexterr; 5259 net_stats->rx_length_errors = adapter->stats.ruc + 5260 adapter->stats.roc; 5261 net_stats->rx_crc_errors = adapter->stats.crcerrs; 5262 net_stats->rx_frame_errors = adapter->stats.algnerrc; 5263 net_stats->rx_missed_errors = adapter->stats.mpc; 5264 5265 /* Tx Errors */ 5266 net_stats->tx_errors = adapter->stats.ecol + 5267 adapter->stats.latecol; 5268 net_stats->tx_aborted_errors = adapter->stats.ecol; 5269 net_stats->tx_window_errors = adapter->stats.latecol; 5270 net_stats->tx_carrier_errors = adapter->stats.tncrs; 5271 5272 /* Tx Dropped */ 5273 net_stats->tx_dropped = adapter->stats.txdrop; 5274 5275 /* Management Stats */ 5276 adapter->stats.mgptc += rd32(IGC_MGTPTC); 5277 adapter->stats.mgprc += rd32(IGC_MGTPRC); 5278 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 5279 } 5280 5281 /** 5282 * igc_down - Close the interface 5283 * @adapter: board private structure 5284 */ 5285 void igc_down(struct igc_adapter *adapter) 5286 { 5287 struct net_device *netdev = adapter->netdev; 5288 struct igc_hw *hw = &adapter->hw; 5289 u32 tctl, rctl; 5290 int i = 0; 5291 5292 set_bit(__IGC_DOWN, &adapter->state); 5293 5294 igc_ptp_suspend(adapter); 5295 5296 if (pci_device_is_present(adapter->pdev)) { 5297 /* disable receives in the hardware */ 5298 rctl = rd32(IGC_RCTL); 5299 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 5300 /* flush and sleep below */ 5301 } 5302 /* set trans_start so we don't get spurious watchdogs during reset */ 5303 netif_trans_update(netdev); 5304 5305 netif_carrier_off(netdev); 5306 netif_tx_stop_all_queues(netdev); 5307 5308 if (pci_device_is_present(adapter->pdev)) { 5309 /* disable transmits in the hardware */ 5310 tctl = rd32(IGC_TCTL); 5311 tctl &= ~IGC_TCTL_EN; 5312 wr32(IGC_TCTL, tctl); 5313 /* flush both disables and wait for them to finish */ 5314 wrfl(); 5315 usleep_range(10000, 20000); 5316 5317 igc_irq_disable(adapter); 5318 } 5319 5320 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5321 5322 for (i = 0; i < adapter->num_q_vectors; i++) { 5323 if (adapter->q_vector[i]) { 5324 napi_synchronize(&adapter->q_vector[i]->napi); 5325 igc_set_queue_napi(adapter, i, NULL); 5326 napi_disable(&adapter->q_vector[i]->napi); 5327 } 5328 } 
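	/* At this point receives and transmits are disabled in hardware and
	 * NAPI is quiesced, so it is safe to stop the timers, record the
	 * final stats and reset the controller below.
	 */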
5329 5330 timer_delete_sync(&adapter->watchdog_timer); 5331 timer_delete_sync(&adapter->phy_info_timer); 5332 5333 /* record the stats before reset*/ 5334 spin_lock(&adapter->stats64_lock); 5335 igc_update_stats(adapter); 5336 spin_unlock(&adapter->stats64_lock); 5337 5338 adapter->link_speed = 0; 5339 adapter->link_duplex = 0; 5340 5341 if (!pci_channel_offline(adapter->pdev)) 5342 igc_reset(adapter); 5343 5344 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 5345 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 5346 5347 igc_disable_all_tx_rings_hw(adapter); 5348 igc_clean_all_tx_rings(adapter); 5349 igc_clean_all_rx_rings(adapter); 5350 5351 if (adapter->fpe.mmsv.pmac_enabled) 5352 ethtool_mmsv_stop(&adapter->fpe.mmsv); 5353 } 5354 5355 void igc_reinit_locked(struct igc_adapter *adapter) 5356 { 5357 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5358 usleep_range(1000, 2000); 5359 igc_down(adapter); 5360 igc_up(adapter); 5361 clear_bit(__IGC_RESETTING, &adapter->state); 5362 } 5363 5364 static void igc_reset_task(struct work_struct *work) 5365 { 5366 struct igc_adapter *adapter; 5367 5368 adapter = container_of(work, struct igc_adapter, reset_task); 5369 5370 rtnl_lock(); 5371 /* If we're already down or resetting, just bail */ 5372 if (test_bit(__IGC_DOWN, &adapter->state) || 5373 test_bit(__IGC_RESETTING, &adapter->state)) { 5374 rtnl_unlock(); 5375 return; 5376 } 5377 5378 igc_rings_dump(adapter); 5379 igc_regs_dump(adapter); 5380 netdev_err(adapter->netdev, "Reset adapter\n"); 5381 igc_reinit_locked(adapter); 5382 rtnl_unlock(); 5383 } 5384 5385 /** 5386 * igc_change_mtu - Change the Maximum Transfer Unit 5387 * @netdev: network interface device structure 5388 * @new_mtu: new value for maximum frame size 5389 * 5390 * Returns 0 on success, negative on failure 5391 */ 5392 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 5393 { 5394 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 5395 struct igc_adapter *adapter = netdev_priv(netdev); 5396 5397 if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { 5398 netdev_dbg(netdev, "Jumbo frames not supported with XDP"); 5399 return -EINVAL; 5400 } 5401 5402 /* adjust max frame to be at least the size of a standard frame */ 5403 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 5404 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 5405 5406 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5407 usleep_range(1000, 2000); 5408 5409 /* igc_down has a dependency on max_frame_size */ 5410 adapter->max_frame_size = max_frame; 5411 5412 if (netif_running(netdev)) 5413 igc_down(adapter); 5414 5415 netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); 5416 WRITE_ONCE(netdev->mtu, new_mtu); 5417 5418 if (netif_running(netdev)) 5419 igc_up(adapter); 5420 else 5421 igc_reset(adapter); 5422 5423 clear_bit(__IGC_RESETTING, &adapter->state); 5424 5425 return 0; 5426 } 5427 5428 /** 5429 * igc_tx_timeout - Respond to a Tx Hang 5430 * @netdev: network interface device structure 5431 * @txqueue: queue number that timed out 5432 **/ 5433 static void igc_tx_timeout(struct net_device *netdev, 5434 unsigned int __always_unused txqueue) 5435 { 5436 struct igc_adapter *adapter = netdev_priv(netdev); 5437 struct igc_hw *hw = &adapter->hw; 5438 5439 /* Do the reset outside of interrupt context */ 5440 adapter->tx_timeout_count++; 5441 schedule_work(&adapter->reset_task); 5442 wr32(IGC_EICS, 5443 (adapter->eims_enable_mask & ~adapter->eims_other)); 5444 } 5445 5446 /** 5447 * igc_get_stats64 - 
Get System Network Statistics 5448 * @netdev: network interface device structure 5449 * @stats: rtnl_link_stats64 pointer 5450 * 5451 * Returns the address of the device statistics structure. 5452 * The statistics are updated here and also from the timer callback. 5453 */ 5454 static void igc_get_stats64(struct net_device *netdev, 5455 struct rtnl_link_stats64 *stats) 5456 { 5457 struct igc_adapter *adapter = netdev_priv(netdev); 5458 5459 spin_lock(&adapter->stats64_lock); 5460 if (!test_bit(__IGC_RESETTING, &adapter->state)) 5461 igc_update_stats(adapter); 5462 memcpy(stats, &adapter->stats64, sizeof(*stats)); 5463 spin_unlock(&adapter->stats64_lock); 5464 } 5465 5466 static netdev_features_t igc_fix_features(struct net_device *netdev, 5467 netdev_features_t features) 5468 { 5469 /* Since there is no support for separate Rx/Tx vlan accel 5470 * enable/disable make sure Tx flag is always in same state as Rx. 5471 */ 5472 if (features & NETIF_F_HW_VLAN_CTAG_RX) 5473 features |= NETIF_F_HW_VLAN_CTAG_TX; 5474 else 5475 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 5476 5477 return features; 5478 } 5479 5480 static int igc_set_features(struct net_device *netdev, 5481 netdev_features_t features) 5482 { 5483 netdev_features_t changed = netdev->features ^ features; 5484 struct igc_adapter *adapter = netdev_priv(netdev); 5485 5486 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 5487 igc_vlan_mode(netdev, features); 5488 5489 /* Add VLAN support */ 5490 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 5491 return 0; 5492 5493 if (!(features & NETIF_F_NTUPLE)) 5494 igc_flush_nfc_rules(adapter); 5495 5496 netdev->features = features; 5497 5498 if (netif_running(netdev)) 5499 igc_reinit_locked(adapter); 5500 else 5501 igc_reset(adapter); 5502 5503 return 1; 5504 } 5505 5506 static netdev_features_t 5507 igc_features_check(struct sk_buff *skb, struct net_device *dev, 5508 netdev_features_t features) 5509 { 5510 unsigned int network_hdr_len, mac_hdr_len; 5511 5512 /* Make certain the headers can be described by a context descriptor */ 5513 mac_hdr_len = skb_network_offset(skb); 5514 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 5515 return features & ~(NETIF_F_HW_CSUM | 5516 NETIF_F_SCTP_CRC | 5517 NETIF_F_HW_VLAN_CTAG_TX | 5518 NETIF_F_TSO | 5519 NETIF_F_TSO6); 5520 5521 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 5522 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 5523 return features & ~(NETIF_F_HW_CSUM | 5524 NETIF_F_SCTP_CRC | 5525 NETIF_F_TSO | 5526 NETIF_F_TSO6); 5527 5528 /* We can only support IPv4 TSO in tunnels if we can mangle the 5529 * inner IP ID field, so strip TSO if MANGLEID is not supported. 
5530 */ 5531 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 5532 features &= ~NETIF_F_TSO; 5533 5534 return features; 5535 } 5536 5537 static void igc_tsync_interrupt(struct igc_adapter *adapter) 5538 { 5539 struct igc_hw *hw = &adapter->hw; 5540 u32 tsauxc, sec, nsec, tsicr; 5541 struct ptp_clock_event event; 5542 struct timespec64 ts; 5543 5544 tsicr = rd32(IGC_TSICR); 5545 5546 if (tsicr & IGC_TSICR_SYS_WRAP) { 5547 event.type = PTP_CLOCK_PPS; 5548 if (adapter->ptp_caps.pps) 5549 ptp_clock_event(adapter->ptp_clock, &event); 5550 } 5551 5552 if (tsicr & IGC_TSICR_TXTS) { 5553 /* retrieve hardware timestamp */ 5554 igc_ptp_tx_tstamp_event(adapter); 5555 } 5556 5557 if (tsicr & IGC_TSICR_TT0) { 5558 spin_lock(&adapter->tmreg_lock); 5559 ts = timespec64_add(adapter->perout[0].start, 5560 adapter->perout[0].period); 5561 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5562 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5563 tsauxc = rd32(IGC_TSAUXC); 5564 tsauxc |= IGC_TSAUXC_EN_TT0; 5565 wr32(IGC_TSAUXC, tsauxc); 5566 adapter->perout[0].start = ts; 5567 spin_unlock(&adapter->tmreg_lock); 5568 } 5569 5570 if (tsicr & IGC_TSICR_TT1) { 5571 spin_lock(&adapter->tmreg_lock); 5572 ts = timespec64_add(adapter->perout[1].start, 5573 adapter->perout[1].period); 5574 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5575 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5576 tsauxc = rd32(IGC_TSAUXC); 5577 tsauxc |= IGC_TSAUXC_EN_TT1; 5578 wr32(IGC_TSAUXC, tsauxc); 5579 adapter->perout[1].start = ts; 5580 spin_unlock(&adapter->tmreg_lock); 5581 } 5582 5583 if (tsicr & IGC_TSICR_AUTT0) { 5584 nsec = rd32(IGC_AUXSTMPL0); 5585 sec = rd32(IGC_AUXSTMPH0); 5586 event.type = PTP_CLOCK_EXTTS; 5587 event.index = 0; 5588 event.timestamp = sec * NSEC_PER_SEC + nsec; 5589 ptp_clock_event(adapter->ptp_clock, &event); 5590 } 5591 5592 if (tsicr & IGC_TSICR_AUTT1) { 5593 nsec = rd32(IGC_AUXSTMPL1); 5594 sec = rd32(IGC_AUXSTMPH1); 5595 event.type = PTP_CLOCK_EXTTS; 5596 event.index = 1; 5597 event.timestamp = sec * NSEC_PER_SEC + nsec; 5598 ptp_clock_event(adapter->ptp_clock, &event); 5599 } 5600 } 5601 5602 /** 5603 * igc_msix_other - msix other interrupt handler 5604 * @irq: interrupt number 5605 * @data: pointer to a q_vector 5606 */ 5607 static irqreturn_t igc_msix_other(int irq, void *data) 5608 { 5609 struct igc_adapter *adapter = data; 5610 struct igc_hw *hw = &adapter->hw; 5611 u32 icr = rd32(IGC_ICR); 5612 5613 /* reading ICR causes bit 31 of EICR to be cleared */ 5614 if (icr & IGC_ICR_DRSTA) 5615 schedule_work(&adapter->reset_task); 5616 5617 if (icr & IGC_ICR_DOUTSYNC) { 5618 /* HW is reporting DMA is out of sync */ 5619 adapter->stats.doosync++; 5620 } 5621 5622 if (icr & IGC_ICR_LSC) { 5623 hw->mac.get_link_status = true; 5624 /* guard against interrupt when we're going down */ 5625 if (!test_bit(__IGC_DOWN, &adapter->state)) 5626 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5627 } 5628 5629 if (icr & IGC_ICR_TS) 5630 igc_tsync_interrupt(adapter); 5631 5632 wr32(IGC_EIMS, adapter->eims_other); 5633 5634 return IRQ_HANDLED; 5635 } 5636 5637 static void igc_write_itr(struct igc_q_vector *q_vector) 5638 { 5639 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5640 5641 if (!q_vector->set_itr) 5642 return; 5643 5644 if (!itr_val) 5645 itr_val = IGC_ITR_VAL_MASK; 5646 5647 itr_val |= IGC_EITR_CNT_IGNR; 5648 5649 writel(itr_val, q_vector->itr_register); 5650 q_vector->set_itr = 0; 5651 } 5652 5653 static irqreturn_t igc_msix_ring(int irq, void *data) 5654 { 5655 struct igc_q_vector 
*q_vector = data; 5656 5657 /* Write the ITR value calculated from the previous interrupt. */ 5658 igc_write_itr(q_vector); 5659 5660 napi_schedule(&q_vector->napi); 5661 5662 return IRQ_HANDLED; 5663 } 5664 5665 /** 5666 * igc_request_msix - Initialize MSI-X interrupts 5667 * @adapter: Pointer to adapter structure 5668 * 5669 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5670 * kernel. 5671 */ 5672 static int igc_request_msix(struct igc_adapter *adapter) 5673 { 5674 unsigned int num_q_vectors = adapter->num_q_vectors; 5675 int i = 0, err = 0, vector = 0, free_vector = 0; 5676 struct net_device *netdev = adapter->netdev; 5677 5678 err = request_irq(adapter->msix_entries[vector].vector, 5679 &igc_msix_other, 0, netdev->name, adapter); 5680 if (err) 5681 goto err_out; 5682 5683 if (num_q_vectors > MAX_Q_VECTORS) { 5684 num_q_vectors = MAX_Q_VECTORS; 5685 dev_warn(&adapter->pdev->dev, 5686 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5687 adapter->num_q_vectors, MAX_Q_VECTORS); 5688 } 5689 for (i = 0; i < num_q_vectors; i++) { 5690 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5691 5692 vector++; 5693 5694 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5695 5696 if (q_vector->rx.ring && q_vector->tx.ring) 5697 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5698 q_vector->rx.ring->queue_index); 5699 else if (q_vector->tx.ring) 5700 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5701 q_vector->tx.ring->queue_index); 5702 else if (q_vector->rx.ring) 5703 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5704 q_vector->rx.ring->queue_index); 5705 else 5706 sprintf(q_vector->name, "%s-unused", netdev->name); 5707 5708 err = request_irq(adapter->msix_entries[vector].vector, 5709 igc_msix_ring, 0, q_vector->name, 5710 q_vector); 5711 if (err) 5712 goto err_free; 5713 5714 netif_napi_set_irq(&q_vector->napi, 5715 adapter->msix_entries[vector].vector); 5716 } 5717 5718 igc_configure_msix(adapter); 5719 return 0; 5720 5721 err_free: 5722 /* free already assigned IRQs */ 5723 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5724 5725 vector--; 5726 for (i = 0; i < vector; i++) { 5727 free_irq(adapter->msix_entries[free_vector++].vector, 5728 adapter->q_vector[i]); 5729 } 5730 err_out: 5731 return err; 5732 } 5733 5734 /** 5735 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5736 * @adapter: Pointer to adapter structure 5737 * 5738 * This function resets the device so that it has 0 rx queues, tx queues, and 5739 * MSI-X interrupts allocated. 5740 */ 5741 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5742 { 5743 igc_free_q_vectors(adapter); 5744 igc_reset_interrupt_capability(adapter); 5745 } 5746 5747 /* Need to wait a few seconds after link up to get diagnostic information from 5748 * the phy 5749 */ 5750 static void igc_update_phy_info(struct timer_list *t) 5751 { 5752 struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); 5753 5754 igc_get_phy_info(&adapter->hw); 5755 } 5756 5757 /** 5758 * igc_has_link - check shared code for link and determine up/down 5759 * @adapter: pointer to driver private info 5760 */ 5761 bool igc_has_link(struct igc_adapter *adapter) 5762 { 5763 struct igc_hw *hw = &adapter->hw; 5764 bool link_active = false; 5765 5766 /* get_link_status is set on LSC (link status) interrupt or 5767 * rx sequence error interrupt. 
get_link_status will stay 5768 * false until the igc_check_for_link establishes link 5769 * for copper adapters ONLY 5770 */ 5771 if (!hw->mac.get_link_status) 5772 return true; 5773 hw->mac.ops.check_for_link(hw); 5774 link_active = !hw->mac.get_link_status; 5775 5776 if (hw->mac.type == igc_i225) { 5777 if (!netif_carrier_ok(adapter->netdev)) { 5778 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5779 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5780 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5781 adapter->link_check_timeout = jiffies; 5782 } 5783 } 5784 5785 return link_active; 5786 } 5787 5788 /** 5789 * igc_watchdog - Timer Call-back 5790 * @t: timer for the watchdog 5791 */ 5792 static void igc_watchdog(struct timer_list *t) 5793 { 5794 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); 5795 /* Do the rest outside of interrupt context */ 5796 schedule_work(&adapter->watchdog_task); 5797 } 5798 5799 static void igc_watchdog_task(struct work_struct *work) 5800 { 5801 struct igc_adapter *adapter = container_of(work, 5802 struct igc_adapter, 5803 watchdog_task); 5804 struct net_device *netdev = adapter->netdev; 5805 struct igc_hw *hw = &adapter->hw; 5806 struct igc_phy_info *phy = &hw->phy; 5807 u16 phy_data, retry_count = 20; 5808 u32 link; 5809 int i; 5810 5811 link = igc_has_link(adapter); 5812 5813 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5814 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5815 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5816 else 5817 link = false; 5818 } 5819 5820 if (link) { 5821 /* Cancel scheduled suspend requests. */ 5822 pm_runtime_resume(netdev->dev.parent); 5823 5824 if (!netif_carrier_ok(netdev)) { 5825 u32 ctrl; 5826 5827 hw->mac.ops.get_speed_and_duplex(hw, 5828 &adapter->link_speed, 5829 &adapter->link_duplex); 5830 5831 ctrl = rd32(IGC_CTRL); 5832 /* Link status message must follow this format */ 5833 netdev_info(netdev, 5834 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5835 adapter->link_speed, 5836 adapter->link_duplex == FULL_DUPLEX ? 5837 "Full" : "Half", 5838 (ctrl & IGC_CTRL_TFCE) && 5839 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5840 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5841 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5842 5843 /* disable EEE if enabled */ 5844 if ((adapter->flags & IGC_FLAG_EEE) && 5845 adapter->link_duplex == HALF_DUPLEX) { 5846 netdev_info(netdev, 5847 "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n"); 5848 adapter->hw.dev_spec._base.eee_enable = false; 5849 adapter->flags &= ~IGC_FLAG_EEE; 5850 } 5851 5852 /* check if SmartSpeed worked */ 5853 igc_check_downshift(hw); 5854 if (phy->speed_downgraded) 5855 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5856 5857 /* adjust timeout factor according to speed/duplex */ 5858 adapter->tx_timeout_factor = 1; 5859 switch (adapter->link_speed) { 5860 case SPEED_10: 5861 adapter->tx_timeout_factor = 14; 5862 break; 5863 case SPEED_100: 5864 case SPEED_1000: 5865 case SPEED_2500: 5866 adapter->tx_timeout_factor = 1; 5867 break; 5868 } 5869 5870 /* Once the launch time has been set on the wire, there 5871 * is a delay before the link speed can be determined 5872 * based on link-up activity. Write into the register 5873 * as soon as we know the correct link speed. 
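 * (igc_tsn_adjust_txtime_offset() below reprograms that register for the
 * newly reported link speed.)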
5874 */ 5875 igc_tsn_adjust_txtime_offset(adapter); 5876 5877 if (adapter->fpe.mmsv.pmac_enabled) 5878 ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, 5879 true); 5880 5881 if (adapter->link_speed != SPEED_1000) 5882 goto no_wait; 5883 5884 /* wait for Remote receiver status OK */ 5885 retry_read_status: 5886 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5887 &phy_data)) { 5888 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5889 retry_count) { 5890 msleep(100); 5891 retry_count--; 5892 goto retry_read_status; 5893 } else if (!retry_count) { 5894 netdev_err(netdev, "exceed max 2 second\n"); 5895 } 5896 } else { 5897 netdev_err(netdev, "read 1000Base-T Status Reg\n"); 5898 } 5899 no_wait: 5900 netif_carrier_on(netdev); 5901 5902 /* link state has changed, schedule phy info update */ 5903 if (!test_bit(__IGC_DOWN, &adapter->state)) 5904 mod_timer(&adapter->phy_info_timer, 5905 round_jiffies(jiffies + 2 * HZ)); 5906 } 5907 } else { 5908 if (netif_carrier_ok(netdev)) { 5909 adapter->link_speed = 0; 5910 adapter->link_duplex = 0; 5911 5912 /* Links status message must follow this format */ 5913 netdev_info(netdev, "NIC Link is Down\n"); 5914 netif_carrier_off(netdev); 5915 5916 if (adapter->fpe.mmsv.pmac_enabled) 5917 ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, 5918 false); 5919 5920 /* link state has changed, schedule phy info update */ 5921 if (!test_bit(__IGC_DOWN, &adapter->state)) 5922 mod_timer(&adapter->phy_info_timer, 5923 round_jiffies(jiffies + 2 * HZ)); 5924 5925 pm_schedule_suspend(netdev->dev.parent, 5926 MSEC_PER_SEC * 5); 5927 } 5928 } 5929 5930 spin_lock(&adapter->stats64_lock); 5931 igc_update_stats(adapter); 5932 spin_unlock(&adapter->stats64_lock); 5933 5934 for (i = 0; i < adapter->num_tx_queues; i++) { 5935 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5936 5937 if (!netif_carrier_ok(netdev)) { 5938 /* We've lost link, so the controller stops DMA, 5939 * but we've got queued Tx work that's never going 5940 * to get done, so reset controller to flush Tx. 5941 * (Do the reset outside of interrupt context). 
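			 * A ring that still has descriptors in use, i.e.
			 * igc_desc_unused() + 1 < count, is treated as having
			 * queued Tx work and triggers the reset below.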
5942 */ 5943 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5944 adapter->tx_timeout_count++; 5945 schedule_work(&adapter->reset_task); 5946 /* return immediately since reset is imminent */ 5947 return; 5948 } 5949 } 5950 5951 /* Force detection of hung controller every watchdog period */ 5952 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5953 } 5954 5955 /* Cause software interrupt to ensure Rx ring is cleaned */ 5956 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5957 u32 eics = 0; 5958 5959 for (i = 0; i < adapter->num_q_vectors; i++) { 5960 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5961 struct igc_ring *rx_ring; 5962 5963 if (!q_vector->rx.ring) 5964 continue; 5965 5966 rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; 5967 5968 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 5969 eics |= q_vector->eims_value; 5970 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5971 } 5972 } 5973 if (eics) 5974 wr32(IGC_EICS, eics); 5975 } else { 5976 struct igc_ring *rx_ring = adapter->rx_ring[0]; 5977 5978 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 5979 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5980 wr32(IGC_ICS, IGC_ICS_RXDMT0); 5981 } 5982 } 5983 5984 igc_ptp_tx_hang(adapter); 5985 5986 /* Reset the timer */ 5987 if (!test_bit(__IGC_DOWN, &adapter->state)) { 5988 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 5989 mod_timer(&adapter->watchdog_timer, 5990 round_jiffies(jiffies + HZ)); 5991 else 5992 mod_timer(&adapter->watchdog_timer, 5993 round_jiffies(jiffies + 2 * HZ)); 5994 } 5995 } 5996 5997 /** 5998 * igc_intr_msi - Interrupt Handler 5999 * @irq: interrupt number 6000 * @data: pointer to a network interface device structure 6001 */ 6002 static irqreturn_t igc_intr_msi(int irq, void *data) 6003 { 6004 struct igc_adapter *adapter = data; 6005 struct igc_q_vector *q_vector = adapter->q_vector[0]; 6006 struct igc_hw *hw = &adapter->hw; 6007 /* read ICR disables interrupts using IAM */ 6008 u32 icr = rd32(IGC_ICR); 6009 6010 igc_write_itr(q_vector); 6011 6012 if (icr & IGC_ICR_DRSTA) 6013 schedule_work(&adapter->reset_task); 6014 6015 if (icr & IGC_ICR_DOUTSYNC) { 6016 /* HW is reporting DMA is out of sync */ 6017 adapter->stats.doosync++; 6018 } 6019 6020 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6021 hw->mac.get_link_status = true; 6022 if (!test_bit(__IGC_DOWN, &adapter->state)) 6023 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6024 } 6025 6026 if (icr & IGC_ICR_TS) 6027 igc_tsync_interrupt(adapter); 6028 6029 napi_schedule(&q_vector->napi); 6030 6031 return IRQ_HANDLED; 6032 } 6033 6034 /** 6035 * igc_intr - Legacy Interrupt Handler 6036 * @irq: interrupt number 6037 * @data: pointer to a network interface device structure 6038 */ 6039 static irqreturn_t igc_intr(int irq, void *data) 6040 { 6041 struct igc_adapter *adapter = data; 6042 struct igc_q_vector *q_vector = adapter->q_vector[0]; 6043 struct igc_hw *hw = &adapter->hw; 6044 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No 6045 * need for the IMC write 6046 */ 6047 u32 icr = rd32(IGC_ICR); 6048 6049 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 6050 * not set, then the adapter didn't send an interrupt 6051 */ 6052 if (!(icr & IGC_ICR_INT_ASSERTED)) 6053 return IRQ_NONE; 6054 6055 igc_write_itr(q_vector); 6056 6057 if (icr & IGC_ICR_DRSTA) 6058 schedule_work(&adapter->reset_task); 6059 6060 if (icr & IGC_ICR_DOUTSYNC) { 6061 /* HW is reporting DMA is out of sync */ 6062 adapter->stats.doosync++; 6063 } 6064 6065 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6066 hw->mac.get_link_status = true; 6067 /* guard against interrupt when we're going down */ 6068 if (!test_bit(__IGC_DOWN, &adapter->state)) 6069 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6070 } 6071 6072 if (icr & IGC_ICR_TS) 6073 igc_tsync_interrupt(adapter); 6074 6075 napi_schedule(&q_vector->napi); 6076 6077 return IRQ_HANDLED; 6078 } 6079 6080 static void igc_free_irq(struct igc_adapter *adapter) 6081 { 6082 if (adapter->msix_entries) { 6083 int vector = 0, i; 6084 6085 free_irq(adapter->msix_entries[vector++].vector, adapter); 6086 6087 for (i = 0; i < adapter->num_q_vectors; i++) 6088 free_irq(adapter->msix_entries[vector++].vector, 6089 adapter->q_vector[i]); 6090 } else { 6091 free_irq(adapter->pdev->irq, adapter); 6092 } 6093 } 6094 6095 /** 6096 * igc_request_irq - initialize interrupts 6097 * @adapter: Pointer to adapter structure 6098 * 6099 * Attempts to configure interrupts using the best available 6100 * capabilities of the hardware and kernel. 6101 */ 6102 static int igc_request_irq(struct igc_adapter *adapter) 6103 { 6104 struct net_device *netdev = adapter->netdev; 6105 struct pci_dev *pdev = adapter->pdev; 6106 int err = 0; 6107 6108 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 6109 err = igc_request_msix(adapter); 6110 if (!err) 6111 goto request_done; 6112 /* fall back to MSI */ 6113 igc_free_all_tx_resources(adapter); 6114 igc_free_all_rx_resources(adapter); 6115 6116 igc_clear_interrupt_scheme(adapter); 6117 err = igc_init_interrupt_scheme(adapter, false); 6118 if (err) 6119 goto request_done; 6120 igc_setup_all_tx_resources(adapter); 6121 igc_setup_all_rx_resources(adapter); 6122 igc_configure(adapter); 6123 } 6124 6125 igc_assign_vector(adapter->q_vector[0], 0); 6126 6127 if (adapter->flags & IGC_FLAG_HAS_MSI) { 6128 err = request_irq(pdev->irq, &igc_intr_msi, 0, 6129 netdev->name, adapter); 6130 if (!err) 6131 goto request_done; 6132 6133 /* fall back to legacy interrupts */ 6134 igc_reset_interrupt_capability(adapter); 6135 adapter->flags &= ~IGC_FLAG_HAS_MSI; 6136 } 6137 6138 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 6139 netdev->name, adapter); 6140 6141 if (err) 6142 netdev_err(netdev, "Error %d getting interrupt\n", err); 6143 6144 request_done: 6145 return err; 6146 } 6147 6148 /** 6149 * __igc_open - Called when a network interface is made active 6150 * @netdev: network interface device structure 6151 * @resuming: boolean indicating if the device is resuming 6152 * 6153 * Returns 0 on success, negative value on failure 6154 * 6155 * The open entry point is called when a network interface is made 6156 * active by the system (IFF_UP). At this point all resources needed 6157 * for transmit and receive operations are allocated, the interrupt 6158 * handler is registered with the OS, the watchdog timer is started, 6159 * and the stack is notified that the interface is ready. 
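 *
 * __igc_open() is shared by the .ndo_open callback (igc_open()) and the
 * resume path, which calls it with @resuming set (see __igc_resume()).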
6160 */ 6161 static int __igc_open(struct net_device *netdev, bool resuming) 6162 { 6163 struct igc_adapter *adapter = netdev_priv(netdev); 6164 struct pci_dev *pdev = adapter->pdev; 6165 struct igc_hw *hw = &adapter->hw; 6166 struct napi_struct *napi; 6167 int err = 0; 6168 int i = 0; 6169 6170 /* disallow open during test */ 6171 6172 if (test_bit(__IGC_TESTING, &adapter->state)) { 6173 WARN_ON(resuming); 6174 return -EBUSY; 6175 } 6176 6177 if (!resuming) 6178 pm_runtime_get_sync(&pdev->dev); 6179 6180 netif_carrier_off(netdev); 6181 6182 /* allocate transmit descriptors */ 6183 err = igc_setup_all_tx_resources(adapter); 6184 if (err) 6185 goto err_setup_tx; 6186 6187 /* allocate receive descriptors */ 6188 err = igc_setup_all_rx_resources(adapter); 6189 if (err) 6190 goto err_setup_rx; 6191 6192 igc_power_up_link(adapter); 6193 6194 igc_configure(adapter); 6195 6196 err = igc_request_irq(adapter); 6197 if (err) 6198 goto err_req_irq; 6199 6200 clear_bit(__IGC_DOWN, &adapter->state); 6201 6202 for (i = 0; i < adapter->num_q_vectors; i++) { 6203 napi = &adapter->q_vector[i]->napi; 6204 napi_enable(napi); 6205 igc_set_queue_napi(adapter, i, napi); 6206 } 6207 6208 /* Clear any pending interrupts. */ 6209 rd32(IGC_ICR); 6210 igc_irq_enable(adapter); 6211 6212 if (!resuming) 6213 pm_runtime_put(&pdev->dev); 6214 6215 netif_tx_start_all_queues(netdev); 6216 6217 /* start the watchdog. */ 6218 hw->mac.get_link_status = true; 6219 schedule_work(&adapter->watchdog_task); 6220 6221 return IGC_SUCCESS; 6222 6223 err_req_irq: 6224 igc_release_hw_control(adapter); 6225 igc_power_down_phy_copper_base(&adapter->hw); 6226 igc_free_all_rx_resources(adapter); 6227 err_setup_rx: 6228 igc_free_all_tx_resources(adapter); 6229 err_setup_tx: 6230 igc_reset(adapter); 6231 if (!resuming) 6232 pm_runtime_put(&pdev->dev); 6233 6234 return err; 6235 } 6236 6237 int igc_open(struct net_device *netdev) 6238 { 6239 struct igc_adapter *adapter = netdev_priv(netdev); 6240 int err; 6241 6242 /* Notify the stack of the actual queue counts. */ 6243 err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, 6244 adapter->num_rx_queues); 6245 if (err) { 6246 netdev_err(netdev, "error setting real queue count\n"); 6247 return err; 6248 } 6249 6250 return __igc_open(netdev, false); 6251 } 6252 6253 /** 6254 * __igc_close - Disables a network interface 6255 * @netdev: network interface device structure 6256 * @suspending: boolean indicating the device is suspending 6257 * 6258 * Returns 0, this is not allowed to fail 6259 * 6260 * The close entry point is called when an interface is de-activated 6261 * by the OS. The hardware is still under the driver's control, but 6262 * needs to be disabled. A global MAC reset is issued to stop the 6263 * hardware, and all transmit and receive resources are freed. 
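 *
 * __igc_close() is likewise shared by the .ndo_stop callback (igc_close())
 * and the suspend path, which calls it with @suspending set (see
 * __igc_shutdown()).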
6264 */ 6265 static int __igc_close(struct net_device *netdev, bool suspending) 6266 { 6267 struct igc_adapter *adapter = netdev_priv(netdev); 6268 struct pci_dev *pdev = adapter->pdev; 6269 6270 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 6271 6272 if (!suspending) 6273 pm_runtime_get_sync(&pdev->dev); 6274 6275 igc_down(adapter); 6276 6277 igc_release_hw_control(adapter); 6278 6279 igc_free_irq(adapter); 6280 6281 igc_free_all_tx_resources(adapter); 6282 igc_free_all_rx_resources(adapter); 6283 6284 if (!suspending) 6285 pm_runtime_put_sync(&pdev->dev); 6286 6287 return 0; 6288 } 6289 6290 int igc_close(struct net_device *netdev) 6291 { 6292 if (netif_device_present(netdev) || netdev->dismantle) 6293 return __igc_close(netdev, false); 6294 return 0; 6295 } 6296 6297 /** 6298 * igc_ioctl - Access the hwtstamp interface 6299 * @netdev: network interface device structure 6300 * @ifr: interface request data 6301 * @cmd: ioctl command 6302 **/ 6303 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6304 { 6305 switch (cmd) { 6306 case SIOCGHWTSTAMP: 6307 return igc_ptp_get_ts_config(netdev, ifr); 6308 case SIOCSHWTSTAMP: 6309 return igc_ptp_set_ts_config(netdev, ifr); 6310 default: 6311 return -EOPNOTSUPP; 6312 } 6313 } 6314 6315 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, 6316 bool enable) 6317 { 6318 struct igc_ring *ring; 6319 6320 if (queue < 0 || queue >= adapter->num_tx_queues) 6321 return -EINVAL; 6322 6323 ring = adapter->tx_ring[queue]; 6324 ring->launchtime_enable = enable; 6325 6326 return 0; 6327 } 6328 6329 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) 6330 { 6331 struct timespec64 b; 6332 6333 b = ktime_to_timespec64(base_time); 6334 6335 return timespec64_compare(now, &b) > 0; 6336 } 6337 6338 static bool validate_schedule(struct igc_adapter *adapter, 6339 const struct tc_taprio_qopt_offload *qopt) 6340 { 6341 int queue_uses[IGC_MAX_TX_QUEUES] = { }; 6342 struct igc_hw *hw = &adapter->hw; 6343 struct timespec64 now; 6344 size_t n; 6345 6346 if (qopt->cycle_time_extension) 6347 return false; 6348 6349 igc_ptp_read(adapter, &now); 6350 6351 /* If we program the controller's BASET registers with a time 6352 * in the future, it will hold all the packets until that 6353 * time, causing a lot of TX Hangs, so to avoid that, we 6354 * reject schedules that would start in the future. 6355 * Note: Limitation above is no longer in i226. 6356 */ 6357 if (!is_base_time_past(qopt->base_time, &now) && 6358 igc_is_device_id_i225(hw)) 6359 return false; 6360 6361 for (n = 0; n < qopt->num_entries; n++) { 6362 const struct tc_taprio_sched_entry *e, *prev; 6363 int i; 6364 6365 prev = n ? &qopt->entries[n - 1] : NULL; 6366 e = &qopt->entries[n]; 6367 6368 /* i225 only supports "global" frame preemption 6369 * settings. 6370 */ 6371 if (e->command != TC_TAPRIO_CMD_SET_GATES) 6372 return false; 6373 6374 for (i = 0; i < adapter->num_tx_queues; i++) 6375 if (e->gate_mask & BIT(i)) { 6376 queue_uses[i]++; 6377 6378 /* There are limitations: A single queue cannot 6379 * be opened and closed multiple times per cycle 6380 * unless the gate stays open. Check for it. 
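				 * For example, a schedule whose entries open
				 * queue 0, then close it, then open it again
				 * within the same cycle is rejected: when the
				 * queue is reused, the previous entry did not
				 * keep its gate open.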
6381 */ 6382 if (queue_uses[i] > 1 && 6383 !(prev->gate_mask & BIT(i))) 6384 return false; 6385 } 6386 } 6387 6388 return true; 6389 } 6390 6391 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, 6392 struct tc_etf_qopt_offload *qopt) 6393 { 6394 struct igc_hw *hw = &adapter->hw; 6395 int err; 6396 6397 if (hw->mac.type != igc_i225) 6398 return -EOPNOTSUPP; 6399 6400 err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable); 6401 if (err) 6402 return err; 6403 6404 return igc_tsn_offload_apply(adapter); 6405 } 6406 6407 static int igc_qbv_clear_schedule(struct igc_adapter *adapter) 6408 { 6409 unsigned long flags; 6410 int i; 6411 6412 adapter->base_time = 0; 6413 adapter->cycle_time = NSEC_PER_SEC; 6414 adapter->taprio_offload_enable = false; 6415 adapter->qbv_config_change_errors = 0; 6416 adapter->qbv_count = 0; 6417 6418 for (i = 0; i < adapter->num_tx_queues; i++) { 6419 struct igc_ring *ring = adapter->tx_ring[i]; 6420 6421 ring->start_time = 0; 6422 ring->end_time = NSEC_PER_SEC; 6423 ring->max_sdu = 0; 6424 } 6425 6426 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6427 6428 adapter->qbv_transition = false; 6429 6430 for (i = 0; i < adapter->num_tx_queues; i++) { 6431 struct igc_ring *ring = adapter->tx_ring[i]; 6432 6433 ring->oper_gate_closed = false; 6434 ring->admin_gate_closed = false; 6435 } 6436 6437 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6438 6439 return 0; 6440 } 6441 6442 static int igc_tsn_clear_schedule(struct igc_adapter *adapter) 6443 { 6444 igc_qbv_clear_schedule(adapter); 6445 6446 return 0; 6447 } 6448 6449 static void igc_taprio_stats(struct net_device *dev, 6450 struct tc_taprio_qopt_stats *stats) 6451 { 6452 /* When Strict_End is enabled, the tx_overruns counter 6453 * will always be zero. 6454 */ 6455 stats->tx_overruns = 0; 6456 } 6457 6458 static void igc_taprio_queue_stats(struct net_device *dev, 6459 struct tc_taprio_qopt_queue_stats *queue_stats) 6460 { 6461 struct tc_taprio_qopt_stats *stats = &queue_stats->stats; 6462 6463 /* When Strict_End is enabled, the tx_overruns counter 6464 * will always be zero. 6465 */ 6466 stats->tx_overruns = 0; 6467 } 6468 6469 static int igc_save_qbv_schedule(struct igc_adapter *adapter, 6470 struct tc_taprio_qopt_offload *qopt) 6471 { 6472 bool queue_configured[IGC_MAX_TX_QUEUES] = { }; 6473 struct igc_hw *hw = &adapter->hw; 6474 u32 start_time = 0, end_time = 0; 6475 struct timespec64 now; 6476 unsigned long flags; 6477 size_t n; 6478 int i; 6479 6480 if (qopt->base_time < 0) 6481 return -ERANGE; 6482 6483 if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) 6484 return -EALREADY; 6485 6486 if (!validate_schedule(adapter, qopt)) 6487 return -EINVAL; 6488 6489 /* preemptible isn't supported yet */ 6490 if (qopt->mqprio.preemptible_tcs) 6491 return -EOPNOTSUPP; 6492 6493 igc_ptp_read(adapter, &now); 6494 6495 if (igc_tsn_is_taprio_activated_by_user(adapter) && 6496 is_base_time_past(qopt->base_time, &now)) 6497 adapter->qbv_config_change_errors++; 6498 6499 adapter->cycle_time = qopt->cycle_time; 6500 adapter->base_time = qopt->base_time; 6501 adapter->taprio_offload_enable = true; 6502 6503 for (n = 0; n < qopt->num_entries; n++) { 6504 struct tc_taprio_sched_entry *e = &qopt->entries[n]; 6505 6506 end_time += e->interval; 6507 6508 /* If any of the conditions below are true, we need to manually 6509 * control the end time of the cycle. 6510 * 1. Qbv users can specify a cycle time that is not equal 6511 * to the total GCL intervals. 
Hence, recalculation is 6512 * necessary here to exclude the time interval that 6513 * exceeds the cycle time. 6514 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, 6515 * once the end of the list is reached, it will switch 6516 * to the END_OF_CYCLE state and leave the gates in the 6517 * same state until the next cycle is started. 6518 */ 6519 if (end_time > adapter->cycle_time || 6520 n + 1 == qopt->num_entries) 6521 end_time = adapter->cycle_time; 6522 6523 for (i = 0; i < adapter->num_tx_queues; i++) { 6524 struct igc_ring *ring = adapter->tx_ring[i]; 6525 6526 if (!(e->gate_mask & BIT(i))) 6527 continue; 6528 6529 /* Check whether a queue stays open for more than one 6530 * entry. If so, keep the start and advance the end 6531 * time. 6532 */ 6533 if (!queue_configured[i]) 6534 ring->start_time = start_time; 6535 ring->end_time = end_time; 6536 6537 if (ring->start_time >= adapter->cycle_time) 6538 queue_configured[i] = false; 6539 else 6540 queue_configured[i] = true; 6541 } 6542 6543 start_time += e->interval; 6544 } 6545 6546 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6547 6548 /* Check whether a queue gets configured. 6549 * If not, set the start and end time to be end time. 6550 */ 6551 for (i = 0; i < adapter->num_tx_queues; i++) { 6552 struct igc_ring *ring = adapter->tx_ring[i]; 6553 6554 if (!is_base_time_past(qopt->base_time, &now)) { 6555 ring->admin_gate_closed = false; 6556 } else { 6557 ring->oper_gate_closed = false; 6558 ring->admin_gate_closed = false; 6559 } 6560 6561 if (!queue_configured[i]) { 6562 if (!is_base_time_past(qopt->base_time, &now)) 6563 ring->admin_gate_closed = true; 6564 else 6565 ring->oper_gate_closed = true; 6566 6567 ring->start_time = end_time; 6568 ring->end_time = end_time; 6569 } 6570 } 6571 6572 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6573 6574 for (i = 0; i < adapter->num_tx_queues; i++) { 6575 struct igc_ring *ring = adapter->tx_ring[i]; 6576 struct net_device *dev = adapter->netdev; 6577 6578 if (qopt->max_sdu[i]) 6579 ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; 6580 else 6581 ring->max_sdu = 0; 6582 } 6583 6584 return 0; 6585 } 6586 6587 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, 6588 struct tc_taprio_qopt_offload *qopt) 6589 { 6590 struct igc_hw *hw = &adapter->hw; 6591 int err; 6592 6593 if (hw->mac.type != igc_i225) 6594 return -EOPNOTSUPP; 6595 6596 switch (qopt->cmd) { 6597 case TAPRIO_CMD_REPLACE: 6598 err = igc_save_qbv_schedule(adapter, qopt); 6599 break; 6600 case TAPRIO_CMD_DESTROY: 6601 err = igc_tsn_clear_schedule(adapter); 6602 break; 6603 case TAPRIO_CMD_STATS: 6604 igc_taprio_stats(adapter->netdev, &qopt->stats); 6605 return 0; 6606 case TAPRIO_CMD_QUEUE_STATS: 6607 igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); 6608 return 0; 6609 default: 6610 return -EOPNOTSUPP; 6611 } 6612 6613 if (err) 6614 return err; 6615 6616 return igc_tsn_offload_apply(adapter); 6617 } 6618 6619 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue, 6620 bool enable, int idleslope, int sendslope, 6621 int hicredit, int locredit) 6622 { 6623 bool cbs_status[IGC_MAX_SR_QUEUES] = { false }; 6624 struct net_device *netdev = adapter->netdev; 6625 struct igc_ring *ring; 6626 int i; 6627 6628 /* i225 has two sets of credit-based shaper logic. 
6629 * Supporting it only on the top two priority queues 6630 */ 6631 if (queue < 0 || queue > 1) 6632 return -EINVAL; 6633 6634 ring = adapter->tx_ring[queue]; 6635 6636 for (i = 0; i < IGC_MAX_SR_QUEUES; i++) 6637 if (adapter->tx_ring[i]) 6638 cbs_status[i] = adapter->tx_ring[i]->cbs_enable; 6639 6640 /* CBS should be enabled on the highest priority queue first in order 6641 * for the CBS algorithm to operate as intended. 6642 */ 6643 if (enable) { 6644 if (queue == 1 && !cbs_status[0]) { 6645 netdev_err(netdev, 6646 "Enabling CBS on queue1 before queue0\n"); 6647 return -EINVAL; 6648 } 6649 } else { 6650 if (queue == 0 && cbs_status[1]) { 6651 netdev_err(netdev, 6652 "Disabling CBS on queue0 before queue1\n"); 6653 return -EINVAL; 6654 } 6655 } 6656 6657 ring->cbs_enable = enable; 6658 ring->idleslope = idleslope; 6659 ring->sendslope = sendslope; 6660 ring->hicredit = hicredit; 6661 ring->locredit = locredit; 6662 6663 return 0; 6664 } 6665 6666 static int igc_tsn_enable_cbs(struct igc_adapter *adapter, 6667 struct tc_cbs_qopt_offload *qopt) 6668 { 6669 struct igc_hw *hw = &adapter->hw; 6670 int err; 6671 6672 if (hw->mac.type != igc_i225) 6673 return -EOPNOTSUPP; 6674 6675 if (qopt->queue < 0 || qopt->queue > 1) 6676 return -EINVAL; 6677 6678 err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, 6679 qopt->idleslope, qopt->sendslope, 6680 qopt->hicredit, qopt->locredit); 6681 if (err) 6682 return err; 6683 6684 return igc_tsn_offload_apply(adapter); 6685 } 6686 6687 static int igc_tc_query_caps(struct igc_adapter *adapter, 6688 struct tc_query_caps_base *base) 6689 { 6690 struct igc_hw *hw = &adapter->hw; 6691 6692 switch (base->type) { 6693 case TC_SETUP_QDISC_MQPRIO: { 6694 struct tc_mqprio_caps *caps = base->caps; 6695 6696 caps->validate_queue_counts = true; 6697 6698 return 0; 6699 } 6700 case TC_SETUP_QDISC_TAPRIO: { 6701 struct tc_taprio_caps *caps = base->caps; 6702 6703 caps->broken_mqprio = true; 6704 6705 if (hw->mac.type == igc_i225) { 6706 caps->supports_queue_max_sdu = true; 6707 caps->gate_mask_per_txq = true; 6708 } 6709 6710 return 0; 6711 } 6712 default: 6713 return -EOPNOTSUPP; 6714 } 6715 } 6716 6717 static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, 6718 u16 *offset) 6719 { 6720 int i; 6721 6722 adapter->strict_priority_enable = true; 6723 adapter->num_tc = num_tc; 6724 6725 for (i = 0; i < num_tc; i++) 6726 adapter->queue_per_tc[i] = offset[i]; 6727 } 6728 6729 static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, 6730 struct tc_mqprio_qopt_offload *mqprio) 6731 { 6732 struct igc_hw *hw = &adapter->hw; 6733 int err, i; 6734 6735 if (hw->mac.type != igc_i225) 6736 return -EOPNOTSUPP; 6737 6738 if (!mqprio->qopt.num_tc) { 6739 adapter->strict_priority_enable = false; 6740 netdev_reset_tc(adapter->netdev); 6741 goto apply; 6742 } 6743 6744 /* There are as many TCs as Tx queues. */ 6745 if (mqprio->qopt.num_tc != adapter->num_tx_queues) { 6746 NL_SET_ERR_MSG_FMT_MOD(mqprio->extack, 6747 "Only %d traffic classes supported", 6748 adapter->num_tx_queues); 6749 return -EOPNOTSUPP; 6750 } 6751 6752 /* Only one queue per TC is supported. */ 6753 for (i = 0; i < mqprio->qopt.num_tc; i++) { 6754 if (mqprio->qopt.count[i] != 1) { 6755 NL_SET_ERR_MSG_MOD(mqprio->extack, 6756 "Only one queue per TC supported"); 6757 return -EOPNOTSUPP; 6758 } 6759 } 6760 6761 /* Preemption is not supported yet. 
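	 * Marking traffic classes as preemptible through mqprio is rejected
	 * below; frame preemption support in this driver is handled
	 * separately (see igc_fpe_init() and the
	 * ethtool_mmsv_link_state_handle() calls in igc_watchdog_task()).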
*/ 6762 if (mqprio->preemptible_tcs) { 6763 NL_SET_ERR_MSG_MOD(mqprio->extack, 6764 "Preemption is not supported yet"); 6765 return -EOPNOTSUPP; 6766 } 6767 6768 igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, 6769 mqprio->qopt.offset); 6770 6771 err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); 6772 if (err) 6773 return err; 6774 6775 for (i = 0; i < adapter->num_tc; i++) { 6776 err = netdev_set_tc_queue(adapter->netdev, i, 1, 6777 adapter->queue_per_tc[i]); 6778 if (err) 6779 return err; 6780 } 6781 6782 /* In case the card is configured with less than four queues. */ 6783 for (; i < IGC_MAX_TX_QUEUES; i++) 6784 adapter->queue_per_tc[i] = i; 6785 6786 mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; 6787 6788 apply: 6789 return igc_tsn_offload_apply(adapter); 6790 } 6791 6792 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 6793 void *type_data) 6794 { 6795 struct igc_adapter *adapter = netdev_priv(dev); 6796 6797 adapter->tc_setup_type = type; 6798 6799 switch (type) { 6800 case TC_QUERY_CAPS: 6801 return igc_tc_query_caps(adapter, type_data); 6802 case TC_SETUP_QDISC_TAPRIO: 6803 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 6804 6805 case TC_SETUP_QDISC_ETF: 6806 return igc_tsn_enable_launchtime(adapter, type_data); 6807 6808 case TC_SETUP_QDISC_CBS: 6809 return igc_tsn_enable_cbs(adapter, type_data); 6810 6811 case TC_SETUP_QDISC_MQPRIO: 6812 return igc_tsn_enable_mqprio(adapter, type_data); 6813 6814 default: 6815 return -EOPNOTSUPP; 6816 } 6817 } 6818 6819 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 6820 { 6821 struct igc_adapter *adapter = netdev_priv(dev); 6822 6823 switch (bpf->command) { 6824 case XDP_SETUP_PROG: 6825 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 6826 case XDP_SETUP_XSK_POOL: 6827 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 6828 bpf->xsk.queue_id); 6829 default: 6830 return -EOPNOTSUPP; 6831 } 6832 } 6833 6834 static int igc_xdp_xmit(struct net_device *dev, int num_frames, 6835 struct xdp_frame **frames, u32 flags) 6836 { 6837 struct igc_adapter *adapter = netdev_priv(dev); 6838 int cpu = smp_processor_id(); 6839 struct netdev_queue *nq; 6840 struct igc_ring *ring; 6841 int i, nxmit; 6842 6843 if (unlikely(!netif_carrier_ok(dev))) 6844 return -ENETDOWN; 6845 6846 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 6847 return -EINVAL; 6848 6849 ring = igc_get_tx_ring(adapter, cpu); 6850 nq = txring_txq(ring); 6851 6852 __netif_tx_lock(nq, cpu); 6853 6854 /* Avoid transmit queue timeout since we share it with the slow path */ 6855 txq_trans_cond_update(nq); 6856 6857 nxmit = 0; 6858 for (i = 0; i < num_frames; i++) { 6859 int err; 6860 struct xdp_frame *xdpf = frames[i]; 6861 6862 err = igc_xdp_init_tx_descriptor(ring, xdpf); 6863 if (err) 6864 break; 6865 nxmit++; 6866 } 6867 6868 if (flags & XDP_XMIT_FLUSH) 6869 igc_flush_tx_descriptors(ring); 6870 6871 __netif_tx_unlock(nq); 6872 6873 return nxmit; 6874 } 6875 6876 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6877 struct igc_q_vector *q_vector) 6878 { 6879 struct igc_hw *hw = &adapter->hw; 6880 u32 eics = 0; 6881 6882 eics |= q_vector->eims_value; 6883 wr32(IGC_EICS, eics); 6884 } 6885 6886 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6887 { 6888 struct igc_adapter *adapter = netdev_priv(dev); 6889 struct igc_q_vector *q_vector; 6890 struct igc_ring *ring; 6891 6892 if (test_bit(__IGC_DOWN, &adapter->state)) 6893 return -ENETDOWN; 6894 6895 if (!igc_xdp_is_enabled(adapter)) 6896 return 
-ENXIO; 6897 6898 if (queue_id >= adapter->num_rx_queues) 6899 return -EINVAL; 6900 6901 ring = adapter->rx_ring[queue_id]; 6902 6903 if (!ring->xsk_pool) 6904 return -ENXIO; 6905 6906 q_vector = adapter->q_vector[queue_id]; 6907 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6908 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6909 6910 return 0; 6911 } 6912 6913 static ktime_t igc_get_tstamp(struct net_device *dev, 6914 const struct skb_shared_hwtstamps *hwtstamps, 6915 bool cycles) 6916 { 6917 struct igc_adapter *adapter = netdev_priv(dev); 6918 struct igc_inline_rx_tstamps *tstamp; 6919 ktime_t timestamp; 6920 6921 tstamp = hwtstamps->netdev_data; 6922 6923 if (cycles) 6924 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); 6925 else 6926 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6927 6928 return timestamp; 6929 } 6930 6931 static const struct net_device_ops igc_netdev_ops = { 6932 .ndo_open = igc_open, 6933 .ndo_stop = igc_close, 6934 .ndo_start_xmit = igc_xmit_frame, 6935 .ndo_set_rx_mode = igc_set_rx_mode, 6936 .ndo_set_mac_address = igc_set_mac, 6937 .ndo_change_mtu = igc_change_mtu, 6938 .ndo_tx_timeout = igc_tx_timeout, 6939 .ndo_get_stats64 = igc_get_stats64, 6940 .ndo_fix_features = igc_fix_features, 6941 .ndo_set_features = igc_set_features, 6942 .ndo_features_check = igc_features_check, 6943 .ndo_eth_ioctl = igc_ioctl, 6944 .ndo_setup_tc = igc_setup_tc, 6945 .ndo_bpf = igc_bpf, 6946 .ndo_xdp_xmit = igc_xdp_xmit, 6947 .ndo_xsk_wakeup = igc_xsk_wakeup, 6948 .ndo_get_tstamp = igc_get_tstamp, 6949 }; 6950 6951 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6952 { 6953 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6954 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6955 u32 value = 0; 6956 6957 if (IGC_REMOVED(hw_addr)) 6958 return ~value; 6959 6960 value = readl(&hw_addr[reg]); 6961 6962 /* reads should not return all F's */ 6963 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 6964 struct net_device *netdev = igc->netdev; 6965 6966 hw->hw_addr = NULL; 6967 netif_device_detach(netdev); 6968 netdev_err(netdev, "PCIe link lost, device now detached\n"); 6969 WARN(pci_device_is_present(igc->pdev), 6970 "igc: Failed to read reg 0x%x!\n", reg); 6971 } 6972 6973 return value; 6974 } 6975 6976 /* Mapping HW RSS Type to enum xdp_rss_hash_type */ 6977 static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { 6978 [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, 6979 [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6980 [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, 6981 [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6982 [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6983 [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, 6984 [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6985 [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6986 [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6987 [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, 6988 [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ 6989 [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ 6990 [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ 6991 [13] = XDP_RSS_TYPE_NONE, 6992 [14] = XDP_RSS_TYPE_NONE, 6993 [15] = XDP_RSS_TYPE_NONE, 6994 }; 6995 6996 static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 6997 enum xdp_rss_hash_type *rss_type) 6998 { 6999 const struct igc_xdp_buff *ctx = (void *)_ctx; 7000 7001 if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) 
7002 return -ENODATA; 7003 7004 *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); 7005 *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; 7006 7007 return 0; 7008 } 7009 7010 static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) 7011 { 7012 const struct igc_xdp_buff *ctx = (void *)_ctx; 7013 struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); 7014 struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; 7015 7016 if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { 7017 *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 7018 7019 return 0; 7020 } 7021 7022 return -ENODATA; 7023 } 7024 7025 static const struct xdp_metadata_ops igc_xdp_metadata_ops = { 7026 .xmo_rx_hash = igc_xdp_rx_hash, 7027 .xmo_rx_timestamp = igc_xdp_rx_timestamp, 7028 }; 7029 7030 static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) 7031 { 7032 struct igc_adapter *adapter = container_of(timer, struct igc_adapter, 7033 hrtimer); 7034 unsigned long flags; 7035 unsigned int i; 7036 7037 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 7038 7039 adapter->qbv_transition = true; 7040 for (i = 0; i < adapter->num_tx_queues; i++) { 7041 struct igc_ring *tx_ring = adapter->tx_ring[i]; 7042 7043 if (tx_ring->admin_gate_closed) { 7044 tx_ring->admin_gate_closed = false; 7045 tx_ring->oper_gate_closed = true; 7046 } else { 7047 tx_ring->oper_gate_closed = false; 7048 } 7049 } 7050 adapter->qbv_transition = false; 7051 7052 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 7053 7054 return HRTIMER_NORESTART; 7055 } 7056 7057 /** 7058 * igc_probe - Device Initialization Routine 7059 * @pdev: PCI device information struct 7060 * @ent: entry in igc_pci_tbl 7061 * 7062 * Returns 0 on success, negative on failure 7063 * 7064 * igc_probe initializes an adapter identified by a pci_dev structure. 7065 * The OS initialization, configuring the adapter private structure, 7066 * and a hardware reset occur. 
7067 */ 7068 static int igc_probe(struct pci_dev *pdev, 7069 const struct pci_device_id *ent) 7070 { 7071 struct igc_adapter *adapter; 7072 struct net_device *netdev; 7073 struct igc_hw *hw; 7074 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 7075 int err; 7076 7077 err = pci_enable_device_mem(pdev); 7078 if (err) 7079 return err; 7080 7081 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 7082 if (err) { 7083 dev_err(&pdev->dev, 7084 "No usable DMA configuration, aborting\n"); 7085 goto err_dma; 7086 } 7087 7088 err = pci_request_mem_regions(pdev, igc_driver_name); 7089 if (err) 7090 goto err_pci_reg; 7091 7092 err = pci_enable_ptm(pdev, NULL); 7093 if (err < 0) 7094 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); 7095 7096 pci_set_master(pdev); 7097 7098 err = -ENOMEM; 7099 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 7100 IGC_MAX_TX_QUEUES); 7101 7102 if (!netdev) 7103 goto err_alloc_etherdev; 7104 7105 SET_NETDEV_DEV(netdev, &pdev->dev); 7106 7107 pci_set_drvdata(pdev, netdev); 7108 adapter = netdev_priv(netdev); 7109 adapter->netdev = netdev; 7110 adapter->pdev = pdev; 7111 hw = &adapter->hw; 7112 hw->back = adapter; 7113 adapter->port_num = hw->bus.func; 7114 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 7115 7116 err = pci_save_state(pdev); 7117 if (err) 7118 goto err_ioremap; 7119 7120 err = -EIO; 7121 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 7122 pci_resource_len(pdev, 0)); 7123 if (!adapter->io_addr) 7124 goto err_ioremap; 7125 7126 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 7127 hw->hw_addr = adapter->io_addr; 7128 7129 netdev->netdev_ops = &igc_netdev_ops; 7130 netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; 7131 netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; 7132 igc_ethtool_set_ops(netdev); 7133 netdev->watchdog_timeo = 5 * HZ; 7134 7135 netdev->mem_start = pci_resource_start(pdev, 0); 7136 netdev->mem_end = pci_resource_end(pdev, 0); 7137 7138 /* PCI config space info */ 7139 hw->vendor_id = pdev->vendor; 7140 hw->device_id = pdev->device; 7141 hw->revision_id = pdev->revision; 7142 hw->subsystem_vendor_id = pdev->subsystem_vendor; 7143 hw->subsystem_device_id = pdev->subsystem_device; 7144 7145 /* Copy the default MAC and PHY function pointers */ 7146 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 7147 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 7148 7149 /* Initialize skew-specific constants */ 7150 err = ei->get_invariants(hw); 7151 if (err) 7152 goto err_sw_init; 7153 7154 /* Add supported features to the features list*/ 7155 netdev->features |= NETIF_F_SG; 7156 netdev->features |= NETIF_F_TSO; 7157 netdev->features |= NETIF_F_TSO6; 7158 netdev->features |= NETIF_F_TSO_ECN; 7159 netdev->features |= NETIF_F_RXHASH; 7160 netdev->features |= NETIF_F_RXCSUM; 7161 netdev->features |= NETIF_F_HW_CSUM; 7162 netdev->features |= NETIF_F_SCTP_CRC; 7163 netdev->features |= NETIF_F_HW_TC; 7164 7165 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 7166 NETIF_F_GSO_GRE_CSUM | \ 7167 NETIF_F_GSO_IPXIP4 | \ 7168 NETIF_F_GSO_IPXIP6 | \ 7169 NETIF_F_GSO_UDP_TUNNEL | \ 7170 NETIF_F_GSO_UDP_TUNNEL_CSUM) 7171 7172 netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES; 7173 netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES; 7174 7175 /* setup the private structure */ 7176 err = igc_sw_init(adapter); 7177 if (err) 7178 goto err_sw_init; 7179 7180 /* copy netdev features into list of user selectable features */ 7181 
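	/* hw_features lists the features user space may toggle at runtime
	 * (e.g. via ethtool -K), while netdev->features holds the set that is
	 * currently enabled.
	 */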
netdev->hw_features |= NETIF_F_NTUPLE; 7182 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 7183 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 7184 netdev->hw_features |= netdev->features; 7185 7186 netdev->features |= NETIF_F_HIGHDMA; 7187 7188 netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; 7189 netdev->mpls_features |= NETIF_F_HW_CSUM; 7190 netdev->hw_enc_features |= netdev->vlan_features; 7191 7192 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 7193 NETDEV_XDP_ACT_XSK_ZEROCOPY; 7194 7195 /* enable HW vlan tag insertion/stripping by default */ 7196 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; 7197 7198 /* MTU range: 68 - 9216 */ 7199 netdev->min_mtu = ETH_MIN_MTU; 7200 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 7201 7202 /* before reading the NVM, reset the controller to put the device in a 7203 * known good starting state 7204 */ 7205 hw->mac.ops.reset_hw(hw); 7206 7207 if (igc_get_flash_presence_i225(hw)) { 7208 if (hw->nvm.ops.validate(hw) < 0) { 7209 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 7210 err = -EIO; 7211 goto err_eeprom; 7212 } 7213 } 7214 7215 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 7216 /* copy the MAC address out of the NVM */ 7217 if (hw->mac.ops.read_mac_addr(hw)) 7218 dev_err(&pdev->dev, "NVM Read Error\n"); 7219 } 7220 7221 eth_hw_addr_set(netdev, hw->mac.addr); 7222 7223 if (!is_valid_ether_addr(netdev->dev_addr)) { 7224 dev_err(&pdev->dev, "Invalid MAC Address\n"); 7225 err = -EIO; 7226 goto err_eeprom; 7227 } 7228 7229 /* configure RXPBSIZE and TXPBSIZE */ 7230 wr32(IGC_RXPBS, IGC_RXPBSIZE_EXP_BMC_DEFAULT); 7231 wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); 7232 7233 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 7234 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 7235 7236 INIT_WORK(&adapter->reset_task, igc_reset_task); 7237 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 7238 7239 hrtimer_setup(&adapter->hrtimer, &igc_qbv_scheduling_timer, CLOCK_MONOTONIC, 7240 HRTIMER_MODE_REL); 7241 7242 /* Initialize link properties that are user-changeable */ 7243 adapter->fc_autoneg = true; 7244 hw->phy.autoneg_advertised = 0xaf; 7245 7246 hw->fc.requested_mode = igc_fc_default; 7247 hw->fc.current_mode = igc_fc_default; 7248 7249 /* By default, support wake on port A */ 7250 adapter->flags |= IGC_FLAG_WOL_SUPPORTED; 7251 7252 /* initialize the wol settings based on the eeprom settings */ 7253 if (adapter->flags & IGC_FLAG_WOL_SUPPORTED) 7254 adapter->wol |= IGC_WUFC_MAG; 7255 7256 device_set_wakeup_enable(&adapter->pdev->dev, 7257 adapter->flags & IGC_FLAG_WOL_SUPPORTED); 7258 7259 igc_ptp_init(adapter); 7260 7261 igc_tsn_clear_schedule(adapter); 7262 7263 igc_fpe_init(adapter); 7264 7265 /* reset the hardware with the new settings */ 7266 igc_reset(adapter); 7267 7268 /* let the f/w know that the h/w is now under the control of the 7269 * driver. 
7270 */ 7271 igc_get_hw_control(adapter); 7272 7273 strscpy(netdev->name, "eth%d", sizeof(netdev->name)); 7274 err = register_netdev(netdev); 7275 if (err) 7276 goto err_register; 7277 7278 /* carrier off reporting is important to ethtool even BEFORE open */ 7279 netif_carrier_off(netdev); 7280 7281 /* Check if Media Autosense is enabled */ 7282 adapter->ei = *ei; 7283 7284 /* print pcie link status and MAC address */ 7285 pcie_print_link_status(pdev); 7286 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 7287 7288 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 7289 /* Disable EEE for internal PHY devices */ 7290 hw->dev_spec._base.eee_enable = false; 7291 adapter->flags &= ~IGC_FLAG_EEE; 7292 igc_set_eee_i225(hw, false, false, false); 7293 7294 pm_runtime_put_noidle(&pdev->dev); 7295 7296 if (IS_ENABLED(CONFIG_IGC_LEDS)) { 7297 err = igc_led_setup(adapter); 7298 if (err) 7299 goto err_register; 7300 } 7301 7302 return 0; 7303 7304 err_register: 7305 igc_release_hw_control(adapter); 7306 igc_ptp_stop(adapter); 7307 err_eeprom: 7308 if (!igc_check_reset_block(hw)) 7309 igc_reset_phy(hw); 7310 err_sw_init: 7311 igc_clear_interrupt_scheme(adapter); 7312 iounmap(adapter->io_addr); 7313 err_ioremap: 7314 free_netdev(netdev); 7315 err_alloc_etherdev: 7316 pci_release_mem_regions(pdev); 7317 err_pci_reg: 7318 err_dma: 7319 pci_disable_device(pdev); 7320 return err; 7321 } 7322 7323 /** 7324 * igc_remove - Device Removal Routine 7325 * @pdev: PCI device information struct 7326 * 7327 * igc_remove is called by the PCI subsystem to alert the driver 7328 * that it should release a PCI device. This could be caused by a 7329 * Hot-Plug event, or because the driver is going to be removed from 7330 * memory. 7331 */ 7332 static void igc_remove(struct pci_dev *pdev) 7333 { 7334 struct net_device *netdev = pci_get_drvdata(pdev); 7335 struct igc_adapter *adapter = netdev_priv(netdev); 7336 7337 pm_runtime_get_noresume(&pdev->dev); 7338 7339 igc_flush_nfc_rules(adapter); 7340 7341 igc_ptp_stop(adapter); 7342 7343 pci_disable_ptm(pdev); 7344 pci_clear_master(pdev); 7345 7346 set_bit(__IGC_DOWN, &adapter->state); 7347 7348 timer_delete_sync(&adapter->watchdog_timer); 7349 timer_delete_sync(&adapter->phy_info_timer); 7350 7351 cancel_work_sync(&adapter->reset_task); 7352 cancel_work_sync(&adapter->watchdog_task); 7353 hrtimer_cancel(&adapter->hrtimer); 7354 7355 if (IS_ENABLED(CONFIG_IGC_LEDS)) 7356 igc_led_free(adapter); 7357 7358 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7359 * would have already happened in close and is redundant. 7360 */ 7361 igc_release_hw_control(adapter); 7362 unregister_netdev(netdev); 7363 7364 igc_clear_interrupt_scheme(adapter); 7365 pci_iounmap(pdev, adapter->io_addr); 7366 pci_release_mem_regions(pdev); 7367 7368 free_netdev(netdev); 7369 7370 pci_disable_device(pdev); 7371 } 7372 7373 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 7374 bool runtime) 7375 { 7376 struct net_device *netdev = pci_get_drvdata(pdev); 7377 struct igc_adapter *adapter = netdev_priv(netdev); 7378 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 7379 struct igc_hw *hw = &adapter->hw; 7380 u32 ctrl, rctl, status; 7381 bool wake; 7382 7383 rtnl_lock(); 7384 netif_device_detach(netdev); 7385 7386 if (netif_running(netdev)) 7387 __igc_close(netdev, true); 7388 7389 igc_ptp_suspend(adapter); 7390 7391 igc_clear_interrupt_scheme(adapter); 7392 rtnl_unlock(); 7393 7394 status = rd32(IGC_STATUS); 7395 if (status & IGC_STATUS_LU) 7396 wufc &= ~IGC_WUFC_LNKC; 7397 7398 if (wufc) { 7399 igc_setup_rctl(adapter); 7400 igc_set_rx_mode(netdev); 7401 7402 /* turn on all-multi mode if wake on multicast is enabled */ 7403 if (wufc & IGC_WUFC_MC) { 7404 rctl = rd32(IGC_RCTL); 7405 rctl |= IGC_RCTL_MPE; 7406 wr32(IGC_RCTL, rctl); 7407 } 7408 7409 ctrl = rd32(IGC_CTRL); 7410 ctrl |= IGC_CTRL_ADVD3WUC; 7411 wr32(IGC_CTRL, ctrl); 7412 7413 /* Allow time for pending master requests to run */ 7414 igc_disable_pcie_master(hw); 7415 7416 wr32(IGC_WUC, IGC_WUC_PME_EN); 7417 wr32(IGC_WUFC, wufc); 7418 } else { 7419 wr32(IGC_WUC, 0); 7420 wr32(IGC_WUFC, 0); 7421 } 7422 7423 wake = wufc || adapter->en_mng_pt; 7424 if (!wake) 7425 igc_power_down_phy_copper_base(&adapter->hw); 7426 else 7427 igc_power_up_link(adapter); 7428 7429 if (enable_wake) 7430 *enable_wake = wake; 7431 7432 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7433 * would have already happened in close and is redundant. 7434 */ 7435 igc_release_hw_control(adapter); 7436 7437 pci_disable_device(pdev); 7438 7439 return 0; 7440 } 7441 7442 static int igc_runtime_suspend(struct device *dev) 7443 { 7444 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 7445 } 7446 7447 static void igc_deliver_wake_packet(struct net_device *netdev) 7448 { 7449 struct igc_adapter *adapter = netdev_priv(netdev); 7450 struct igc_hw *hw = &adapter->hw; 7451 struct sk_buff *skb; 7452 u32 wupl; 7453 7454 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 7455 7456 /* WUPM stores only the first 128 bytes of the wake packet. 7457 * Read the packet only if we have the whole thing. 7458 */ 7459 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 7460 return; 7461 7462 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 7463 if (!skb) 7464 return; 7465 7466 skb_put(skb, wupl); 7467 7468 /* Ensure reads are 32-bit aligned */ 7469 wupl = roundup(wupl, 4); 7470 7471 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 7472 7473 skb->protocol = eth_type_trans(skb, netdev); 7474 netif_rx(skb); 7475 } 7476 7477 static int __igc_resume(struct device *dev, bool rpm) 7478 { 7479 struct pci_dev *pdev = to_pci_dev(dev); 7480 struct net_device *netdev = pci_get_drvdata(pdev); 7481 struct igc_adapter *adapter = netdev_priv(netdev); 7482 struct igc_hw *hw = &adapter->hw; 7483 u32 err, val; 7484 7485 pci_set_power_state(pdev, PCI_D0); 7486 pci_restore_state(pdev); 7487 pci_save_state(pdev); 7488 7489 if (!pci_device_is_present(pdev)) 7490 return -ENODEV; 7491 err = pci_enable_device_mem(pdev); 7492 if (err) { 7493 netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 7494 return err; 7495 } 7496 pci_set_master(pdev); 7497 7498 pci_enable_wake(pdev, PCI_D3hot, 0); 7499 pci_enable_wake(pdev, PCI_D3cold, 0); 7500 7501 if (igc_init_interrupt_scheme(adapter, true)) { 7502 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7503 return -ENOMEM; 7504 } 7505 7506 igc_reset(adapter); 7507 7508 /* let the f/w know that the h/w is now under the control of the 7509 * driver. 
7510 */ 7511 igc_get_hw_control(adapter); 7512 7513 val = rd32(IGC_WUS); 7514 if (val & WAKE_PKT_WUS) 7515 igc_deliver_wake_packet(netdev); 7516 7517 wr32(IGC_WUS, ~0); 7518 7519 if (netif_running(netdev)) { 7520 if (!rpm) 7521 rtnl_lock(); 7522 err = __igc_open(netdev, true); 7523 if (!rpm) 7524 rtnl_unlock(); 7525 if (!err) 7526 netif_device_attach(netdev); 7527 } 7528 7529 return err; 7530 } 7531 7532 static int igc_resume(struct device *dev) 7533 { 7534 return __igc_resume(dev, false); 7535 } 7536 7537 static int igc_runtime_resume(struct device *dev) 7538 { 7539 return __igc_resume(dev, true); 7540 } 7541 7542 static int igc_suspend(struct device *dev) 7543 { 7544 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 7545 } 7546 7547 static int __maybe_unused igc_runtime_idle(struct device *dev) 7548 { 7549 struct net_device *netdev = dev_get_drvdata(dev); 7550 struct igc_adapter *adapter = netdev_priv(netdev); 7551 7552 if (!igc_has_link(adapter)) 7553 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 7554 7555 return -EBUSY; 7556 } 7557 7558 static void igc_shutdown(struct pci_dev *pdev) 7559 { 7560 bool wake; 7561 7562 __igc_shutdown(pdev, &wake, 0); 7563 7564 if (system_state == SYSTEM_POWER_OFF) { 7565 pci_wake_from_d3(pdev, wake); 7566 pci_set_power_state(pdev, PCI_D3hot); 7567 } 7568 } 7569 7570 /** 7571 * igc_io_error_detected - called when PCI error is detected 7572 * @pdev: Pointer to PCI device 7573 * @state: The current PCI connection state 7574 * 7575 * This function is called after a PCI bus error affecting 7576 * this device has been detected. 7577 **/ 7578 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, 7579 pci_channel_state_t state) 7580 { 7581 struct net_device *netdev = pci_get_drvdata(pdev); 7582 struct igc_adapter *adapter = netdev_priv(netdev); 7583 7584 rtnl_lock(); 7585 netif_device_detach(netdev); 7586 7587 if (state == pci_channel_io_perm_failure) { 7588 rtnl_unlock(); 7589 return PCI_ERS_RESULT_DISCONNECT; 7590 } 7591 7592 if (netif_running(netdev)) 7593 igc_down(adapter); 7594 pci_disable_device(pdev); 7595 rtnl_unlock(); 7596 7597 /* Request a slot reset. */ 7598 return PCI_ERS_RESULT_NEED_RESET; 7599 } 7600 7601 /** 7602 * igc_io_slot_reset - called after the PCI bus has been reset. 7603 * @pdev: Pointer to PCI device 7604 * 7605 * Restart the card from scratch, as if from a cold-boot. Implementation 7606 * resembles the first-half of the __igc_resume routine. 7607 **/ 7608 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) 7609 { 7610 struct net_device *netdev = pci_get_drvdata(pdev); 7611 struct igc_adapter *adapter = netdev_priv(netdev); 7612 struct igc_hw *hw = &adapter->hw; 7613 pci_ers_result_t result; 7614 7615 if (pci_enable_device_mem(pdev)) { 7616 netdev_err(netdev, "Could not re-enable PCI device after reset\n"); 7617 result = PCI_ERS_RESULT_DISCONNECT; 7618 } else { 7619 pci_set_master(pdev); 7620 pci_restore_state(pdev); 7621 pci_save_state(pdev); 7622 7623 pci_enable_wake(pdev, PCI_D3hot, 0); 7624 pci_enable_wake(pdev, PCI_D3cold, 0); 7625 7626 /* In case of PCI error, adapter loses its HW address 7627 * so we should re-assign it here. 7628 */ 7629 hw->hw_addr = adapter->io_addr; 7630 7631 igc_reset(adapter); 7632 wr32(IGC_WUS, ~0); 7633 result = PCI_ERS_RESULT_RECOVERED; 7634 } 7635 7636 return result; 7637 } 7638 7639 /** 7640 * igc_io_resume - called when traffic can start to flow again. 
7641 * @pdev: Pointer to PCI device 7642 * 7643 * This callback is called when the error recovery driver tells us that 7644 * its OK to resume normal operation. Implementation resembles the 7645 * second-half of the __igc_resume routine. 7646 */ 7647 static void igc_io_resume(struct pci_dev *pdev) 7648 { 7649 struct net_device *netdev = pci_get_drvdata(pdev); 7650 struct igc_adapter *adapter = netdev_priv(netdev); 7651 7652 rtnl_lock(); 7653 if (netif_running(netdev)) { 7654 if (igc_open(netdev)) { 7655 rtnl_unlock(); 7656 netdev_err(netdev, "igc_open failed after reset\n"); 7657 return; 7658 } 7659 } 7660 7661 netif_device_attach(netdev); 7662 7663 /* let the f/w know that the h/w is now under the control of the 7664 * driver. 7665 */ 7666 igc_get_hw_control(adapter); 7667 rtnl_unlock(); 7668 } 7669 7670 static const struct pci_error_handlers igc_err_handler = { 7671 .error_detected = igc_io_error_detected, 7672 .slot_reset = igc_io_slot_reset, 7673 .resume = igc_io_resume, 7674 }; 7675 7676 static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, 7677 igc_runtime_suspend, igc_runtime_resume, 7678 igc_runtime_idle); 7679 7680 static struct pci_driver igc_driver = { 7681 .name = igc_driver_name, 7682 .id_table = igc_pci_tbl, 7683 .probe = igc_probe, 7684 .remove = igc_remove, 7685 .driver.pm = pm_ptr(&igc_pm_ops), 7686 .shutdown = igc_shutdown, 7687 .err_handler = &igc_err_handler, 7688 }; 7689 7690 /** 7691 * igc_reinit_queues - return error 7692 * @adapter: pointer to adapter structure 7693 */ 7694 int igc_reinit_queues(struct igc_adapter *adapter) 7695 { 7696 struct net_device *netdev = adapter->netdev; 7697 int err = 0; 7698 7699 if (netif_running(netdev)) 7700 igc_close(netdev); 7701 7702 igc_reset_interrupt_capability(adapter); 7703 7704 if (igc_init_interrupt_scheme(adapter, true)) { 7705 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7706 return -ENOMEM; 7707 } 7708 7709 if (netif_running(netdev)) 7710 err = igc_open(netdev); 7711 7712 return err; 7713 } 7714 7715 /** 7716 * igc_get_hw_dev - return device 7717 * @hw: pointer to hardware structure 7718 * 7719 * used by hardware layer to print debugging information 7720 */ 7721 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 7722 { 7723 struct igc_adapter *adapter = hw->back; 7724 7725 return adapter->netdev; 7726 } 7727 7728 static void igc_disable_rx_ring_hw(struct igc_ring *ring) 7729 { 7730 struct igc_hw *hw = &ring->q_vector->adapter->hw; 7731 u8 idx = ring->reg_idx; 7732 u32 rxdctl; 7733 7734 rxdctl = rd32(IGC_RXDCTL(idx)); 7735 rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; 7736 rxdctl |= IGC_RXDCTL_SWFLUSH; 7737 wr32(IGC_RXDCTL(idx), rxdctl); 7738 } 7739 7740 void igc_disable_rx_ring(struct igc_ring *ring) 7741 { 7742 igc_disable_rx_ring_hw(ring); 7743 igc_clean_rx_ring(ring); 7744 } 7745 7746 void igc_enable_rx_ring(struct igc_ring *ring) 7747 { 7748 struct igc_adapter *adapter = ring->q_vector->adapter; 7749 7750 igc_configure_rx_ring(adapter, ring); 7751 7752 if (ring->xsk_pool) 7753 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 7754 else 7755 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 7756 } 7757 7758 void igc_disable_tx_ring(struct igc_ring *ring) 7759 { 7760 igc_disable_tx_ring_hw(ring); 7761 igc_clean_tx_ring(ring); 7762 } 7763 7764 void igc_enable_tx_ring(struct igc_ring *ring) 7765 { 7766 struct igc_adapter *adapter = ring->q_vector->adapter; 7767 7768 igc_configure_tx_ring(adapter, ring); 7769 } 7770 7771 /** 7772 * igc_init_module - Driver Registration Routine 7773 * 7774 * 
igc_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 */
static int __init igc_init_module(void)
{
	int ret;

	pr_info("%s\n", igc_driver_string);
	pr_info("%s\n", igc_copyright);

	ret = pci_register_driver(&igc_driver);
	return ret;
}

module_init(igc_init_module);

/**
 * igc_exit_module - Driver Exit Cleanup Routine
 *
 * igc_exit_module is called just before the driver is removed
 * from memory.
 */
static void __exit igc_exit_module(void)
{
	pci_unregister_driver(&igc_driver);
}

module_exit(igc_exit_module);
/* igc_main.c */