1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 Intel Corporation */ 3 4 #include <linux/module.h> 5 #include <linux/types.h> 6 #include <linux/if_vlan.h> 7 #include <linux/tcp.h> 8 #include <linux/udp.h> 9 #include <linux/ip.h> 10 #include <linux/pm_runtime.h> 11 #include <net/pkt_sched.h> 12 #include <linux/bpf_trace.h> 13 #include <net/xdp_sock_drv.h> 14 #include <linux/pci.h> 15 #include <linux/mdio.h> 16 17 #include <net/ipv6.h> 18 19 #include "igc.h" 20 #include "igc_hw.h" 21 #include "igc_tsn.h" 22 #include "igc_xdp.h" 23 24 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" 25 26 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) 27 28 #define IGC_XDP_PASS 0 29 #define IGC_XDP_CONSUMED BIT(0) 30 #define IGC_XDP_TX BIT(1) 31 #define IGC_XDP_REDIRECT BIT(2) 32 33 static int debug = -1; 34 35 MODULE_DESCRIPTION(DRV_SUMMARY); 36 MODULE_LICENSE("GPL v2"); 37 module_param(debug, int, 0); 38 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 39 40 char igc_driver_name[] = "igc"; 41 static const char igc_driver_string[] = DRV_SUMMARY; 42 static const char igc_copyright[] = 43 "Copyright(c) 2018 Intel Corporation."; 44 45 static const struct igc_info *igc_info_tbl[] = { 46 [board_base] = &igc_base_info, 47 }; 48 49 static const struct pci_device_id igc_pci_tbl[] = { 50 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, 51 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, 52 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, 53 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, 54 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, 55 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, 56 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, 57 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, 58 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base }, 59 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, 60 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, 61 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, 62 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base }, 63 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base }, 64 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base }, 65 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, 66 /* required last entry */ 67 {0, } 68 }; 69 70 MODULE_DEVICE_TABLE(pci, igc_pci_tbl); 71 72 enum latency_range { 73 lowest_latency = 0, 74 low_latency = 1, 75 bulk_latency = 2, 76 latency_invalid = 255 77 }; 78 79 void igc_reset(struct igc_adapter *adapter) 80 { 81 struct net_device *dev = adapter->netdev; 82 struct igc_hw *hw = &adapter->hw; 83 struct igc_fc_info *fc = &hw->fc; 84 u32 pba, hwm; 85 86 /* Repartition PBA for greater than 9k MTU if required */ 87 pba = IGC_PBA_34K; 88 89 /* flow control settings 90 * The high water mark must be low enough to fit one full frame 91 * after transmitting the pause frame. As such we must have enough 92 * space to allow for us to complete our current transmit and then 93 * receive the frame that is in progress from the link partner. 
94 * Set it to: 95 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 96 */ 97 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 98 99 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 100 fc->low_water = fc->high_water - 16; 101 fc->pause_time = 0xFFFF; 102 fc->send_xon = 1; 103 fc->current_mode = fc->requested_mode; 104 105 hw->mac.ops.reset_hw(hw); 106 107 if (hw->mac.ops.init_hw(hw)) 108 netdev_err(dev, "Error on hardware initialization\n"); 109 110 /* Re-establish EEE setting */ 111 igc_set_eee_i225(hw, true, true, true); 112 113 if (!netif_running(adapter->netdev)) 114 igc_power_down_phy_copper_base(&adapter->hw); 115 116 /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ 117 wr32(IGC_VET, ETH_P_8021Q); 118 119 /* Re-enable PTP, where applicable. */ 120 igc_ptp_reset(adapter); 121 122 /* Re-enable TSN offloading, where applicable. */ 123 igc_tsn_reset(adapter); 124 125 igc_get_phy_info(hw); 126 } 127 128 /** 129 * igc_power_up_link - Power up the phy link 130 * @adapter: address of board private structure 131 */ 132 static void igc_power_up_link(struct igc_adapter *adapter) 133 { 134 igc_reset_phy(&adapter->hw); 135 136 igc_power_up_phy_copper(&adapter->hw); 137 138 igc_setup_link(&adapter->hw); 139 } 140 141 /** 142 * igc_release_hw_control - release control of the h/w to f/w 143 * @adapter: address of board private structure 144 * 145 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 146 * For ASF and Pass Through versions of f/w this means that the 147 * driver is no longer loaded. 148 */ 149 static void igc_release_hw_control(struct igc_adapter *adapter) 150 { 151 struct igc_hw *hw = &adapter->hw; 152 u32 ctrl_ext; 153 154 if (!pci_device_is_present(adapter->pdev)) 155 return; 156 157 /* Let firmware take over control of h/w */ 158 ctrl_ext = rd32(IGC_CTRL_EXT); 159 wr32(IGC_CTRL_EXT, 160 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 161 } 162 163 /** 164 * igc_get_hw_control - get control of the h/w from f/w 165 * @adapter: address of board private structure 166 * 167 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 168 * For ASF and Pass Through versions of f/w this means that 169 * the driver is loaded. 
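 *
 * This is the counterpart of igc_release_hw_control(): the driver sets
 * DRV_LOAD while it owns the device and clears it again when control is
 * handed back to the firmware.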
170 */ 171 static void igc_get_hw_control(struct igc_adapter *adapter) 172 { 173 struct igc_hw *hw = &adapter->hw; 174 u32 ctrl_ext; 175 176 /* Let firmware know the driver has taken over */ 177 ctrl_ext = rd32(IGC_CTRL_EXT); 178 wr32(IGC_CTRL_EXT, 179 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 180 } 181 182 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 183 { 184 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 185 dma_unmap_len(buf, len), DMA_TO_DEVICE); 186 187 dma_unmap_len_set(buf, len, 0); 188 } 189 190 /** 191 * igc_clean_tx_ring - Free Tx Buffers 192 * @tx_ring: ring to be cleaned 193 */ 194 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 195 { 196 u16 i = tx_ring->next_to_clean; 197 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 198 u32 xsk_frames = 0; 199 200 while (i != tx_ring->next_to_use) { 201 union igc_adv_tx_desc *eop_desc, *tx_desc; 202 203 switch (tx_buffer->type) { 204 case IGC_TX_BUFFER_TYPE_XSK: 205 xsk_frames++; 206 break; 207 case IGC_TX_BUFFER_TYPE_XDP: 208 xdp_return_frame(tx_buffer->xdpf); 209 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 210 break; 211 case IGC_TX_BUFFER_TYPE_SKB: 212 dev_kfree_skb_any(tx_buffer->skb); 213 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 214 break; 215 default: 216 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 217 break; 218 } 219 220 /* check for eop_desc to determine the end of the packet */ 221 eop_desc = tx_buffer->next_to_watch; 222 tx_desc = IGC_TX_DESC(tx_ring, i); 223 224 /* unmap remaining buffers */ 225 while (tx_desc != eop_desc) { 226 tx_buffer++; 227 tx_desc++; 228 i++; 229 if (unlikely(i == tx_ring->count)) { 230 i = 0; 231 tx_buffer = tx_ring->tx_buffer_info; 232 tx_desc = IGC_TX_DESC(tx_ring, 0); 233 } 234 235 /* unmap any remaining paged data */ 236 if (dma_unmap_len(tx_buffer, len)) 237 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 238 } 239 240 tx_buffer->next_to_watch = NULL; 241 242 /* move us one more past the eop_desc for start of next pkt */ 243 tx_buffer++; 244 i++; 245 if (unlikely(i == tx_ring->count)) { 246 i = 0; 247 tx_buffer = tx_ring->tx_buffer_info; 248 } 249 } 250 251 if (tx_ring->xsk_pool && xsk_frames) 252 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 253 254 /* reset BQL for queue */ 255 netdev_tx_reset_queue(txring_txq(tx_ring)); 256 257 /* Zero out the buffer ring */ 258 memset(tx_ring->tx_buffer_info, 0, 259 sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); 260 261 /* Zero out the descriptor ring */ 262 memset(tx_ring->desc, 0, tx_ring->size); 263 264 /* reset next_to_use and next_to_clean */ 265 tx_ring->next_to_use = 0; 266 tx_ring->next_to_clean = 0; 267 } 268 269 /** 270 * igc_free_tx_resources - Free Tx Resources per Queue 271 * @tx_ring: Tx descriptor ring for a specific queue 272 * 273 * Free all transmit software resources 274 */ 275 void igc_free_tx_resources(struct igc_ring *tx_ring) 276 { 277 igc_disable_tx_ring(tx_ring); 278 279 vfree(tx_ring->tx_buffer_info); 280 tx_ring->tx_buffer_info = NULL; 281 282 /* if not set, then don't free */ 283 if (!tx_ring->desc) 284 return; 285 286 dma_free_coherent(tx_ring->dev, tx_ring->size, 287 tx_ring->desc, tx_ring->dma); 288 289 tx_ring->desc = NULL; 290 } 291 292 /** 293 * igc_free_all_tx_resources - Free Tx Resources for All Queues 294 * @adapter: board private structure 295 * 296 * Free all transmit software resources 297 */ 298 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 299 { 300 int i; 301 302 for (i = 0; i < adapter->num_tx_queues; i++) 303 
igc_free_tx_resources(adapter->tx_ring[i]); 304 } 305 306 /** 307 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 308 * @adapter: board private structure 309 */ 310 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 311 { 312 int i; 313 314 for (i = 0; i < adapter->num_tx_queues; i++) 315 if (adapter->tx_ring[i]) 316 igc_clean_tx_ring(adapter->tx_ring[i]); 317 } 318 319 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 320 { 321 struct igc_hw *hw = &ring->q_vector->adapter->hw; 322 u8 idx = ring->reg_idx; 323 u32 txdctl; 324 325 txdctl = rd32(IGC_TXDCTL(idx)); 326 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 327 txdctl |= IGC_TXDCTL_SWFLUSH; 328 wr32(IGC_TXDCTL(idx), txdctl); 329 } 330 331 /** 332 * igc_disable_all_tx_rings_hw - Disable all transmit queue operation 333 * @adapter: board private structure 334 */ 335 static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) 336 { 337 int i; 338 339 for (i = 0; i < adapter->num_tx_queues; i++) { 340 struct igc_ring *tx_ring = adapter->tx_ring[i]; 341 342 igc_disable_tx_ring_hw(tx_ring); 343 } 344 } 345 346 /** 347 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 348 * @tx_ring: tx descriptor ring (for a specific queue) to setup 349 * 350 * Return 0 on success, negative on failure 351 */ 352 int igc_setup_tx_resources(struct igc_ring *tx_ring) 353 { 354 struct net_device *ndev = tx_ring->netdev; 355 struct device *dev = tx_ring->dev; 356 int size = 0; 357 358 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 359 tx_ring->tx_buffer_info = vzalloc(size); 360 if (!tx_ring->tx_buffer_info) 361 goto err; 362 363 /* round up to nearest 4K */ 364 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 365 tx_ring->size = ALIGN(tx_ring->size, 4096); 366 367 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 368 &tx_ring->dma, GFP_KERNEL); 369 370 if (!tx_ring->desc) 371 goto err; 372 373 tx_ring->next_to_use = 0; 374 tx_ring->next_to_clean = 0; 375 376 return 0; 377 378 err: 379 vfree(tx_ring->tx_buffer_info); 380 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 381 return -ENOMEM; 382 } 383 384 /** 385 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 386 * @adapter: board private structure 387 * 388 * Return 0 on success, negative on failure 389 */ 390 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 391 { 392 struct net_device *dev = adapter->netdev; 393 int i, err = 0; 394 395 for (i = 0; i < adapter->num_tx_queues; i++) { 396 err = igc_setup_tx_resources(adapter->tx_ring[i]); 397 if (err) { 398 netdev_err(dev, "Error on Tx queue %u setup\n", i); 399 for (i--; i >= 0; i--) 400 igc_free_tx_resources(adapter->tx_ring[i]); 401 break; 402 } 403 } 404 405 return err; 406 } 407 408 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 409 { 410 u16 i = rx_ring->next_to_clean; 411 412 dev_kfree_skb(rx_ring->skb); 413 rx_ring->skb = NULL; 414 415 /* Free all the Rx ring sk_buffs */ 416 while (i != rx_ring->next_to_alloc) { 417 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 418 419 /* Invalidate cache lines that may have been written to by 420 * device so that we avoid corrupting memory. 
421 */ 422 dma_sync_single_range_for_cpu(rx_ring->dev, 423 buffer_info->dma, 424 buffer_info->page_offset, 425 igc_rx_bufsz(rx_ring), 426 DMA_FROM_DEVICE); 427 428 /* free resources associated with mapping */ 429 dma_unmap_page_attrs(rx_ring->dev, 430 buffer_info->dma, 431 igc_rx_pg_size(rx_ring), 432 DMA_FROM_DEVICE, 433 IGC_RX_DMA_ATTR); 434 __page_frag_cache_drain(buffer_info->page, 435 buffer_info->pagecnt_bias); 436 437 i++; 438 if (i == rx_ring->count) 439 i = 0; 440 } 441 } 442 443 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 444 { 445 struct igc_rx_buffer *bi; 446 u16 i; 447 448 for (i = 0; i < ring->count; i++) { 449 bi = &ring->rx_buffer_info[i]; 450 if (!bi->xdp) 451 continue; 452 453 xsk_buff_free(bi->xdp); 454 bi->xdp = NULL; 455 } 456 } 457 458 /** 459 * igc_clean_rx_ring - Free Rx Buffers per Queue 460 * @ring: ring to free buffers from 461 */ 462 static void igc_clean_rx_ring(struct igc_ring *ring) 463 { 464 if (ring->xsk_pool) 465 igc_clean_rx_ring_xsk_pool(ring); 466 else 467 igc_clean_rx_ring_page_shared(ring); 468 469 clear_ring_uses_large_buffer(ring); 470 471 ring->next_to_alloc = 0; 472 ring->next_to_clean = 0; 473 ring->next_to_use = 0; 474 } 475 476 /** 477 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 478 * @adapter: board private structure 479 */ 480 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 481 { 482 int i; 483 484 for (i = 0; i < adapter->num_rx_queues; i++) 485 if (adapter->rx_ring[i]) 486 igc_clean_rx_ring(adapter->rx_ring[i]); 487 } 488 489 /** 490 * igc_free_rx_resources - Free Rx Resources 491 * @rx_ring: ring to clean the resources from 492 * 493 * Free all receive software resources 494 */ 495 void igc_free_rx_resources(struct igc_ring *rx_ring) 496 { 497 igc_clean_rx_ring(rx_ring); 498 499 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 500 501 vfree(rx_ring->rx_buffer_info); 502 rx_ring->rx_buffer_info = NULL; 503 504 /* if not set, then don't free */ 505 if (!rx_ring->desc) 506 return; 507 508 dma_free_coherent(rx_ring->dev, rx_ring->size, 509 rx_ring->desc, rx_ring->dma); 510 511 rx_ring->desc = NULL; 512 } 513 514 /** 515 * igc_free_all_rx_resources - Free Rx Resources for All Queues 516 * @adapter: board private structure 517 * 518 * Free all receive software resources 519 */ 520 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 521 { 522 int i; 523 524 for (i = 0; i < adapter->num_rx_queues; i++) 525 igc_free_rx_resources(adapter->rx_ring[i]); 526 } 527 528 /** 529 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 530 * @rx_ring: rx descriptor ring (for a specific queue) to setup 531 * 532 * Returns 0 on success, negative on failure 533 */ 534 int igc_setup_rx_resources(struct igc_ring *rx_ring) 535 { 536 struct net_device *ndev = rx_ring->netdev; 537 struct device *dev = rx_ring->dev; 538 u8 index = rx_ring->queue_index; 539 int size, desc_len, res; 540 541 /* XDP RX-queue info */ 542 if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 543 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 544 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 545 rx_ring->q_vector->napi.napi_id); 546 if (res < 0) { 547 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 548 index); 549 return res; 550 } 551 552 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 553 rx_ring->rx_buffer_info = vzalloc(size); 554 if (!rx_ring->rx_buffer_info) 555 goto err; 556 557 desc_len = sizeof(union igc_adv_rx_desc); 558 559 /* Round up to nearest 4K */ 560 rx_ring->size = rx_ring->count * desc_len; 561 
rx_ring->size = ALIGN(rx_ring->size, 4096); 562 563 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 564 &rx_ring->dma, GFP_KERNEL); 565 566 if (!rx_ring->desc) 567 goto err; 568 569 rx_ring->next_to_alloc = 0; 570 rx_ring->next_to_clean = 0; 571 rx_ring->next_to_use = 0; 572 573 return 0; 574 575 err: 576 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 577 vfree(rx_ring->rx_buffer_info); 578 rx_ring->rx_buffer_info = NULL; 579 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 580 return -ENOMEM; 581 } 582 583 /** 584 * igc_setup_all_rx_resources - wrapper to allocate Rx resources 585 * (Descriptors) for all queues 586 * @adapter: board private structure 587 * 588 * Return 0 on success, negative on failure 589 */ 590 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 591 { 592 struct net_device *dev = adapter->netdev; 593 int i, err = 0; 594 595 for (i = 0; i < adapter->num_rx_queues; i++) { 596 err = igc_setup_rx_resources(adapter->rx_ring[i]); 597 if (err) { 598 netdev_err(dev, "Error on Rx queue %u setup\n", i); 599 for (i--; i >= 0; i--) 600 igc_free_rx_resources(adapter->rx_ring[i]); 601 break; 602 } 603 } 604 605 return err; 606 } 607 608 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 609 struct igc_ring *ring) 610 { 611 if (!igc_xdp_is_enabled(adapter) || 612 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 613 return NULL; 614 615 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 616 } 617 618 /** 619 * igc_configure_rx_ring - Configure a receive ring after Reset 620 * @adapter: board private structure 621 * @ring: receive ring to be configured 622 * 623 * Configure the Rx unit of the MAC after a reset. 624 */ 625 static void igc_configure_rx_ring(struct igc_adapter *adapter, 626 struct igc_ring *ring) 627 { 628 struct igc_hw *hw = &adapter->hw; 629 union igc_adv_rx_desc *rx_desc; 630 int reg_idx = ring->reg_idx; 631 u32 srrctl = 0, rxdctl = 0; 632 u64 rdba = ring->dma; 633 u32 buf_size; 634 635 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 636 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 637 if (ring->xsk_pool) { 638 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 639 MEM_TYPE_XSK_BUFF_POOL, 640 NULL)); 641 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 642 } else { 643 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 644 MEM_TYPE_PAGE_SHARED, 645 NULL)); 646 } 647 648 if (igc_xdp_is_enabled(adapter)) 649 set_ring_uses_large_buffer(ring); 650 651 /* disable the queue */ 652 wr32(IGC_RXDCTL(reg_idx), 0); 653 654 /* Set DMA base address registers */ 655 wr32(IGC_RDBAL(reg_idx), 656 rdba & 0x00000000ffffffffULL); 657 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 658 wr32(IGC_RDLEN(reg_idx), 659 ring->count * sizeof(union igc_adv_rx_desc)); 660 661 /* initialize head and tail */ 662 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 663 wr32(IGC_RDH(reg_idx), 0); 664 writel(0, ring->tail); 665 666 /* reset next-to- use/clean to place SW in sync with hardware */ 667 ring->next_to_clean = 0; 668 ring->next_to_use = 0; 669 670 if (ring->xsk_pool) 671 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); 672 else if (ring_uses_large_buffer(ring)) 673 buf_size = IGC_RXBUFFER_3072; 674 else 675 buf_size = IGC_RXBUFFER_2048; 676 677 srrctl = rd32(IGC_SRRCTL(reg_idx)); 678 srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | 679 IGC_SRRCTL_DESCTYPE_MASK); 680 srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); 681 srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); 682 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 
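	/* srrctl now selects the advanced one-buffer descriptor format, with
	 * the header buffer size set to IGC_RX_HDR_LEN and the packet buffer
	 * size set to the buf_size chosen above (2K, 3K or the XSK frame
	 * size).
	 */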
683 684 wr32(IGC_SRRCTL(reg_idx), srrctl); 685 686 rxdctl |= IGC_RXDCTL_PTHRESH; 687 rxdctl |= IGC_RXDCTL_HTHRESH << 8; 688 rxdctl |= IGC_RXDCTL_WTHRESH << 16; 689 690 /* initialize rx_buffer_info */ 691 memset(ring->rx_buffer_info, 0, 692 sizeof(struct igc_rx_buffer) * ring->count); 693 694 /* initialize Rx descriptor 0 */ 695 rx_desc = IGC_RX_DESC(ring, 0); 696 rx_desc->wb.upper.length = 0; 697 698 /* enable receive descriptor fetching */ 699 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 700 701 wr32(IGC_RXDCTL(reg_idx), rxdctl); 702 } 703 704 /** 705 * igc_configure_rx - Configure receive Unit after Reset 706 * @adapter: board private structure 707 * 708 * Configure the Rx unit of the MAC after a reset. 709 */ 710 static void igc_configure_rx(struct igc_adapter *adapter) 711 { 712 int i; 713 714 /* Setup the HW Rx Head and Tail Descriptor Pointers and 715 * the Base and Length of the Rx Descriptor Ring 716 */ 717 for (i = 0; i < adapter->num_rx_queues; i++) 718 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 719 } 720 721 /** 722 * igc_configure_tx_ring - Configure transmit ring after Reset 723 * @adapter: board private structure 724 * @ring: tx ring to configure 725 * 726 * Configure a transmit ring after a reset. 727 */ 728 static void igc_configure_tx_ring(struct igc_adapter *adapter, 729 struct igc_ring *ring) 730 { 731 struct igc_hw *hw = &adapter->hw; 732 int reg_idx = ring->reg_idx; 733 u64 tdba = ring->dma; 734 u32 txdctl = 0; 735 736 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 737 738 /* disable the queue */ 739 wr32(IGC_TXDCTL(reg_idx), 0); 740 wrfl(); 741 742 wr32(IGC_TDLEN(reg_idx), 743 ring->count * sizeof(union igc_adv_tx_desc)); 744 wr32(IGC_TDBAL(reg_idx), 745 tdba & 0x00000000ffffffffULL); 746 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 747 748 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 749 wr32(IGC_TDH(reg_idx), 0); 750 writel(0, ring->tail); 751 752 txdctl |= IGC_TXDCTL_PTHRESH(8) | IGC_TXDCTL_HTHRESH(1) | 753 IGC_TXDCTL_WTHRESH(16) | IGC_TXDCTL_QUEUE_ENABLE; 754 755 wr32(IGC_TXDCTL(reg_idx), txdctl); 756 } 757 758 /** 759 * igc_configure_tx - Configure transmit Unit after Reset 760 * @adapter: board private structure 761 * 762 * Configure the Tx unit of the MAC after a reset. 763 */ 764 static void igc_configure_tx(struct igc_adapter *adapter) 765 { 766 int i; 767 768 for (i = 0; i < adapter->num_tx_queues; i++) 769 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 770 } 771 772 /** 773 * igc_setup_mrqc - configure the multiple receive queue control registers 774 * @adapter: Board private structure 775 */ 776 static void igc_setup_mrqc(struct igc_adapter *adapter) 777 { 778 struct igc_hw *hw = &adapter->hw; 779 u32 j, num_rx_queues; 780 u32 mrqc, rxcsum; 781 u32 rss_key[10]; 782 783 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 784 for (j = 0; j < 10; j++) 785 wr32(IGC_RSSRK(j), rss_key[j]); 786 787 num_rx_queues = adapter->rss_queues; 788 789 if (adapter->rss_indir_tbl_init != num_rx_queues) { 790 for (j = 0; j < IGC_RETA_SIZE; j++) 791 adapter->rss_indir_tbl[j] = 792 (j * num_rx_queues) / IGC_RETA_SIZE; 793 adapter->rss_indir_tbl_init = num_rx_queues; 794 } 795 igc_write_rss_indir_tbl(adapter); 796 797 /* Disable raw packet checksumming so that RSS hash is placed in 798 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum 799 * offloads as they are enabled by default 800 */ 801 rxcsum = rd32(IGC_RXCSUM); 802 rxcsum |= IGC_RXCSUM_PCSD; 803 804 /* Enable Receive Checksum Offload for SCTP */ 805 rxcsum |= IGC_RXCSUM_CRCOFL; 806 807 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 808 wr32(IGC_RXCSUM, rxcsum); 809 810 /* Generate RSS hash based on packet types, TCP/UDP 811 * port numbers and/or IPv4/v6 src and dst addresses 812 */ 813 mrqc = IGC_MRQC_RSS_FIELD_IPV4 | 814 IGC_MRQC_RSS_FIELD_IPV4_TCP | 815 IGC_MRQC_RSS_FIELD_IPV6 | 816 IGC_MRQC_RSS_FIELD_IPV6_TCP | 817 IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; 818 819 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) 820 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; 821 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) 822 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; 823 824 mrqc |= IGC_MRQC_ENABLE_RSS_MQ; 825 826 wr32(IGC_MRQC, mrqc); 827 } 828 829 /** 830 * igc_setup_rctl - configure the receive control registers 831 * @adapter: Board private structure 832 */ 833 static void igc_setup_rctl(struct igc_adapter *adapter) 834 { 835 struct igc_hw *hw = &adapter->hw; 836 u32 rctl; 837 838 rctl = rd32(IGC_RCTL); 839 840 rctl &= ~(3 << IGC_RCTL_MO_SHIFT); 841 rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC); 842 843 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF | 844 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); 845 846 /* enable stripping of CRC. Newer features require 847 * that the HW strips the CRC. 848 */ 849 rctl |= IGC_RCTL_SECRC; 850 851 /* disable store bad packets and clear size bits. */ 852 rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256); 853 854 /* enable LPE to allow for reception of jumbo frames */ 855 rctl |= IGC_RCTL_LPE; 856 857 /* disable queue 0 to prevent tail write w/o re-config */ 858 wr32(IGC_RXDCTL(0), 0); 859 860 /* This is useful for sniffing bad packets. */ 861 if (adapter->netdev->features & NETIF_F_RXALL) { 862 /* UPE and MPE will be handled by normal PROMISC logic 863 * in set_rx_mode 864 */ 865 rctl |= (IGC_RCTL_SBP | /* Receive bad packets */ 866 IGC_RCTL_BAM | /* RX All Bcast Pkts */ 867 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ 868 869 rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */ 870 IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */ 871 } 872 873 wr32(IGC_RCTL, rctl); 874 } 875 876 /** 877 * igc_setup_tctl - configure the transmit control registers 878 * @adapter: Board private structure 879 */ 880 static void igc_setup_tctl(struct igc_adapter *adapter) 881 { 882 struct igc_hw *hw = &adapter->hw; 883 u32 tctl; 884 885 /* disable queue 0 which icould be enabled by default */ 886 wr32(IGC_TXDCTL(0), 0); 887 888 /* Program the Transmit Control Register */ 889 tctl = rd32(IGC_TCTL); 890 tctl &= ~IGC_TCTL_CT; 891 tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC | 892 (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT); 893 894 /* Enable transmits */ 895 tctl |= IGC_TCTL_EN; 896 897 wr32(IGC_TCTL, tctl); 898 } 899 900 /** 901 * igc_set_mac_filter_hw() - Set MAC address filter in hardware 902 * @adapter: Pointer to adapter where the filter should be set 903 * @index: Filter index 904 * @type: MAC address filter type (source or destination) 905 * @addr: MAC address 906 * @queue: If non-negative, queue assignment feature is enabled and frames 907 * matching the filter are enqueued onto 'queue'. Otherwise, queue 908 * assignment is disabled. 
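 *
 * The filter is written to the RAL/RAH register pair selected by @index and
 * is marked valid by setting the Address Valid (AV) bit.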
909 */ 910 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 911 enum igc_mac_filter_type type, 912 const u8 *addr, int queue) 913 { 914 struct net_device *dev = adapter->netdev; 915 struct igc_hw *hw = &adapter->hw; 916 u32 ral, rah; 917 918 if (WARN_ON(index >= hw->mac.rar_entry_count)) 919 return; 920 921 ral = le32_to_cpup((__le32 *)(addr)); 922 rah = le16_to_cpup((__le16 *)(addr + 4)); 923 924 if (type == IGC_MAC_FILTER_TYPE_SRC) { 925 rah &= ~IGC_RAH_ASEL_MASK; 926 rah |= IGC_RAH_ASEL_SRC_ADDR; 927 } 928 929 if (queue >= 0) { 930 rah &= ~IGC_RAH_QSEL_MASK; 931 rah |= (queue << IGC_RAH_QSEL_SHIFT); 932 rah |= IGC_RAH_QSEL_ENABLE; 933 } 934 935 rah |= IGC_RAH_AV; 936 937 wr32(IGC_RAL(index), ral); 938 wr32(IGC_RAH(index), rah); 939 940 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 941 } 942 943 /** 944 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 945 * @adapter: Pointer to adapter where the filter should be cleared 946 * @index: Filter index 947 */ 948 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 949 { 950 struct net_device *dev = adapter->netdev; 951 struct igc_hw *hw = &adapter->hw; 952 953 if (WARN_ON(index >= hw->mac.rar_entry_count)) 954 return; 955 956 wr32(IGC_RAL(index), 0); 957 wr32(IGC_RAH(index), 0); 958 959 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 960 } 961 962 /* Set default MAC address for the PF in the first RAR entry */ 963 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 964 { 965 struct net_device *dev = adapter->netdev; 966 u8 *addr = adapter->hw.mac.addr; 967 968 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 969 970 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 971 } 972 973 /** 974 * igc_set_mac - Change the Ethernet Address of the NIC 975 * @netdev: network interface device structure 976 * @p: pointer to an address structure 977 * 978 * Returns 0 on success, negative on failure 979 */ 980 static int igc_set_mac(struct net_device *netdev, void *p) 981 { 982 struct igc_adapter *adapter = netdev_priv(netdev); 983 struct igc_hw *hw = &adapter->hw; 984 struct sockaddr *addr = p; 985 986 if (!is_valid_ether_addr(addr->sa_data)) 987 return -EADDRNOTAVAIL; 988 989 eth_hw_addr_set(netdev, addr->sa_data); 990 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 991 992 /* set the correct pool for the new PF MAC address in entry 0 */ 993 igc_set_default_mac_filter(adapter); 994 995 return 0; 996 } 997 998 /** 999 * igc_write_mc_addr_list - write multicast addresses to MTA 1000 * @netdev: network interface device structure 1001 * 1002 * Writes multicast address list to the MTA hash table. 1003 * Returns: -ENOMEM on failure 1004 * 0 on no addresses written 1005 * X on writing X addresses to MTA 1006 **/ 1007 static int igc_write_mc_addr_list(struct net_device *netdev) 1008 { 1009 struct igc_adapter *adapter = netdev_priv(netdev); 1010 struct igc_hw *hw = &adapter->hw; 1011 struct netdev_hw_addr *ha; 1012 u8 *mta_list; 1013 int i; 1014 1015 if (netdev_mc_empty(netdev)) { 1016 /* nothing to program, so clear mc list */ 1017 igc_update_mc_addr_list(hw, NULL, 0); 1018 return 0; 1019 } 1020 1021 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 1022 if (!mta_list) 1023 return -ENOMEM; 1024 1025 /* The shared function expects a packed array of only addresses. 
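	 * Each entry is ETH_ALEN bytes long; the addresses are copied in back
	 * to back with no padding in between.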
	 */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	igc_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}

static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
				bool *first_flag, bool *insert_empty)
{
	struct igc_adapter *adapter = netdev_priv(ring->netdev);
	ktime_t cycle_time = adapter->cycle_time;
	ktime_t base_time = adapter->base_time;
	ktime_t now = ktime_get_clocktai();
	ktime_t baset_est, end_of_cycle;
	s32 launchtime;
	s64 n;

	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);

	baset_est = ktime_add_ns(base_time, cycle_time * (n));
	end_of_cycle = ktime_add_ns(baset_est, cycle_time);

	if (ktime_compare(txtime, end_of_cycle) >= 0) {
		if (baset_est != ring->last_ff_cycle) {
			*first_flag = true;
			ring->last_ff_cycle = baset_est;

			if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
				*insert_empty = true;
		}
	}

	/* There is a window at the end of the cycle in which packets may not
	 * honor their launch time. A 5 usec window was chosen to allow for
	 * the software updating the tail pointer and the packets being DMA'd
	 * into the packet buffer.
	 */
	if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
		netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
			    txtime);

	ring->last_tx_cycle = end_of_cycle;

	launchtime = ktime_sub_ns(txtime, baset_est);
	if (launchtime > 0)
		div_s64_rem(launchtime, cycle_time, &launchtime);
	else
		launchtime = 0;

	return cpu_to_le32(launchtime);
}

static int igc_init_empty_frame(struct igc_ring *ring,
				struct igc_tx_buffer *buffer,
				struct sk_buff *skb)
{
	unsigned int size;
	dma_addr_t dma;

	size = skb_headlen(skb);

	dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ring->dev, dma)) {
		net_err_ratelimited("%s: DMA mapping error for empty frame\n",
				    netdev_name(ring->netdev));
		return -ENOMEM;
	}

	buffer->type = IGC_TX_BUFFER_TYPE_SKB;
	buffer->skb = skb;
	buffer->protocol = 0;
	buffer->bytecount = skb->len;
	buffer->gso_segs = 1;
	buffer->time_stamp = jiffies;
	dma_unmap_len_set(buffer, len, skb->len);
	dma_unmap_addr_set(buffer, dma, dma);

	return 0;
}

static void igc_init_tx_empty_descriptor(struct igc_ring *ring,
					 struct sk_buff *skb,
					 struct igc_tx_buffer *first)
{
	union igc_adv_tx_desc *desc;
	u32 cmd_type, olinfo_status;

	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
		   first->bytecount;
	olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;

	desc = IGC_TX_DESC(ring, ring->next_to_use);
	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));

	netdev_tx_sent_queue(txring_txq(ring), skb->len);

	first->next_to_watch = desc;

	ring->next_to_use++;
	if (ring->next_to_use == ring->count)
		ring->next_to_use = 0;
}

#define IGC_EMPTY_FRAME_SIZE 60

static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
			    __le32 launch_time, bool first_flag,
			    u32 vlan_macip_lens, u32 type_tucmd,
			    u32 mss_l4len_idx)
{
	struct igc_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IGC_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

	/* For i225, context index must be unique per ring. */
	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	if (first_flag)
		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;

	context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
	context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->launch_time = launch_time;
}

static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
			__le32 launch_time, bool first_flag)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_failed:
		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
		    !tx_ring->launchtime_enable)
			return;
		goto no_csum;
	}

	switch (skb->csum_offset) {
	case offsetof(struct tcphdr, check):
		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
		fallthrough;
	case offsetof(struct udphdr, check):
		break;
	case offsetof(struct sctphdr, checksum):
		/* validate that this is actually an SCTP request */
		if (skb_csum_is_sctp(skb)) {
			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
			break;
		}
		fallthrough;
	default:
		skb_checksum_help(skb);
		goto csum_failed;
	}

	/* update TX checksum flag */
	first->tx_flags |= IGC_TX_FLAGS_CSUM;
	vlan_macip_lens = skb_checksum_start_offset(skb) -
			  skb_network_offset(skb);
no_csum:
	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;

	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
			vlan_macip_lens, type_tucmd, 0);
}

static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Memory barrier: make the queue stop visible to the completion path
	 * before re-checking how many descriptors are free.
	 */
	smp_mb();

	/* We need to check again in case another CPU has just
	 * made room available.
	 */
	if (igc_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	if (igc_desc_unused(tx_ring) >= size)
		return 0;
	return __igc_maybe_stop_tx(tx_ring, size);
}

#define IGC_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ?
\ 1246 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1247 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1248 1249 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1250 { 1251 /* set type for advanced descriptor with frame checksum insertion */ 1252 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1253 IGC_ADVTXD_DCMD_DEXT | 1254 IGC_ADVTXD_DCMD_IFCS; 1255 1256 /* set HW vlan bit if vlan is present */ 1257 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1258 IGC_ADVTXD_DCMD_VLE); 1259 1260 /* set segmentation bits for TSO */ 1261 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1262 (IGC_ADVTXD_DCMD_TSE)); 1263 1264 /* set timestamp bit if present, will select the register set 1265 * based on the _TSTAMP(_X) bit. 1266 */ 1267 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1268 (IGC_ADVTXD_MAC_TSTAMP)); 1269 1270 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, 1271 (IGC_ADVTXD_TSTAMP_REG_1)); 1272 1273 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, 1274 (IGC_ADVTXD_TSTAMP_REG_2)); 1275 1276 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, 1277 (IGC_ADVTXD_TSTAMP_REG_3)); 1278 1279 /* insert frame checksum */ 1280 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1281 1282 return cmd_type; 1283 } 1284 1285 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1286 union igc_adv_tx_desc *tx_desc, 1287 u32 tx_flags, unsigned int paylen) 1288 { 1289 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1290 1291 /* insert L4 checksum */ 1292 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, 1293 (IGC_TXD_POPTS_TXSM << 8)); 1294 1295 /* insert IPv4 checksum */ 1296 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, 1297 (IGC_TXD_POPTS_IXSM << 8)); 1298 1299 /* Use the second timer (free running, in general) for the timestamp */ 1300 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, 1301 IGC_TXD_PTP2_TIMER_1); 1302 1303 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1304 } 1305 1306 static int igc_tx_map(struct igc_ring *tx_ring, 1307 struct igc_tx_buffer *first, 1308 const u8 hdr_len) 1309 { 1310 struct sk_buff *skb = first->skb; 1311 struct igc_tx_buffer *tx_buffer; 1312 union igc_adv_tx_desc *tx_desc; 1313 u32 tx_flags = first->tx_flags; 1314 skb_frag_t *frag; 1315 u16 i = tx_ring->next_to_use; 1316 unsigned int data_len, size; 1317 dma_addr_t dma; 1318 u32 cmd_type; 1319 1320 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1321 tx_desc = IGC_TX_DESC(tx_ring, i); 1322 1323 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1324 1325 size = skb_headlen(skb); 1326 data_len = skb->data_len; 1327 1328 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1329 1330 tx_buffer = first; 1331 1332 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1333 if (dma_mapping_error(tx_ring->dev, dma)) 1334 goto dma_error; 1335 1336 /* record length, and DMA address */ 1337 dma_unmap_len_set(tx_buffer, len, size); 1338 dma_unmap_addr_set(tx_buffer, dma, dma); 1339 1340 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1341 1342 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1343 tx_desc->read.cmd_type_len = 1344 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1345 1346 i++; 1347 tx_desc++; 1348 if (i == tx_ring->count) { 1349 tx_desc = IGC_TX_DESC(tx_ring, 0); 1350 i = 0; 1351 } 1352 tx_desc->read.olinfo_status = 0; 1353 1354 dma += IGC_MAX_DATA_PER_TXD; 1355 size -= IGC_MAX_DATA_PER_TXD; 1356 1357 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1358 } 1359 
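		/* once data_len is exhausted there are no more fragments to
		 * map; drop out of the loop and write the final descriptor
		 * (with RS and EOP set) below
		 */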
1360 if (likely(!data_len)) 1361 break; 1362 1363 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1364 1365 i++; 1366 tx_desc++; 1367 if (i == tx_ring->count) { 1368 tx_desc = IGC_TX_DESC(tx_ring, 0); 1369 i = 0; 1370 } 1371 tx_desc->read.olinfo_status = 0; 1372 1373 size = skb_frag_size(frag); 1374 data_len -= size; 1375 1376 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 1377 size, DMA_TO_DEVICE); 1378 1379 tx_buffer = &tx_ring->tx_buffer_info[i]; 1380 } 1381 1382 /* write last descriptor with RS and EOP bits */ 1383 cmd_type |= size | IGC_TXD_DCMD; 1384 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1385 1386 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1387 1388 /* set the timestamp */ 1389 first->time_stamp = jiffies; 1390 1391 skb_tx_timestamp(skb); 1392 1393 /* Force memory writes to complete before letting h/w know there 1394 * are new descriptors to fetch. (Only applicable for weak-ordered 1395 * memory model archs, such as IA-64). 1396 * 1397 * We also need this memory barrier to make certain all of the 1398 * status bits have been updated before next_to_watch is written. 1399 */ 1400 wmb(); 1401 1402 /* set next_to_watch value indicating a packet is present */ 1403 first->next_to_watch = tx_desc; 1404 1405 i++; 1406 if (i == tx_ring->count) 1407 i = 0; 1408 1409 tx_ring->next_to_use = i; 1410 1411 /* Make sure there is space in the ring for the next send. */ 1412 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1413 1414 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1415 writel(i, tx_ring->tail); 1416 } 1417 1418 return 0; 1419 dma_error: 1420 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1421 tx_buffer = &tx_ring->tx_buffer_info[i]; 1422 1423 /* clear dma mappings for failed tx_buffer_info map */ 1424 while (tx_buffer != first) { 1425 if (dma_unmap_len(tx_buffer, len)) 1426 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1427 1428 if (i-- == 0) 1429 i += tx_ring->count; 1430 tx_buffer = &tx_ring->tx_buffer_info[i]; 1431 } 1432 1433 if (dma_unmap_len(tx_buffer, len)) 1434 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1435 1436 dev_kfree_skb_any(tx_buffer->skb); 1437 tx_buffer->skb = NULL; 1438 1439 tx_ring->next_to_use = i; 1440 1441 return -1; 1442 } 1443 1444 static int igc_tso(struct igc_ring *tx_ring, 1445 struct igc_tx_buffer *first, 1446 __le32 launch_time, bool first_flag, 1447 u8 *hdr_len) 1448 { 1449 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1450 struct sk_buff *skb = first->skb; 1451 union { 1452 struct iphdr *v4; 1453 struct ipv6hdr *v6; 1454 unsigned char *hdr; 1455 } ip; 1456 union { 1457 struct tcphdr *tcp; 1458 struct udphdr *udp; 1459 unsigned char *hdr; 1460 } l4; 1461 u32 paylen, l4_offset; 1462 int err; 1463 1464 if (skb->ip_summed != CHECKSUM_PARTIAL) 1465 return 0; 1466 1467 if (!skb_is_gso(skb)) 1468 return 0; 1469 1470 err = skb_cow_head(skb, 0); 1471 if (err < 0) 1472 return err; 1473 1474 ip.hdr = skb_network_header(skb); 1475 l4.hdr = skb_checksum_start(skb); 1476 1477 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1478 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1479 1480 /* initialize outer IP header fields */ 1481 if (ip.v4->version == 4) { 1482 unsigned char *csum_start = skb_checksum_start(skb); 1483 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1484 1485 /* IP header will have to cancel out any data that 1486 * is not a part of the outer IP header 1487 */ 1488 ip.v4->check = csum_fold(csum_partial(trans_start, 1489 csum_start - trans_start, 1490 0)); 1491 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 
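		/* tot_len is cleared so that the hardware can insert the
		 * per-segment IPv4 total length when it splits the TSO frame
		 */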
1492 1493 ip.v4->tot_len = 0; 1494 first->tx_flags |= IGC_TX_FLAGS_TSO | 1495 IGC_TX_FLAGS_CSUM | 1496 IGC_TX_FLAGS_IPV4; 1497 } else { 1498 ip.v6->payload_len = 0; 1499 first->tx_flags |= IGC_TX_FLAGS_TSO | 1500 IGC_TX_FLAGS_CSUM; 1501 } 1502 1503 /* determine offset of inner transport header */ 1504 l4_offset = l4.hdr - skb->data; 1505 1506 /* remove payload length from inner checksum */ 1507 paylen = skb->len - l4_offset; 1508 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1509 /* compute length of segmentation header */ 1510 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1511 csum_replace_by_diff(&l4.tcp->check, 1512 (__force __wsum)htonl(paylen)); 1513 } else { 1514 /* compute length of segmentation header */ 1515 *hdr_len = sizeof(*l4.udp) + l4_offset; 1516 csum_replace_by_diff(&l4.udp->check, 1517 (__force __wsum)htonl(paylen)); 1518 } 1519 1520 /* update gso size and bytecount with header size */ 1521 first->gso_segs = skb_shinfo(skb)->gso_segs; 1522 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1523 1524 /* MSS L4LEN IDX */ 1525 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1526 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1527 1528 /* VLAN MACLEN IPLEN */ 1529 vlan_macip_lens = l4.hdr - ip.hdr; 1530 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1531 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1532 1533 igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, 1534 vlan_macip_lens, type_tucmd, mss_l4len_idx); 1535 1536 return 1; 1537 } 1538 1539 static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) 1540 { 1541 int i; 1542 1543 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 1544 struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; 1545 1546 if (tstamp->skb) 1547 continue; 1548 1549 tstamp->skb = skb_get(skb); 1550 tstamp->start = jiffies; 1551 *flags = tstamp->flags; 1552 1553 return true; 1554 } 1555 1556 return false; 1557 } 1558 1559 static int igc_insert_empty_frame(struct igc_ring *tx_ring) 1560 { 1561 struct igc_tx_buffer *empty_info; 1562 struct sk_buff *empty_skb; 1563 void *data; 1564 int ret; 1565 1566 empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1567 empty_skb = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); 1568 if (unlikely(!empty_skb)) { 1569 net_err_ratelimited("%s: skb alloc error for empty frame\n", 1570 netdev_name(tx_ring->netdev)); 1571 return -ENOMEM; 1572 } 1573 1574 data = skb_put(empty_skb, IGC_EMPTY_FRAME_SIZE); 1575 memset(data, 0, IGC_EMPTY_FRAME_SIZE); 1576 1577 /* Prepare DMA mapping and Tx buffer information */ 1578 ret = igc_init_empty_frame(tx_ring, empty_info, empty_skb); 1579 if (unlikely(ret)) { 1580 dev_kfree_skb_any(empty_skb); 1581 return ret; 1582 } 1583 1584 /* Prepare advanced context descriptor for empty packet */ 1585 igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); 1586 1587 /* Prepare advanced data descriptor for empty packet */ 1588 igc_init_tx_empty_descriptor(tx_ring, empty_skb, empty_info); 1589 1590 return 0; 1591 } 1592 1593 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1594 struct igc_ring *tx_ring) 1595 { 1596 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1597 bool first_flag = false, insert_empty = false; 1598 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1599 __be16 protocol = vlan_get_protocol(skb); 1600 struct igc_tx_buffer *first; 1601 __le32 launch_time = 0; 1602 u32 tx_flags = 0; 1603 unsigned short f; 1604 ktime_t txtime; 1605 u8 hdr_len = 0; 1606 int tso = 
0; 1607 1608 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1609 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1610 * + 2 desc gap to keep tail from touching head, 1611 * + 1 desc for context descriptor, 1612 * + 2 desc for inserting an empty packet for launch time, 1613 * otherwise try next time 1614 */ 1615 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1616 count += TXD_USE_COUNT(skb_frag_size( 1617 &skb_shinfo(skb)->frags[f])); 1618 1619 if (igc_maybe_stop_tx(tx_ring, count + 5)) { 1620 /* this is a hard error */ 1621 return NETDEV_TX_BUSY; 1622 } 1623 1624 if (!tx_ring->launchtime_enable) 1625 goto done; 1626 1627 txtime = skb->tstamp; 1628 skb->tstamp = ktime_set(0, 0); 1629 launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); 1630 1631 if (insert_empty) { 1632 /* Reset the launch time if the required empty frame fails to 1633 * be inserted. However, this packet is not dropped, so it 1634 * "dirties" the current Qbv cycle. This ensures that the 1635 * upcoming packet, which is scheduled in the next Qbv cycle, 1636 * does not require an empty frame. This way, the launch time 1637 * continues to function correctly despite the current failure 1638 * to insert the empty frame. 1639 */ 1640 if (igc_insert_empty_frame(tx_ring)) 1641 launch_time = 0; 1642 } 1643 1644 done: 1645 /* record the location of the first descriptor for this packet */ 1646 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1647 first->type = IGC_TX_BUFFER_TYPE_SKB; 1648 first->skb = skb; 1649 first->bytecount = skb->len; 1650 first->gso_segs = 1; 1651 1652 if (adapter->qbv_transition || tx_ring->oper_gate_closed) 1653 goto out_drop; 1654 1655 if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { 1656 adapter->stats.txdrop++; 1657 goto out_drop; 1658 } 1659 1660 if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && 1661 skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1662 unsigned long flags; 1663 u32 tstamp_flags; 1664 1665 spin_lock_irqsave(&adapter->ptp_tx_lock, flags); 1666 if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { 1667 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1668 tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; 1669 if (skb->sk && 1670 READ_ONCE(skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC) 1671 tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; 1672 } else { 1673 adapter->tx_hwtstamp_skipped++; 1674 } 1675 1676 spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); 1677 } 1678 1679 if (skb_vlan_tag_present(skb)) { 1680 tx_flags |= IGC_TX_FLAGS_VLAN; 1681 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1682 } 1683 1684 /* record initial flags and protocol */ 1685 first->tx_flags = tx_flags; 1686 first->protocol = protocol; 1687 1688 /* For preemptible queue, manually pad the skb so that HW includes 1689 * padding bytes in mCRC calculation 1690 */ 1691 if (tx_ring->preemptible && skb->len < ETH_ZLEN) { 1692 if (skb_padto(skb, ETH_ZLEN)) 1693 goto out_drop; 1694 skb_put(skb, ETH_ZLEN - skb->len); 1695 } 1696 1697 tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); 1698 if (tso < 0) 1699 goto out_drop; 1700 else if (!tso) 1701 igc_tx_csum(tx_ring, first, launch_time, first_flag); 1702 1703 igc_tx_map(tx_ring, first, hdr_len); 1704 1705 return NETDEV_TX_OK; 1706 1707 out_drop: 1708 dev_kfree_skb_any(first->skb); 1709 first->skb = NULL; 1710 1711 return NETDEV_TX_OK; 1712 } 1713 1714 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1715 struct sk_buff *skb) 1716 
{
	unsigned int r_idx = skb->queue_mapping;

	if (r_idx >= adapter->num_tx_queues)
		r_idx = r_idx % adapter->num_tx_queues;

	return adapter->tx_ring[r_idx];
}

static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct igc_adapter *adapter = netdev_priv(netdev);

	/* The minimum packet size with TCTL.PSP set is 17, so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (skb->len < 17) {
		if (skb_padto(skb, 17))
			return NETDEV_TX_OK;
		skb->len = 17;
	}

	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
}

static void igc_rx_checksum(struct igc_ring *ring,
			    union igc_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Skip checksum validation if the Ignore Checksum (IXSM) bit is set */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
		return;

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (igc_test_staterr(rx_desc,
			     IGC_RXDEXT_STATERR_L4E |
			     IGC_RXDEXT_STATERR_IPE)) {
		/* work around an erratum with SCTP packets where the TCPE
		 * (aka L4E) bit is set incorrectly on 64 byte (60 byte w/o
		 * CRC) packets; let the stack check the crc32c instead
		 */
		if (!(skb->len == 60 &&
		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		}
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
				      IGC_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
		   le32_to_cpu(rx_desc->wb.upper.status_error));
}

/* Mapping HW RSS Type to enum pkt_hash_types */
static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
	[IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2,
	[IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions */
	[13] = PKT_HASH_TYPE_NONE,
	[14] = PKT_HASH_TYPE_NONE,
	[15] = PKT_HASH_TYPE_NONE,
};

static inline void igc_rx_hash(struct igc_ring *ring,
			       union igc_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	if (ring->netdev->features & NETIF_F_RXHASH) {
		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
		u32 rss_type = igc_rss_type(rx_desc);

		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
	}
}

static void igc_rx_vlan(struct igc_ring *rx_ring,
			union igc_adv_rx_desc *rx_desc,
			struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;
1819 u16 vid; 1820 1821 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && 1822 igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) { 1823 if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) && 1824 test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) 1825 vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); 1826 else 1827 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 1828 1829 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 1830 } 1831 } 1832 1833 /** 1834 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1835 * @rx_ring: rx descriptor ring packet is being transacted on 1836 * @rx_desc: pointer to the EOP Rx descriptor 1837 * @skb: pointer to current skb being populated 1838 * 1839 * This function checks the ring, descriptor, and packet information in order 1840 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1841 * skb. 1842 */ 1843 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1844 union igc_adv_rx_desc *rx_desc, 1845 struct sk_buff *skb) 1846 { 1847 igc_rx_hash(rx_ring, rx_desc, skb); 1848 1849 igc_rx_checksum(rx_ring, rx_desc, skb); 1850 1851 igc_rx_vlan(rx_ring, rx_desc, skb); 1852 1853 skb_record_rx_queue(skb, rx_ring->queue_index); 1854 1855 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1856 } 1857 1858 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1859 { 1860 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1861 struct igc_adapter *adapter = netdev_priv(netdev); 1862 struct igc_hw *hw = &adapter->hw; 1863 u32 ctrl; 1864 1865 ctrl = rd32(IGC_CTRL); 1866 1867 if (enable) { 1868 /* enable VLAN tag insert/strip */ 1869 ctrl |= IGC_CTRL_VME; 1870 } else { 1871 /* disable VLAN tag insert/strip */ 1872 ctrl &= ~IGC_CTRL_VME; 1873 } 1874 wr32(IGC_CTRL, ctrl); 1875 } 1876 1877 static void igc_restore_vlan(struct igc_adapter *adapter) 1878 { 1879 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1880 } 1881 1882 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, 1883 const unsigned int size, 1884 int *rx_buffer_pgcnt) 1885 { 1886 struct igc_rx_buffer *rx_buffer; 1887 1888 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1889 *rx_buffer_pgcnt = 1890 #if (PAGE_SIZE < 8192) 1891 page_count(rx_buffer->page); 1892 #else 1893 0; 1894 #endif 1895 prefetchw(rx_buffer->page); 1896 1897 /* we are reusing so sync this buffer for CPU use */ 1898 dma_sync_single_range_for_cpu(rx_ring->dev, 1899 rx_buffer->dma, 1900 rx_buffer->page_offset, 1901 size, 1902 DMA_FROM_DEVICE); 1903 1904 rx_buffer->pagecnt_bias--; 1905 1906 return rx_buffer; 1907 } 1908 1909 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1910 unsigned int truesize) 1911 { 1912 #if (PAGE_SIZE < 8192) 1913 buffer->page_offset ^= truesize; 1914 #else 1915 buffer->page_offset += truesize; 1916 #endif 1917 } 1918 1919 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1920 unsigned int size) 1921 { 1922 unsigned int truesize; 1923 1924 #if (PAGE_SIZE < 8192) 1925 truesize = igc_rx_pg_size(ring) / 2; 1926 #else 1927 truesize = ring_uses_build_skb(ring) ? 
1928 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1929 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1930 SKB_DATA_ALIGN(size); 1931 #endif 1932 return truesize; 1933 } 1934 1935 /** 1936 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1937 * @rx_ring: rx descriptor ring to transact packets on 1938 * @rx_buffer: buffer containing page to add 1939 * @skb: sk_buff to place the data into 1940 * @size: size of buffer to be added 1941 * 1942 * This function will add the data contained in rx_buffer->page to the skb. 1943 */ 1944 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1945 struct igc_rx_buffer *rx_buffer, 1946 struct sk_buff *skb, 1947 unsigned int size) 1948 { 1949 unsigned int truesize; 1950 1951 #if (PAGE_SIZE < 8192) 1952 truesize = igc_rx_pg_size(rx_ring) / 2; 1953 #else 1954 truesize = ring_uses_build_skb(rx_ring) ? 1955 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1956 SKB_DATA_ALIGN(size); 1957 #endif 1958 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1959 rx_buffer->page_offset, size, truesize); 1960 1961 igc_rx_buffer_flip(rx_buffer, truesize); 1962 } 1963 1964 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1965 struct igc_rx_buffer *rx_buffer, 1966 struct xdp_buff *xdp) 1967 { 1968 unsigned int size = xdp->data_end - xdp->data; 1969 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1970 unsigned int metasize = xdp->data - xdp->data_meta; 1971 struct sk_buff *skb; 1972 1973 /* prefetch first cache line of first page */ 1974 net_prefetch(xdp->data_meta); 1975 1976 /* build an skb around the page buffer */ 1977 skb = napi_build_skb(xdp->data_hard_start, truesize); 1978 if (unlikely(!skb)) 1979 return NULL; 1980 1981 /* update pointers within the skb to store the data */ 1982 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1983 __skb_put(skb, size); 1984 if (metasize) 1985 skb_metadata_set(skb, metasize); 1986 1987 igc_rx_buffer_flip(rx_buffer, truesize); 1988 return skb; 1989 } 1990 1991 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1992 struct igc_rx_buffer *rx_buffer, 1993 struct igc_xdp_buff *ctx) 1994 { 1995 struct xdp_buff *xdp = &ctx->xdp; 1996 unsigned int metasize = xdp->data - xdp->data_meta; 1997 unsigned int size = xdp->data_end - xdp->data; 1998 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1999 void *va = xdp->data; 2000 unsigned int headlen; 2001 struct sk_buff *skb; 2002 2003 /* prefetch first cache line of first page */ 2004 net_prefetch(xdp->data_meta); 2005 2006 /* allocate a skb to store the frags */ 2007 skb = napi_alloc_skb(&rx_ring->q_vector->napi, 2008 IGC_RX_HDR_LEN + metasize); 2009 if (unlikely(!skb)) 2010 return NULL; 2011 2012 if (ctx->rx_ts) { 2013 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2014 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2015 } 2016 2017 /* Determine available headroom for copy */ 2018 headlen = size; 2019 if (headlen > IGC_RX_HDR_LEN) 2020 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 2021 2022 /* align pull length to size of long to optimize memcpy performance */ 2023 memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 2024 ALIGN(headlen + metasize, sizeof(long))); 2025 2026 if (metasize) { 2027 skb_metadata_set(skb, metasize); 2028 __skb_pull(skb, metasize); 2029 } 2030 2031 /* update all of the pointers */ 2032 size -= headlen; 2033 if (size) { 2034 skb_add_rx_frag(skb, 0, rx_buffer->page, 2035 (va + headlen) - page_address(rx_buffer->page), 2036 size, truesize); 2037 igc_rx_buffer_flip(rx_buffer, 
truesize); 2038 } else { 2039 rx_buffer->pagecnt_bias++; 2040 } 2041 2042 return skb; 2043 } 2044 2045 /** 2046 * igc_reuse_rx_page - page flip buffer and store it back on the ring 2047 * @rx_ring: rx descriptor ring to store buffers on 2048 * @old_buff: donor buffer to have page reused 2049 * 2050 * Synchronizes page for reuse by the adapter 2051 */ 2052 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 2053 struct igc_rx_buffer *old_buff) 2054 { 2055 u16 nta = rx_ring->next_to_alloc; 2056 struct igc_rx_buffer *new_buff; 2057 2058 new_buff = &rx_ring->rx_buffer_info[nta]; 2059 2060 /* update, and store next to alloc */ 2061 nta++; 2062 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 2063 2064 /* Transfer page from old buffer to new buffer. 2065 * Move each member individually to avoid possible store 2066 * forwarding stalls. 2067 */ 2068 new_buff->dma = old_buff->dma; 2069 new_buff->page = old_buff->page; 2070 new_buff->page_offset = old_buff->page_offset; 2071 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 2072 } 2073 2074 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 2075 int rx_buffer_pgcnt) 2076 { 2077 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 2078 struct page *page = rx_buffer->page; 2079 2080 /* avoid re-using remote and pfmemalloc pages */ 2081 if (!dev_page_is_reusable(page)) 2082 return false; 2083 2084 #if (PAGE_SIZE < 8192) 2085 /* if we are only owner of page we can reuse it */ 2086 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 2087 return false; 2088 #else 2089 #define IGC_LAST_OFFSET \ 2090 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 2091 2092 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 2093 return false; 2094 #endif 2095 2096 /* If we have drained the page fragment pool we need to update 2097 * the pagecnt_bias and page count so that we fully restock the 2098 * number of references the driver holds. 2099 */ 2100 if (unlikely(pagecnt_bias == 1)) { 2101 page_ref_add(page, USHRT_MAX - 1); 2102 rx_buffer->pagecnt_bias = USHRT_MAX; 2103 } 2104 2105 return true; 2106 } 2107 2108 /** 2109 * igc_is_non_eop - process handling of non-EOP buffers 2110 * @rx_ring: Rx ring being processed 2111 * @rx_desc: Rx descriptor for current buffer 2112 * 2113 * This function updates next to clean. If the buffer is an EOP buffer 2114 * this function exits returning false, otherwise it will place the 2115 * sk_buff in the next buffer to be chained and return true indicating 2116 * that this is in fact a non-EOP buffer. 2117 */ 2118 static bool igc_is_non_eop(struct igc_ring *rx_ring, 2119 union igc_adv_rx_desc *rx_desc) 2120 { 2121 u32 ntc = rx_ring->next_to_clean + 1; 2122 2123 /* fetch, update, and store next to clean */ 2124 ntc = (ntc < rx_ring->count) ? ntc : 0; 2125 rx_ring->next_to_clean = ntc; 2126 2127 prefetch(IGC_RX_DESC(rx_ring, ntc)); 2128 2129 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 2130 return false; 2131 2132 return true; 2133 } 2134 2135 /** 2136 * igc_cleanup_headers - Correct corrupted or empty headers 2137 * @rx_ring: rx descriptor ring packet is being transacted on 2138 * @rx_desc: pointer to the EOP Rx descriptor 2139 * @skb: pointer to current skb being fixed 2140 * 2141 * Address the case where we are pulling data in on pages only 2142 * and as such no data is present in the skb header. 2143 * 2144 * In addition if skb is not at least 60 bytes we need to pad it so that 2145 * it is large enough to qualify as a valid Ethernet frame. 
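 * Padding is done with eth_skb_pad(), which frees the skb on failure.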
2146 * 2147 * Returns true if an error was encountered and skb was freed. 2148 */ 2149 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 2150 union igc_adv_rx_desc *rx_desc, 2151 struct sk_buff *skb) 2152 { 2153 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 2154 struct net_device *netdev = rx_ring->netdev; 2155 2156 if (!(netdev->features & NETIF_F_RXALL)) { 2157 dev_kfree_skb_any(skb); 2158 return true; 2159 } 2160 } 2161 2162 /* if eth_skb_pad returns an error the skb was freed */ 2163 if (eth_skb_pad(skb)) 2164 return true; 2165 2166 return false; 2167 } 2168 2169 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 2170 struct igc_rx_buffer *rx_buffer, 2171 int rx_buffer_pgcnt) 2172 { 2173 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2174 /* hand second half of page back to the ring */ 2175 igc_reuse_rx_page(rx_ring, rx_buffer); 2176 } else { 2177 /* We are not reusing the buffer so unmap it and free 2178 * any references we are holding to it 2179 */ 2180 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 2181 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 2182 IGC_RX_DMA_ATTR); 2183 __page_frag_cache_drain(rx_buffer->page, 2184 rx_buffer->pagecnt_bias); 2185 } 2186 2187 /* clear contents of rx_buffer */ 2188 rx_buffer->page = NULL; 2189 } 2190 2191 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 2192 { 2193 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 2194 2195 if (ring_uses_build_skb(rx_ring)) 2196 return IGC_SKB_PAD; 2197 if (igc_xdp_is_enabled(adapter)) 2198 return XDP_PACKET_HEADROOM; 2199 2200 return 0; 2201 } 2202 2203 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 2204 struct igc_rx_buffer *bi) 2205 { 2206 struct page *page = bi->page; 2207 dma_addr_t dma; 2208 2209 /* since we are recycling buffers we should seldom need to alloc */ 2210 if (likely(page)) 2211 return true; 2212 2213 /* alloc new page for storage */ 2214 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 2215 if (unlikely(!page)) { 2216 rx_ring->rx_stats.alloc_failed++; 2217 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2218 return false; 2219 } 2220 2221 /* map page for use */ 2222 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 2223 igc_rx_pg_size(rx_ring), 2224 DMA_FROM_DEVICE, 2225 IGC_RX_DMA_ATTR); 2226 2227 /* if mapping failed free memory back to system since 2228 * there isn't much point in holding memory we can't use 2229 */ 2230 if (dma_mapping_error(rx_ring->dev, dma)) { 2231 __free_page(page); 2232 2233 rx_ring->rx_stats.alloc_failed++; 2234 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2235 return false; 2236 } 2237 2238 bi->dma = dma; 2239 bi->page = page; 2240 bi->page_offset = igc_rx_offset(rx_ring); 2241 page_ref_add(page, USHRT_MAX - 1); 2242 bi->pagecnt_bias = USHRT_MAX; 2243 2244 return true; 2245 } 2246 2247 /** 2248 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 2249 * @rx_ring: rx descriptor ring 2250 * @cleaned_count: number of buffers to clean 2251 */ 2252 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 2253 { 2254 union igc_adv_rx_desc *rx_desc; 2255 u16 i = rx_ring->next_to_use; 2256 struct igc_rx_buffer *bi; 2257 u16 bufsz; 2258 2259 /* nothing to do */ 2260 if (!cleaned_count) 2261 return; 2262 2263 rx_desc = IGC_RX_DESC(rx_ring, i); 2264 bi = &rx_ring->rx_buffer_info[i]; 2265 i -= rx_ring->count; 2266 2267 bufsz = igc_rx_bufsz(rx_ring); 2268 2269 do { 2270 if (!igc_alloc_mapped_page(rx_ring, bi)) 2271 break; 2272 2273 /* sync the 
buffer for use by the device */ 2274 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 2275 bi->page_offset, bufsz, 2276 DMA_FROM_DEVICE); 2277 2278 /* Refresh the desc even if buffer_addrs didn't change 2279 * because each write-back erases this info. 2280 */ 2281 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2282 2283 rx_desc++; 2284 bi++; 2285 i++; 2286 if (unlikely(!i)) { 2287 rx_desc = IGC_RX_DESC(rx_ring, 0); 2288 bi = rx_ring->rx_buffer_info; 2289 i -= rx_ring->count; 2290 } 2291 2292 /* clear the length for the next_to_use descriptor */ 2293 rx_desc->wb.upper.length = 0; 2294 2295 cleaned_count--; 2296 } while (cleaned_count); 2297 2298 i += rx_ring->count; 2299 2300 if (rx_ring->next_to_use != i) { 2301 /* record the next descriptor to use */ 2302 rx_ring->next_to_use = i; 2303 2304 /* update next to alloc since we have filled the ring */ 2305 rx_ring->next_to_alloc = i; 2306 2307 /* Force memory writes to complete before letting h/w 2308 * know there are new descriptors to fetch. (Only 2309 * applicable for weak-ordered memory model archs, 2310 * such as IA-64). 2311 */ 2312 wmb(); 2313 writel(i, rx_ring->tail); 2314 } 2315 } 2316 2317 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2318 { 2319 union igc_adv_rx_desc *desc; 2320 u16 i = ring->next_to_use; 2321 struct igc_rx_buffer *bi; 2322 dma_addr_t dma; 2323 bool ok = true; 2324 2325 if (!count) 2326 return ok; 2327 2328 XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); 2329 2330 desc = IGC_RX_DESC(ring, i); 2331 bi = &ring->rx_buffer_info[i]; 2332 i -= ring->count; 2333 2334 do { 2335 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2336 if (!bi->xdp) { 2337 ok = false; 2338 break; 2339 } 2340 2341 dma = xsk_buff_xdp_get_dma(bi->xdp); 2342 desc->read.pkt_addr = cpu_to_le64(dma); 2343 2344 desc++; 2345 bi++; 2346 i++; 2347 if (unlikely(!i)) { 2348 desc = IGC_RX_DESC(ring, 0); 2349 bi = ring->rx_buffer_info; 2350 i -= ring->count; 2351 } 2352 2353 /* Clear the length for the next_to_use descriptor. */ 2354 desc->wb.upper.length = 0; 2355 2356 count--; 2357 } while (count); 2358 2359 i += ring->count; 2360 2361 if (ring->next_to_use != i) { 2362 ring->next_to_use = i; 2363 2364 /* Force memory writes to complete before letting h/w 2365 * know there are new descriptors to fetch. (Only 2366 * applicable for weak-ordered memory model archs, 2367 * such as IA-64). 2368 */ 2369 wmb(); 2370 writel(i, ring->tail); 2371 } 2372 2373 return ok; 2374 } 2375 2376 /* This function requires __netif_tx_lock is held by the caller. */ 2377 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2378 struct xdp_frame *xdpf) 2379 { 2380 struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 2381 u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? 
sinfo->nr_frags : 0; 2382 u16 count, index = ring->next_to_use; 2383 struct igc_tx_buffer *head = &ring->tx_buffer_info[index]; 2384 struct igc_tx_buffer *buffer = head; 2385 union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index); 2386 u32 olinfo_status, len = xdpf->len, cmd_type; 2387 void *data = xdpf->data; 2388 u16 i; 2389 2390 count = TXD_USE_COUNT(len); 2391 for (i = 0; i < nr_frags; i++) 2392 count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); 2393 2394 if (igc_maybe_stop_tx(ring, count + 3)) { 2395 /* this is a hard error */ 2396 return -EBUSY; 2397 } 2398 2399 i = 0; 2400 head->bytecount = xdp_get_frame_len(xdpf); 2401 head->type = IGC_TX_BUFFER_TYPE_XDP; 2402 head->gso_segs = 1; 2403 head->xdpf = xdpf; 2404 2405 olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2406 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2407 2408 for (;;) { 2409 dma_addr_t dma; 2410 2411 dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE); 2412 if (dma_mapping_error(ring->dev, dma)) { 2413 netdev_err_once(ring->netdev, 2414 "Failed to map DMA for TX\n"); 2415 goto unmap; 2416 } 2417 2418 dma_unmap_len_set(buffer, len, len); 2419 dma_unmap_addr_set(buffer, dma, dma); 2420 2421 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2422 IGC_ADVTXD_DCMD_IFCS | len; 2423 2424 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2425 desc->read.buffer_addr = cpu_to_le64(dma); 2426 2427 buffer->protocol = 0; 2428 2429 if (++index == ring->count) 2430 index = 0; 2431 2432 if (i == nr_frags) 2433 break; 2434 2435 buffer = &ring->tx_buffer_info[index]; 2436 desc = IGC_TX_DESC(ring, index); 2437 desc->read.olinfo_status = 0; 2438 2439 data = skb_frag_address(&sinfo->frags[i]); 2440 len = skb_frag_size(&sinfo->frags[i]); 2441 i++; 2442 } 2443 desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD); 2444 2445 netdev_tx_sent_queue(txring_txq(ring), head->bytecount); 2446 /* set the timestamp */ 2447 head->time_stamp = jiffies; 2448 /* set next_to_watch value indicating a packet is present */ 2449 head->next_to_watch = desc; 2450 ring->next_to_use = index; 2451 2452 return 0; 2453 2454 unmap: 2455 for (;;) { 2456 buffer = &ring->tx_buffer_info[index]; 2457 if (dma_unmap_len(buffer, len)) 2458 dma_unmap_page(ring->dev, 2459 dma_unmap_addr(buffer, dma), 2460 dma_unmap_len(buffer, len), 2461 DMA_TO_DEVICE); 2462 dma_unmap_len_set(buffer, len, 0); 2463 if (buffer == head) 2464 break; 2465 2466 if (!index) 2467 index += ring->count; 2468 index--; 2469 } 2470 2471 return -ENOMEM; 2472 } 2473 2474 struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu) 2475 { 2476 int index = cpu; 2477 2478 if (unlikely(index < 0)) 2479 index = 0; 2480 2481 while (index >= adapter->num_tx_queues) 2482 index -= adapter->num_tx_queues; 2483 2484 return adapter->tx_ring[index]; 2485 } 2486 2487 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2488 { 2489 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2490 int cpu = smp_processor_id(); 2491 struct netdev_queue *nq; 2492 struct igc_ring *ring; 2493 int res; 2494 2495 if (unlikely(!xdpf)) 2496 return -EFAULT; 2497 2498 ring = igc_get_tx_ring(adapter, cpu); 2499 nq = txring_txq(ring); 2500 2501 __netif_tx_lock(nq, cpu); 2502 /* Avoid transmit queue timeout since we share it with the slow path */ 2503 txq_trans_cond_update(nq); 2504 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2505 __netif_tx_unlock(nq); 2506 return res; 2507 } 2508 2509 /* This function assumes rcu_read_lock() is held by the caller. 
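 * It maps the bpf_prog_run_xdp() verdict onto the driver's IGC_XDP_*
 * result codes.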
*/ 2510 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2511 struct bpf_prog *prog, 2512 struct xdp_buff *xdp) 2513 { 2514 u32 act = bpf_prog_run_xdp(prog, xdp); 2515 2516 switch (act) { 2517 case XDP_PASS: 2518 return IGC_XDP_PASS; 2519 case XDP_TX: 2520 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2521 goto out_failure; 2522 return IGC_XDP_TX; 2523 case XDP_REDIRECT: 2524 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2525 goto out_failure; 2526 return IGC_XDP_REDIRECT; 2527 break; 2528 default: 2529 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); 2530 fallthrough; 2531 case XDP_ABORTED: 2532 out_failure: 2533 trace_xdp_exception(adapter->netdev, prog, act); 2534 fallthrough; 2535 case XDP_DROP: 2536 return IGC_XDP_CONSUMED; 2537 } 2538 } 2539 2540 static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp) 2541 { 2542 struct bpf_prog *prog; 2543 int res; 2544 2545 prog = READ_ONCE(adapter->xdp_prog); 2546 if (!prog) { 2547 res = IGC_XDP_PASS; 2548 goto out; 2549 } 2550 2551 res = __igc_xdp_run_prog(adapter, prog, xdp); 2552 2553 out: 2554 return res; 2555 } 2556 2557 /* This function assumes __netif_tx_lock is held by the caller. */ 2558 void igc_flush_tx_descriptors(struct igc_ring *ring) 2559 { 2560 /* Once tail pointer is updated, hardware can fetch the descriptors 2561 * any time so we issue a write membar here to ensure all memory 2562 * writes are complete before the tail pointer is updated. 2563 */ 2564 wmb(); 2565 writel(ring->next_to_use, ring->tail); 2566 } 2567 2568 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2569 { 2570 int cpu = smp_processor_id(); 2571 struct netdev_queue *nq; 2572 struct igc_ring *ring; 2573 2574 if (status & IGC_XDP_TX) { 2575 ring = igc_get_tx_ring(adapter, cpu); 2576 nq = txring_txq(ring); 2577 2578 __netif_tx_lock(nq, cpu); 2579 igc_flush_tx_descriptors(ring); 2580 __netif_tx_unlock(nq); 2581 } 2582 2583 if (status & IGC_XDP_REDIRECT) 2584 xdp_do_flush(); 2585 } 2586 2587 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2588 unsigned int packets, unsigned int bytes) 2589 { 2590 struct igc_ring *ring = q_vector->rx.ring; 2591 2592 u64_stats_update_begin(&ring->rx_syncp); 2593 ring->rx_stats.packets += packets; 2594 ring->rx_stats.bytes += bytes; 2595 u64_stats_update_end(&ring->rx_syncp); 2596 2597 q_vector->rx.total_packets += packets; 2598 q_vector->rx.total_bytes += bytes; 2599 } 2600 2601 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2602 { 2603 unsigned int total_bytes = 0, total_packets = 0; 2604 struct igc_adapter *adapter = q_vector->adapter; 2605 struct igc_ring *rx_ring = q_vector->rx.ring; 2606 struct sk_buff *skb = rx_ring->skb; 2607 u16 cleaned_count = igc_desc_unused(rx_ring); 2608 int xdp_status = 0, rx_buffer_pgcnt; 2609 int xdp_res = 0; 2610 2611 while (likely(total_packets < budget)) { 2612 struct igc_xdp_buff ctx = { .rx_ts = NULL }; 2613 struct igc_rx_buffer *rx_buffer; 2614 union igc_adv_rx_desc *rx_desc; 2615 unsigned int size, truesize; 2616 int pkt_offset = 0; 2617 void *pktbuf; 2618 2619 /* return some buffers to hardware, one at a time is too slow */ 2620 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2621 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2622 cleaned_count = 0; 2623 } 2624 2625 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2626 size = le16_to_cpu(rx_desc->wb.upper.length); 2627 if (!size) 2628 break; 2629 2630 /* This memory barrier is needed to keep us from reading 2631 * any other fields out of 
the rx_desc until we know the 2632 * descriptor has been written back 2633 */ 2634 dma_rmb(); 2635 2636 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2637 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2638 2639 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2640 2641 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2642 ctx.rx_ts = pktbuf; 2643 pkt_offset = IGC_TS_HDR_LEN; 2644 size -= IGC_TS_HDR_LEN; 2645 } 2646 2647 if (igc_fpe_is_pmac_enabled(adapter) && 2648 igc_fpe_handle_mpacket(adapter, rx_desc, size, pktbuf)) { 2649 /* Advance the ring next-to-clean */ 2650 igc_is_non_eop(rx_ring, rx_desc); 2651 cleaned_count++; 2652 continue; 2653 } 2654 2655 if (!skb) { 2656 xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); 2657 xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), 2658 igc_rx_offset(rx_ring) + pkt_offset, 2659 size, true); 2660 xdp_buff_clear_frags_flag(&ctx.xdp); 2661 ctx.rx_desc = rx_desc; 2662 2663 xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp); 2664 } 2665 2666 if (xdp_res) { 2667 switch (xdp_res) { 2668 case IGC_XDP_CONSUMED: 2669 rx_buffer->pagecnt_bias++; 2670 break; 2671 case IGC_XDP_TX: 2672 case IGC_XDP_REDIRECT: 2673 igc_rx_buffer_flip(rx_buffer, truesize); 2674 xdp_status |= xdp_res; 2675 break; 2676 } 2677 2678 total_packets++; 2679 total_bytes += size; 2680 } else if (skb) 2681 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2682 else if (ring_uses_build_skb(rx_ring)) 2683 skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); 2684 else 2685 skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); 2686 2687 /* exit if we failed to retrieve a buffer */ 2688 if (!xdp_res && !skb) { 2689 rx_ring->rx_stats.alloc_failed++; 2690 rx_buffer->pagecnt_bias++; 2691 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2692 break; 2693 } 2694 2695 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2696 cleaned_count++; 2697 2698 /* fetch next buffer in frame if non-eop */ 2699 if (igc_is_non_eop(rx_ring, rx_desc)) 2700 continue; 2701 2702 /* verify the packet layout is correct */ 2703 if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2704 skb = NULL; 2705 continue; 2706 } 2707 2708 /* probably a little skewed due to removing CRC */ 2709 total_bytes += skb->len; 2710 2711 /* populate checksum, VLAN, and protocol */ 2712 igc_process_skb_fields(rx_ring, rx_desc, skb); 2713 2714 napi_gro_receive(&q_vector->napi, skb); 2715 2716 /* reset skb pointer */ 2717 skb = NULL; 2718 2719 /* update budget accounting */ 2720 total_packets++; 2721 } 2722 2723 if (xdp_status) 2724 igc_finalize_xdp(adapter, xdp_status); 2725 2726 /* place incomplete frames back on ring for completion */ 2727 rx_ring->skb = skb; 2728 2729 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2730 2731 if (cleaned_count) 2732 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2733 2734 return total_packets; 2735 } 2736 2737 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2738 struct igc_xdp_buff *ctx) 2739 { 2740 struct xdp_buff *xdp = &ctx->xdp; 2741 unsigned int totalsize = xdp->data_end - xdp->data_meta; 2742 unsigned int metasize = xdp->data - xdp->data_meta; 2743 struct sk_buff *skb; 2744 2745 net_prefetch(xdp->data_meta); 2746 2747 skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); 2748 if (unlikely(!skb)) 2749 return NULL; 2750 2751 memcpy(__skb_put(skb, totalsize), xdp->data_meta, 2752 ALIGN(totalsize, sizeof(long))); 2753 2754 if (metasize) { 2755 skb_metadata_set(skb, metasize); 2756 __skb_pull(skb, metasize); 2757 
}
2758
2759 if (ctx->rx_ts) {
2760 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
2761 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts;
2762 }
2763
2764 return skb;
2765 }
2766
2767 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2768 union igc_adv_rx_desc *desc,
2769 struct igc_xdp_buff *ctx)
2770 {
2771 struct igc_ring *ring = q_vector->rx.ring;
2772 struct sk_buff *skb;
2773
2774 skb = igc_construct_skb_zc(ring, ctx);
2775 if (!skb) {
2776 ring->rx_stats.alloc_failed++;
2777 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags);
2778 return;
2779 }
2780
2781 if (igc_cleanup_headers(ring, desc, skb))
2782 return;
2783
2784 igc_process_skb_fields(ring, desc, skb);
2785 napi_gro_receive(&q_vector->napi, skb);
2786 }
2787
2788 static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
2789 {
2790 /* The xdp_buff pointer used by the ZC code path is allocated as an
2791 * xdp_buff_xsk. igc_xdp_buff shares its layout with xdp_buff_xsk, and
2792 * the private igc_xdp_buff fields fall into xdp_buff_xsk->cb.
2793 */
2794 return (struct igc_xdp_buff *)xdp;
2795 }
2796
2797 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2798 {
2799 struct igc_adapter *adapter = q_vector->adapter;
2800 struct igc_ring *ring = q_vector->rx.ring;
2801 u16 cleaned_count = igc_desc_unused(ring);
2802 int total_bytes = 0, total_packets = 0;
2803 u16 ntc = ring->next_to_clean;
2804 struct bpf_prog *prog;
2805 bool failure = false;
2806 int xdp_status = 0;
2807
2808 rcu_read_lock();
2809
2810 prog = READ_ONCE(adapter->xdp_prog);
2811
2812 while (likely(total_packets < budget)) {
2813 union igc_adv_rx_desc *desc;
2814 struct igc_rx_buffer *bi;
2815 struct igc_xdp_buff *ctx;
2816 unsigned int size;
2817 int res;
2818
2819 desc = IGC_RX_DESC(ring, ntc);
2820 size = le16_to_cpu(desc->wb.upper.length);
2821 if (!size)
2822 break;
2823
2824 /* This memory barrier is needed to keep us from reading
2825 * any other fields out of the rx_desc until we know the
2826 * descriptor has been written back
2827 */
2828 dma_rmb();
2829
2830 bi = &ring->rx_buffer_info[ntc];
2831
2832 ctx = xsk_buff_to_igc_ctx(bi->xdp);
2833 ctx->rx_desc = desc;
2834
2835 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2836 ctx->rx_ts = bi->xdp->data;
2837
2838 bi->xdp->data += IGC_TS_HDR_LEN;
2839
2840 /* The HW timestamp location has been saved in ctx->rx_ts above.
2841 * The metadata length seen by the XDP program should be 0.
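 * data_meta is therefore advanced together with data below so that
 * metasize stays zero.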
2842 */ 2843 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2844 size -= IGC_TS_HDR_LEN; 2845 } else { 2846 ctx->rx_ts = NULL; 2847 } 2848 2849 bi->xdp->data_end = bi->xdp->data + size; 2850 xsk_buff_dma_sync_for_cpu(bi->xdp); 2851 2852 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2853 switch (res) { 2854 case IGC_XDP_PASS: 2855 igc_dispatch_skb_zc(q_vector, desc, ctx); 2856 fallthrough; 2857 case IGC_XDP_CONSUMED: 2858 xsk_buff_free(bi->xdp); 2859 break; 2860 case IGC_XDP_TX: 2861 case IGC_XDP_REDIRECT: 2862 xdp_status |= res; 2863 break; 2864 } 2865 2866 bi->xdp = NULL; 2867 total_bytes += size; 2868 total_packets++; 2869 cleaned_count++; 2870 ntc++; 2871 if (ntc == ring->count) 2872 ntc = 0; 2873 } 2874 2875 ring->next_to_clean = ntc; 2876 rcu_read_unlock(); 2877 2878 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2879 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2880 2881 if (xdp_status) 2882 igc_finalize_xdp(adapter, xdp_status); 2883 2884 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2885 2886 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2887 if (failure || ring->next_to_clean == ring->next_to_use) 2888 xsk_set_rx_need_wakeup(ring->xsk_pool); 2889 else 2890 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2891 return total_packets; 2892 } 2893 2894 return failure ? budget : total_packets; 2895 } 2896 2897 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2898 unsigned int packets, unsigned int bytes) 2899 { 2900 struct igc_ring *ring = q_vector->tx.ring; 2901 2902 u64_stats_update_begin(&ring->tx_syncp); 2903 ring->tx_stats.bytes += bytes; 2904 ring->tx_stats.packets += packets; 2905 u64_stats_update_end(&ring->tx_syncp); 2906 2907 q_vector->tx.total_bytes += bytes; 2908 q_vector->tx.total_packets += packets; 2909 } 2910 2911 static void igc_xsk_request_timestamp(void *_priv) 2912 { 2913 struct igc_metadata_request *meta_req = _priv; 2914 struct igc_ring *tx_ring = meta_req->tx_ring; 2915 struct igc_tx_timestamp_request *tstamp; 2916 u32 tx_flags = IGC_TX_FLAGS_TSTAMP; 2917 struct igc_adapter *adapter; 2918 unsigned long lock_flags; 2919 bool found = false; 2920 int i; 2921 2922 if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { 2923 adapter = netdev_priv(tx_ring->netdev); 2924 2925 spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); 2926 2927 /* Search for available tstamp regs */ 2928 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 2929 tstamp = &adapter->tx_tstamp[i]; 2930 2931 /* tstamp->skb and tstamp->xsk_tx_buffer are in union. 2932 * When tstamp->skb is equal to NULL, 2933 * tstamp->xsk_tx_buffer is equal to NULL as well. 2934 * This condition means that the particular tstamp reg 2935 * is not occupied by other packet. 2936 */ 2937 if (!tstamp->skb) { 2938 found = true; 2939 break; 2940 } 2941 } 2942 2943 /* Return if no available tstamp regs */ 2944 if (!found) { 2945 adapter->tx_hwtstamp_skipped++; 2946 spin_unlock_irqrestore(&adapter->ptp_tx_lock, 2947 lock_flags); 2948 return; 2949 } 2950 2951 tstamp->start = jiffies; 2952 tstamp->xsk_queue_index = tx_ring->queue_index; 2953 tstamp->xsk_tx_buffer = meta_req->tx_buffer; 2954 tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; 2955 2956 /* Hold the transmit completion until timestamp is ready */ 2957 meta_req->tx_buffer->xsk_pending_ts = true; 2958 2959 /* Keep the pointer to tx_timestamp, which is located in XDP 2960 * metadata area. It is the location to store the value of 2961 * tx hardware timestamp. 
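 * xsk_tx_metadata_to_compl() below saves that location in
 * tstamp->xsk_meta so the completion path can fill in the hardware
 * timestamp later.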
2962 */ 2963 xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); 2964 2965 /* Set timestamp bit based on the _TSTAMP(_X) bit. */ 2966 tx_flags |= tstamp->flags; 2967 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2968 IGC_TX_FLAGS_TSTAMP, 2969 (IGC_ADVTXD_MAC_TSTAMP)); 2970 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2971 IGC_TX_FLAGS_TSTAMP_1, 2972 (IGC_ADVTXD_TSTAMP_REG_1)); 2973 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2974 IGC_TX_FLAGS_TSTAMP_2, 2975 (IGC_ADVTXD_TSTAMP_REG_2)); 2976 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2977 IGC_TX_FLAGS_TSTAMP_3, 2978 (IGC_ADVTXD_TSTAMP_REG_3)); 2979 2980 spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); 2981 } 2982 } 2983 2984 static u64 igc_xsk_fill_timestamp(void *_priv) 2985 { 2986 return *(u64 *)_priv; 2987 } 2988 2989 static void igc_xsk_request_launch_time(u64 launch_time, void *_priv) 2990 { 2991 struct igc_metadata_request *meta_req = _priv; 2992 struct igc_ring *tx_ring = meta_req->tx_ring; 2993 __le32 launch_time_offset; 2994 bool insert_empty = false; 2995 bool first_flag = false; 2996 u16 used_desc = 0; 2997 2998 if (!tx_ring->launchtime_enable) 2999 return; 3000 3001 launch_time_offset = igc_tx_launchtime(tx_ring, 3002 ns_to_ktime(launch_time), 3003 &first_flag, &insert_empty); 3004 if (insert_empty) { 3005 /* Disregard the launch time request if the required empty frame 3006 * fails to be inserted. 3007 */ 3008 if (igc_insert_empty_frame(tx_ring)) 3009 return; 3010 3011 meta_req->tx_buffer = 3012 &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 3013 /* Inserting an empty packet requires two descriptors: 3014 * one data descriptor and one context descriptor. 3015 */ 3016 used_desc += 2; 3017 } 3018 3019 /* Use one context descriptor to specify launch time and first flag. */ 3020 igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0); 3021 used_desc += 1; 3022 3023 /* Update the number of used descriptors in this request */ 3024 meta_req->used_desc += used_desc; 3025 } 3026 3027 const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { 3028 .tmo_request_timestamp = igc_xsk_request_timestamp, 3029 .tmo_fill_timestamp = igc_xsk_fill_timestamp, 3030 .tmo_request_launch_time = igc_xsk_request_launch_time, 3031 }; 3032 3033 static void igc_xdp_xmit_zc(struct igc_ring *ring) 3034 { 3035 struct xsk_buff_pool *pool = ring->xsk_pool; 3036 struct netdev_queue *nq = txring_txq(ring); 3037 union igc_adv_tx_desc *tx_desc = NULL; 3038 int cpu = smp_processor_id(); 3039 struct xdp_desc xdp_desc; 3040 u16 budget, ntu; 3041 3042 if (!netif_carrier_ok(ring->netdev)) 3043 return; 3044 3045 __netif_tx_lock(nq, cpu); 3046 3047 /* Avoid transmit queue timeout since we share it with the slow path */ 3048 txq_trans_cond_update(nq); 3049 3050 ntu = ring->next_to_use; 3051 budget = igc_desc_unused(ring); 3052 3053 /* Packets with launch time require one data descriptor and one context 3054 * descriptor. When the launch time falls into the next Qbv cycle, we 3055 * may need to insert an empty packet, which requires two more 3056 * descriptors. Therefore, to be safe, we always ensure we have at least 3057 * 4 descriptors available. 
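 * Any extra descriptors consumed by the metadata request are
 * subtracted from the budget via meta_req.used_desc inside the loop.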
3058 */ 3059 while (budget >= 4 && xsk_tx_peek_desc(pool, &xdp_desc)) { 3060 struct igc_metadata_request meta_req; 3061 struct xsk_tx_metadata *meta = NULL; 3062 struct igc_tx_buffer *bi; 3063 u32 olinfo_status; 3064 dma_addr_t dma; 3065 3066 meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | 3067 IGC_ADVTXD_DCMD_DEXT | 3068 IGC_ADVTXD_DCMD_IFCS | 3069 IGC_TXD_DCMD | xdp_desc.len; 3070 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 3071 3072 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 3073 meta = xsk_buff_get_metadata(pool, xdp_desc.addr); 3074 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 3075 bi = &ring->tx_buffer_info[ntu]; 3076 3077 meta_req.tx_ring = ring; 3078 meta_req.tx_buffer = bi; 3079 meta_req.meta = meta; 3080 meta_req.used_desc = 0; 3081 xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, 3082 &meta_req); 3083 3084 /* xsk_tx_metadata_request() may have updated next_to_use */ 3085 ntu = ring->next_to_use; 3086 3087 /* xsk_tx_metadata_request() may have updated Tx buffer info */ 3088 bi = meta_req.tx_buffer; 3089 3090 /* xsk_tx_metadata_request() may use a few descriptors */ 3091 budget -= meta_req.used_desc; 3092 3093 tx_desc = IGC_TX_DESC(ring, ntu); 3094 tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); 3095 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 3096 tx_desc->read.buffer_addr = cpu_to_le64(dma); 3097 3098 bi->type = IGC_TX_BUFFER_TYPE_XSK; 3099 bi->protocol = 0; 3100 bi->bytecount = xdp_desc.len; 3101 bi->gso_segs = 1; 3102 bi->time_stamp = jiffies; 3103 bi->next_to_watch = tx_desc; 3104 3105 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 3106 3107 ntu++; 3108 if (ntu == ring->count) 3109 ntu = 0; 3110 3111 ring->next_to_use = ntu; 3112 budget--; 3113 } 3114 3115 if (tx_desc) { 3116 igc_flush_tx_descriptors(ring); 3117 xsk_tx_release(pool); 3118 } 3119 3120 __netif_tx_unlock(nq); 3121 } 3122 3123 /** 3124 * igc_clean_tx_irq - Reclaim resources after transmit completes 3125 * @q_vector: pointer to q_vector containing needed info 3126 * @napi_budget: Used to determine if we are in netpoll 3127 * 3128 * returns true if ring is completely cleaned 3129 */ 3130 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 3131 { 3132 struct igc_adapter *adapter = q_vector->adapter; 3133 unsigned int total_bytes = 0, total_packets = 0; 3134 unsigned int budget = q_vector->tx.work_limit; 3135 struct igc_ring *tx_ring = q_vector->tx.ring; 3136 unsigned int i = tx_ring->next_to_clean; 3137 struct igc_tx_buffer *tx_buffer; 3138 union igc_adv_tx_desc *tx_desc; 3139 u32 xsk_frames = 0; 3140 3141 if (test_bit(__IGC_DOWN, &adapter->state)) 3142 return true; 3143 3144 tx_buffer = &tx_ring->tx_buffer_info[i]; 3145 tx_desc = IGC_TX_DESC(tx_ring, i); 3146 i -= tx_ring->count; 3147 3148 do { 3149 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 3150 3151 /* if next_to_watch is not set then there is no work pending */ 3152 if (!eop_desc) 3153 break; 3154 3155 /* prevent any other reads prior to eop_desc */ 3156 smp_rmb(); 3157 3158 /* if DD is not set pending work has not been completed */ 3159 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 3160 break; 3161 3162 if (igc_fpe_is_pmac_enabled(adapter) && 3163 igc_fpe_transmitted_smd_v(tx_desc)) 3164 ethtool_mmsv_event_handle(&adapter->fpe.mmsv, 3165 ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); 3166 3167 /* Hold the completions while there's a pending tx hardware 3168 * timestamp request from XDP Tx metadata. 
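 * Stop cleaning here so the descriptor is revisited on a later pass
 * once the timestamp is ready.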
3169 */ 3170 if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && 3171 tx_buffer->xsk_pending_ts) 3172 break; 3173 3174 /* clear next_to_watch to prevent false hangs */ 3175 tx_buffer->next_to_watch = NULL; 3176 3177 /* update the statistics for this packet */ 3178 total_bytes += tx_buffer->bytecount; 3179 total_packets += tx_buffer->gso_segs; 3180 3181 switch (tx_buffer->type) { 3182 case IGC_TX_BUFFER_TYPE_XSK: 3183 xsk_frames++; 3184 break; 3185 case IGC_TX_BUFFER_TYPE_XDP: 3186 xdp_return_frame(tx_buffer->xdpf); 3187 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3188 break; 3189 case IGC_TX_BUFFER_TYPE_SKB: 3190 napi_consume_skb(tx_buffer->skb, napi_budget); 3191 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3192 break; 3193 default: 3194 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 3195 break; 3196 } 3197 3198 /* clear last DMA location and unmap remaining buffers */ 3199 while (tx_desc != eop_desc) { 3200 tx_buffer++; 3201 tx_desc++; 3202 i++; 3203 if (unlikely(!i)) { 3204 i -= tx_ring->count; 3205 tx_buffer = tx_ring->tx_buffer_info; 3206 tx_desc = IGC_TX_DESC(tx_ring, 0); 3207 } 3208 3209 /* unmap any remaining paged data */ 3210 if (dma_unmap_len(tx_buffer, len)) 3211 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3212 } 3213 3214 /* move us one more past the eop_desc for start of next pkt */ 3215 tx_buffer++; 3216 tx_desc++; 3217 i++; 3218 if (unlikely(!i)) { 3219 i -= tx_ring->count; 3220 tx_buffer = tx_ring->tx_buffer_info; 3221 tx_desc = IGC_TX_DESC(tx_ring, 0); 3222 } 3223 3224 /* issue prefetch for next Tx descriptor */ 3225 prefetch(tx_desc); 3226 3227 /* update budget accounting */ 3228 budget--; 3229 } while (likely(budget)); 3230 3231 netdev_tx_completed_queue(txring_txq(tx_ring), 3232 total_packets, total_bytes); 3233 3234 i += tx_ring->count; 3235 tx_ring->next_to_clean = i; 3236 3237 igc_update_tx_stats(q_vector, total_packets, total_bytes); 3238 3239 if (tx_ring->xsk_pool) { 3240 if (xsk_frames) 3241 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 3242 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 3243 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 3244 igc_xdp_xmit_zc(tx_ring); 3245 } 3246 3247 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 3248 struct igc_hw *hw = &adapter->hw; 3249 3250 /* Detect a transmit hang in hardware, this serializes the 3251 * check with the clearing of time_stamp and movement of i 3252 */ 3253 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3254 if (tx_buffer->next_to_watch && 3255 time_after(jiffies, tx_buffer->time_stamp + 3256 (adapter->tx_timeout_factor * HZ)) && 3257 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && 3258 (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && 3259 !tx_ring->oper_gate_closed) { 3260 /* detected Tx unit hang */ 3261 netdev_err(tx_ring->netdev, 3262 "Detected Tx Unit Hang\n" 3263 " Tx Queue <%d>\n" 3264 " TDH <%x>\n" 3265 " TDT <%x>\n" 3266 " next_to_use <%x>\n" 3267 " next_to_clean <%x>\n" 3268 "buffer_info[next_to_clean]\n" 3269 " time_stamp <%lx>\n" 3270 " next_to_watch <%p>\n" 3271 " jiffies <%lx>\n" 3272 " desc.status <%x>\n", 3273 tx_ring->queue_index, 3274 rd32(IGC_TDH(tx_ring->reg_idx)), 3275 readl(tx_ring->tail), 3276 tx_ring->next_to_use, 3277 tx_ring->next_to_clean, 3278 tx_buffer->time_stamp, 3279 tx_buffer->next_to_watch, 3280 jiffies, 3281 tx_buffer->next_to_watch->wb.status); 3282 netif_stop_subqueue(tx_ring->netdev, 3283 tx_ring->queue_index); 3284 3285 /* we are about to reset, no point in enabling stuff */ 3286 return true; 3287 } 3288 } 3289 3290 
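/* If enough descriptors have been freed, wake a queue that was stopped
 * because the ring was full.
 */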
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 3291 if (unlikely(total_packets && 3292 netif_carrier_ok(tx_ring->netdev) && 3293 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 3294 /* Make sure that anybody stopping the queue after this 3295 * sees the new next_to_clean. 3296 */ 3297 smp_mb(); 3298 if (__netif_subqueue_stopped(tx_ring->netdev, 3299 tx_ring->queue_index) && 3300 !(test_bit(__IGC_DOWN, &adapter->state))) { 3301 netif_wake_subqueue(tx_ring->netdev, 3302 tx_ring->queue_index); 3303 3304 u64_stats_update_begin(&tx_ring->tx_syncp); 3305 tx_ring->tx_stats.restart_queue++; 3306 u64_stats_update_end(&tx_ring->tx_syncp); 3307 } 3308 } 3309 3310 return !!budget; 3311 } 3312 3313 static int igc_find_mac_filter(struct igc_adapter *adapter, 3314 enum igc_mac_filter_type type, const u8 *addr) 3315 { 3316 struct igc_hw *hw = &adapter->hw; 3317 int max_entries = hw->mac.rar_entry_count; 3318 u32 ral, rah; 3319 int i; 3320 3321 for (i = 0; i < max_entries; i++) { 3322 ral = rd32(IGC_RAL(i)); 3323 rah = rd32(IGC_RAH(i)); 3324 3325 if (!(rah & IGC_RAH_AV)) 3326 continue; 3327 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 3328 continue; 3329 if ((rah & IGC_RAH_RAH_MASK) != 3330 le16_to_cpup((__le16 *)(addr + 4))) 3331 continue; 3332 if (ral != le32_to_cpup((__le32 *)(addr))) 3333 continue; 3334 3335 return i; 3336 } 3337 3338 return -1; 3339 } 3340 3341 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 3342 { 3343 struct igc_hw *hw = &adapter->hw; 3344 int max_entries = hw->mac.rar_entry_count; 3345 u32 rah; 3346 int i; 3347 3348 for (i = 0; i < max_entries; i++) { 3349 rah = rd32(IGC_RAH(i)); 3350 3351 if (!(rah & IGC_RAH_AV)) 3352 return i; 3353 } 3354 3355 return -1; 3356 } 3357 3358 /** 3359 * igc_add_mac_filter() - Add MAC address filter 3360 * @adapter: Pointer to adapter where the filter should be added 3361 * @type: MAC address filter type (source or destination) 3362 * @addr: MAC address 3363 * @queue: If non-negative, queue assignment feature is enabled and frames 3364 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3365 * assignment is disabled. 3366 * 3367 * Return: 0 in case of success, negative errno code otherwise. 3368 */ 3369 static int igc_add_mac_filter(struct igc_adapter *adapter, 3370 enum igc_mac_filter_type type, const u8 *addr, 3371 int queue) 3372 { 3373 struct net_device *dev = adapter->netdev; 3374 int index; 3375 3376 index = igc_find_mac_filter(adapter, type, addr); 3377 if (index >= 0) 3378 goto update_filter; 3379 3380 index = igc_get_avail_mac_filter_slot(adapter); 3381 if (index < 0) 3382 return -ENOSPC; 3383 3384 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 3385 index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3386 addr, queue); 3387 3388 update_filter: 3389 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 3390 return 0; 3391 } 3392 3393 /** 3394 * igc_del_mac_filter() - Delete MAC address filter 3395 * @adapter: Pointer to adapter where the filter should be deleted from 3396 * @type: MAC address filter type (source or destination) 3397 * @addr: MAC address 3398 */ 3399 static void igc_del_mac_filter(struct igc_adapter *adapter, 3400 enum igc_mac_filter_type type, const u8 *addr) 3401 { 3402 struct net_device *dev = adapter->netdev; 3403 int index; 3404 3405 index = igc_find_mac_filter(adapter, type, addr); 3406 if (index < 0) 3407 return; 3408 3409 if (index == 0) { 3410 /* If this is the default filter, we don't actually delete it. 
3411 * We just reset to its default value i.e. disable queue 3412 * assignment. 3413 */ 3414 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 3415 3416 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 3417 } else { 3418 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 3419 index, 3420 type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3421 addr); 3422 3423 igc_clear_mac_filter_hw(adapter, index); 3424 } 3425 } 3426 3427 /** 3428 * igc_add_vlan_prio_filter() - Add VLAN priority filter 3429 * @adapter: Pointer to adapter where the filter should be added 3430 * @prio: VLAN priority value 3431 * @queue: Queue number which matching frames are assigned to 3432 * 3433 * Return: 0 in case of success, negative errno code otherwise. 3434 */ 3435 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 3436 int queue) 3437 { 3438 struct net_device *dev = adapter->netdev; 3439 struct igc_hw *hw = &adapter->hw; 3440 u32 vlanpqf; 3441 3442 vlanpqf = rd32(IGC_VLANPQF); 3443 3444 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 3445 netdev_dbg(dev, "VLAN priority filter already in use\n"); 3446 return -EEXIST; 3447 } 3448 3449 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 3450 vlanpqf |= IGC_VLANPQF_VALID(prio); 3451 3452 wr32(IGC_VLANPQF, vlanpqf); 3453 3454 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 3455 prio, queue); 3456 return 0; 3457 } 3458 3459 /** 3460 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 3461 * @adapter: Pointer to adapter where the filter should be deleted from 3462 * @prio: VLAN priority value 3463 */ 3464 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 3465 { 3466 struct igc_hw *hw = &adapter->hw; 3467 u32 vlanpqf; 3468 3469 vlanpqf = rd32(IGC_VLANPQF); 3470 3471 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 3472 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 3473 3474 wr32(IGC_VLANPQF, vlanpqf); 3475 3476 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 3477 prio); 3478 } 3479 3480 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 3481 { 3482 struct igc_hw *hw = &adapter->hw; 3483 int i; 3484 3485 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3486 u32 etqf = rd32(IGC_ETQF(i)); 3487 3488 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 3489 return i; 3490 } 3491 3492 return -1; 3493 } 3494 3495 /** 3496 * igc_add_etype_filter() - Add ethertype filter 3497 * @adapter: Pointer to adapter where the filter should be added 3498 * @etype: Ethertype value 3499 * @queue: If non-negative, queue assignment feature is enabled and frames 3500 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3501 * assignment is disabled. 3502 * 3503 * Return: 0 in case of success, negative errno code otherwise. 
3504 */ 3505 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3506 int queue) 3507 { 3508 struct igc_hw *hw = &adapter->hw; 3509 int index; 3510 u32 etqf; 3511 3512 index = igc_get_avail_etype_filter_slot(adapter); 3513 if (index < 0) 3514 return -ENOSPC; 3515 3516 etqf = rd32(IGC_ETQF(index)); 3517 3518 etqf &= ~IGC_ETQF_ETYPE_MASK; 3519 etqf |= etype; 3520 3521 if (queue >= 0) { 3522 etqf &= ~IGC_ETQF_QUEUE_MASK; 3523 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3524 etqf |= IGC_ETQF_QUEUE_ENABLE; 3525 } 3526 3527 etqf |= IGC_ETQF_FILTER_ENABLE; 3528 3529 wr32(IGC_ETQF(index), etqf); 3530 3531 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3532 etype, queue); 3533 return 0; 3534 } 3535 3536 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3537 { 3538 struct igc_hw *hw = &adapter->hw; 3539 int i; 3540 3541 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3542 u32 etqf = rd32(IGC_ETQF(i)); 3543 3544 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3545 return i; 3546 } 3547 3548 return -1; 3549 } 3550 3551 /** 3552 * igc_del_etype_filter() - Delete ethertype filter 3553 * @adapter: Pointer to adapter where the filter should be deleted from 3554 * @etype: Ethertype value 3555 */ 3556 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3557 { 3558 struct igc_hw *hw = &adapter->hw; 3559 int index; 3560 3561 index = igc_find_etype_filter(adapter, etype); 3562 if (index < 0) 3563 return; 3564 3565 wr32(IGC_ETQF(index), 0); 3566 3567 netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n", 3568 etype); 3569 } 3570 3571 static int igc_flex_filter_select(struct igc_adapter *adapter, 3572 struct igc_flex_filter *input, 3573 u32 *fhft) 3574 { 3575 struct igc_hw *hw = &adapter->hw; 3576 u8 fhft_index; 3577 u32 fhftsl; 3578 3579 if (input->index >= MAX_FLEX_FILTER) { 3580 netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n"); 3581 return -EINVAL; 3582 } 3583 3584 /* Indirect table select register */ 3585 fhftsl = rd32(IGC_FHFTSL); 3586 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3587 switch (input->index) { 3588 case 0 ... 7: 3589 fhftsl |= 0x00; 3590 break; 3591 case 8 ... 15: 3592 fhftsl |= 0x01; 3593 break; 3594 case 16 ... 23: 3595 fhftsl |= 0x02; 3596 break; 3597 case 24 ... 31: 3598 fhftsl |= 0x03; 3599 break; 3600 } 3601 wr32(IGC_FHFTSL, fhftsl); 3602 3603 /* Normalize index down to host table register */ 3604 fhft_index = input->index % 8; 3605 3606 *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : 3607 IGC_FHFT_EXT(fhft_index - 4); 3608 3609 return 0; 3610 } 3611 3612 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3613 struct igc_flex_filter *input) 3614 { 3615 struct igc_hw *hw = &adapter->hw; 3616 u8 *data = input->data; 3617 u8 *mask = input->mask; 3618 u32 queuing; 3619 u32 fhft; 3620 u32 wufc; 3621 int ret; 3622 int i; 3623 3624 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3625 * out early to avoid surprises later. 3626 */ 3627 if (input->length % 8 != 0) { 3628 netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n"); 3629 return -EINVAL; 3630 } 3631 3632 /* Select corresponding flex filter register and get base for host table. */ 3633 ret = igc_flex_filter_select(adapter, input, &fhft); 3634 if (ret) 3635 return ret; 3636 3637 /* When adding a filter globally disable flex filter feature. That is 3638 * recommended within the datasheet. 
3639 */ 3640 wufc = rd32(IGC_WUFC); 3641 wufc &= ~IGC_WUFC_FLEX_HQ; 3642 wr32(IGC_WUFC, wufc); 3643 3644 /* Configure filter */ 3645 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3646 queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); 3647 queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); 3648 3649 if (input->immediate_irq) 3650 queuing |= IGC_FHFT_IMM_INT; 3651 3652 if (input->drop) 3653 queuing |= IGC_FHFT_DROP; 3654 3655 wr32(fhft + 0xFC, queuing); 3656 3657 /* Write data (128 byte) and mask (128 bit) */ 3658 for (i = 0; i < 16; ++i) { 3659 const size_t data_idx = i * 8; 3660 const size_t row_idx = i * 16; 3661 u32 dw0 = 3662 (data[data_idx + 0] << 0) | 3663 (data[data_idx + 1] << 8) | 3664 (data[data_idx + 2] << 16) | 3665 (data[data_idx + 3] << 24); 3666 u32 dw1 = 3667 (data[data_idx + 4] << 0) | 3668 (data[data_idx + 5] << 8) | 3669 (data[data_idx + 6] << 16) | 3670 (data[data_idx + 7] << 24); 3671 u32 tmp; 3672 3673 /* Write row: dw0, dw1 and mask */ 3674 wr32(fhft + row_idx, dw0); 3675 wr32(fhft + row_idx + 4, dw1); 3676 3677 /* mask is only valid for MASK(7, 0) */ 3678 tmp = rd32(fhft + row_idx + 8); 3679 tmp &= ~GENMASK(7, 0); 3680 tmp |= mask[i]; 3681 wr32(fhft + row_idx + 8, tmp); 3682 } 3683 3684 /* Enable filter. */ 3685 wufc |= IGC_WUFC_FLEX_HQ; 3686 if (input->index > 8) { 3687 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */ 3688 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3689 3690 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3691 3692 wr32(IGC_WUFC_EXT, wufc_ext); 3693 } else { 3694 wufc |= (IGC_WUFC_FLX0 << input->index); 3695 } 3696 wr32(IGC_WUFC, wufc); 3697 3698 netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n", 3699 input->index); 3700 3701 return 0; 3702 } 3703 3704 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3705 const void *src, unsigned int offset, 3706 size_t len, const void *mask) 3707 { 3708 int i; 3709 3710 /* data */ 3711 memcpy(&flex->data[offset], src, len); 3712 3713 /* mask */ 3714 for (i = 0; i < len; ++i) { 3715 const unsigned int idx = i + offset; 3716 const u8 *ptr = mask; 3717 3718 if (mask) { 3719 if (ptr[i] & 0xff) 3720 flex->mask[idx / 8] |= BIT(idx % 8); 3721 3722 continue; 3723 } 3724 3725 flex->mask[idx / 8] |= BIT(idx % 8); 3726 } 3727 } 3728 3729 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3730 { 3731 struct igc_hw *hw = &adapter->hw; 3732 u32 wufc, wufc_ext; 3733 int i; 3734 3735 wufc = rd32(IGC_WUFC); 3736 wufc_ext = rd32(IGC_WUFC_EXT); 3737 3738 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3739 if (i < 8) { 3740 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3741 return i; 3742 } else { 3743 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3744 return i; 3745 } 3746 } 3747 3748 return -ENOSPC; 3749 } 3750 3751 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3752 { 3753 struct igc_hw *hw = &adapter->hw; 3754 u32 wufc, wufc_ext; 3755 3756 wufc = rd32(IGC_WUFC); 3757 wufc_ext = rd32(IGC_WUFC_EXT); 3758 3759 if (wufc & IGC_WUFC_FILTER_MASK) 3760 return true; 3761 3762 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3763 return true; 3764 3765 return false; 3766 } 3767 3768 static int igc_add_flex_filter(struct igc_adapter *adapter, 3769 struct igc_nfc_rule *rule) 3770 { 3771 struct igc_nfc_filter *filter = &rule->filter; 3772 unsigned int eth_offset, user_offset; 3773 struct igc_flex_filter flex = { }; 3774 int ret, index; 3775 bool vlan; 3776 3777 index = igc_find_avail_flex_filter_slot(adapter); 3778 if (index < 0) 3779 return -ENOSPC; 3780 3781 
/* Construct the flex filter: 3782 * -> dest_mac [6] 3783 * -> src_mac [6] 3784 * -> tpid [2] 3785 * -> vlan tci [2] 3786 * -> ether type [2] 3787 * -> user data [8] 3788 * -> = 26 bytes => 32 length 3789 */ 3790 flex.index = index; 3791 flex.length = 32; 3792 flex.rx_queue = rule->action; 3793 3794 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3795 eth_offset = vlan ? 16 : 12; 3796 user_offset = vlan ? 18 : 14; 3797 3798 /* Add destination MAC */ 3799 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3800 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3801 ETH_ALEN, NULL); 3802 3803 /* Add source MAC */ 3804 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3805 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3806 ETH_ALEN, NULL); 3807 3808 /* Add VLAN etype */ 3809 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { 3810 __be16 vlan_etype = cpu_to_be16(filter->vlan_etype); 3811 3812 igc_flex_filter_add_field(&flex, &vlan_etype, 12, 3813 sizeof(vlan_etype), NULL); 3814 } 3815 3816 /* Add VLAN TCI */ 3817 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3818 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3819 sizeof(filter->vlan_tci), NULL); 3820 3821 /* Add Ether type */ 3822 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3823 __be16 etype = cpu_to_be16(filter->etype); 3824 3825 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3826 sizeof(etype), NULL); 3827 } 3828 3829 /* Add user data */ 3830 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3831 igc_flex_filter_add_field(&flex, &filter->user_data, 3832 user_offset, 3833 sizeof(filter->user_data), 3834 filter->user_mask); 3835 3836 /* Add it down to the hardware and enable it. */ 3837 ret = igc_write_flex_filter_ll(adapter, &flex); 3838 if (ret) 3839 return ret; 3840 3841 filter->flex_index = index; 3842 3843 return 0; 3844 } 3845 3846 static void igc_del_flex_filter(struct igc_adapter *adapter, 3847 u16 reg_index) 3848 { 3849 struct igc_hw *hw = &adapter->hw; 3850 u32 wufc; 3851 3852 /* Just disable the filter. The filter table itself is kept 3853 * intact. Another flex_filter_add() should override the "old" data 3854 * then. 
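 * The global flex filter enable (IGC_WUFC_FLEX_HQ) is only cleared
 * further below once no flex filters remain in use.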
3855 */ 3856 if (reg_index > 8) { 3857 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3858 3859 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3860 wr32(IGC_WUFC_EXT, wufc_ext); 3861 } else { 3862 wufc = rd32(IGC_WUFC); 3863 3864 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3865 wr32(IGC_WUFC, wufc); 3866 } 3867 3868 if (igc_flex_filter_in_use(adapter)) 3869 return; 3870 3871 /* No filters are in use, we may disable flex filters */ 3872 wufc = rd32(IGC_WUFC); 3873 wufc &= ~IGC_WUFC_FLEX_HQ; 3874 wr32(IGC_WUFC, wufc); 3875 } 3876 3877 static void igc_set_default_queue_filter(struct igc_adapter *adapter, u32 queue) 3878 { 3879 struct igc_hw *hw = &adapter->hw; 3880 u32 mrqc = rd32(IGC_MRQC); 3881 3882 mrqc &= ~IGC_MRQC_DEFAULT_QUEUE_MASK; 3883 mrqc |= FIELD_PREP(IGC_MRQC_DEFAULT_QUEUE_MASK, queue); 3884 wr32(IGC_MRQC, mrqc); 3885 } 3886 3887 static void igc_reset_default_queue_filter(struct igc_adapter *adapter) 3888 { 3889 /* Reset the default queue to its default value which is Queue 0 */ 3890 igc_set_default_queue_filter(adapter, 0); 3891 } 3892 3893 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3894 struct igc_nfc_rule *rule) 3895 { 3896 int err; 3897 3898 if (rule->flex) { 3899 return igc_add_flex_filter(adapter, rule); 3900 } 3901 3902 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3903 err = igc_add_etype_filter(adapter, rule->filter.etype, 3904 rule->action); 3905 if (err) 3906 return err; 3907 } 3908 3909 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3910 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3911 rule->filter.src_addr, rule->action); 3912 if (err) 3913 return err; 3914 } 3915 3916 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3917 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3918 rule->filter.dst_addr, rule->action); 3919 if (err) 3920 return err; 3921 } 3922 3923 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3924 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3925 3926 err = igc_add_vlan_prio_filter(adapter, prio, rule->action); 3927 if (err) 3928 return err; 3929 } 3930 3931 if (rule->filter.match_flags & IGC_FILTER_FLAG_DEFAULT_QUEUE) 3932 igc_set_default_queue_filter(adapter, rule->action); 3933 3934 return 0; 3935 } 3936 3937 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3938 const struct igc_nfc_rule *rule) 3939 { 3940 if (rule->flex) { 3941 igc_del_flex_filter(adapter, rule->filter.flex_index); 3942 return; 3943 } 3944 3945 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3946 igc_del_etype_filter(adapter, rule->filter.etype); 3947 3948 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3949 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3950 3951 igc_del_vlan_prio_filter(adapter, prio); 3952 } 3953 3954 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3955 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3956 rule->filter.src_addr); 3957 3958 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3959 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3960 rule->filter.dst_addr); 3961 3962 if (rule->filter.match_flags & IGC_FILTER_FLAG_DEFAULT_QUEUE) 3963 igc_reset_default_queue_filter(adapter); 3964 } 3965 3966 /** 3967 * igc_get_nfc_rule() - Get NFC rule 3968 * @adapter: Pointer to adapter 3969 * @location: Rule location 3970 * 3971 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3972 * 3973 * Return: Pointer to NFC rule at @location. If not found, NULL. 
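 * The rule list is kept sorted by location, so the search stops as soon
 * as a higher location is reached.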
3974 */ 3975 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3976 u32 location) 3977 { 3978 struct igc_nfc_rule *rule; 3979 3980 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3981 if (rule->location == location) 3982 return rule; 3983 if (rule->location > location) 3984 break; 3985 } 3986 3987 return NULL; 3988 } 3989 3990 /** 3991 * igc_del_nfc_rule() - Delete NFC rule 3992 * @adapter: Pointer to adapter 3993 * @rule: Pointer to rule to be deleted 3994 * 3995 * Disable NFC rule in hardware and delete it from adapter. 3996 * 3997 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3998 */ 3999 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 4000 { 4001 igc_disable_nfc_rule(adapter, rule); 4002 4003 list_del(&rule->list); 4004 adapter->nfc_rule_count--; 4005 4006 kfree(rule); 4007 } 4008 4009 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 4010 { 4011 struct igc_nfc_rule *rule, *tmp; 4012 4013 mutex_lock(&adapter->nfc_rule_lock); 4014 4015 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 4016 igc_del_nfc_rule(adapter, rule); 4017 4018 mutex_unlock(&adapter->nfc_rule_lock); 4019 } 4020 4021 /** 4022 * igc_add_nfc_rule() - Add NFC rule 4023 * @adapter: Pointer to adapter 4024 * @rule: Pointer to rule to be added 4025 * 4026 * Enable NFC rule in hardware and add it to adapter. 4027 * 4028 * Context: Expects adapter->nfc_rule_lock to be held by caller. 4029 * 4030 * Return: 0 on success, negative errno on failure. 4031 */ 4032 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 4033 { 4034 struct igc_nfc_rule *pred, *cur; 4035 int err; 4036 4037 err = igc_enable_nfc_rule(adapter, rule); 4038 if (err) 4039 return err; 4040 4041 pred = NULL; 4042 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 4043 if (cur->location >= rule->location) 4044 break; 4045 pred = cur; 4046 } 4047 4048 list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list); 4049 adapter->nfc_rule_count++; 4050 return 0; 4051 } 4052 4053 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 4054 { 4055 struct igc_nfc_rule *rule; 4056 4057 mutex_lock(&adapter->nfc_rule_lock); 4058 4059 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 4060 igc_enable_nfc_rule(adapter, rule); 4061 4062 mutex_unlock(&adapter->nfc_rule_lock); 4063 } 4064 4065 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 4066 { 4067 struct igc_adapter *adapter = netdev_priv(netdev); 4068 4069 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 4070 } 4071 4072 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 4073 { 4074 struct igc_adapter *adapter = netdev_priv(netdev); 4075 4076 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 4077 return 0; 4078 } 4079 4080 /** 4081 * igc_enable_empty_addr_recv - Enable Rx of packets with all-zeroes MAC address 4082 * @adapter: Pointer to the igc_adapter structure. 4083 * 4084 * Frame preemption verification requires that packets with the all-zeroes 4085 * MAC address are allowed to be received by the driver. This function adds the 4086 * all-zeroes destination address to the list of acceptable addresses. 4087 * 4088 * Return: 0 on success, negative value otherwise. 
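 *
 * igc_disable_empty_addr_recv() is the counterpart that removes the
 * all-zeroes address again.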
4089 */ 4090 int igc_enable_empty_addr_recv(struct igc_adapter *adapter) 4091 { 4092 u8 empty[ETH_ALEN] = {}; 4093 4094 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty, -1); 4095 } 4096 4097 void igc_disable_empty_addr_recv(struct igc_adapter *adapter) 4098 { 4099 u8 empty[ETH_ALEN] = {}; 4100 4101 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty); 4102 } 4103 4104 /** 4105 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 4106 * @netdev: network interface device structure 4107 * 4108 * The set_rx_mode entry point is called whenever the unicast or multicast 4109 * address lists or the network interface flags are updated. This routine is 4110 * responsible for configuring the hardware for proper unicast, multicast, 4111 * promiscuous mode, and all-multi behavior. 4112 */ 4113 static void igc_set_rx_mode(struct net_device *netdev) 4114 { 4115 struct igc_adapter *adapter = netdev_priv(netdev); 4116 struct igc_hw *hw = &adapter->hw; 4117 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 4118 int count; 4119 4120 /* Check for Promiscuous and All Multicast modes */ 4121 if (netdev->flags & IFF_PROMISC) { 4122 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 4123 } else { 4124 if (netdev->flags & IFF_ALLMULTI) { 4125 rctl |= IGC_RCTL_MPE; 4126 } else { 4127 /* Write addresses to the MTA, if the attempt fails 4128 * then we should just turn on promiscuous mode so 4129 * that we can at least receive multicast traffic 4130 */ 4131 count = igc_write_mc_addr_list(netdev); 4132 if (count < 0) 4133 rctl |= IGC_RCTL_MPE; 4134 } 4135 } 4136 4137 /* Write addresses to available RAR registers, if there is not 4138 * sufficient space to store all the addresses then enable 4139 * unicast promiscuous mode 4140 */ 4141 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 4142 rctl |= IGC_RCTL_UPE; 4143 4144 /* update state of unicast and multicast */ 4145 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 4146 wr32(IGC_RCTL, rctl); 4147 4148 #if (PAGE_SIZE < 8192) 4149 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 4150 rlpml = IGC_MAX_FRAME_BUILD_SKB; 4151 #endif 4152 wr32(IGC_RLPML, rlpml); 4153 } 4154 4155 /** 4156 * igc_configure - configure the hardware for RX and TX 4157 * @adapter: private board structure 4158 */ 4159 static void igc_configure(struct igc_adapter *adapter) 4160 { 4161 struct net_device *netdev = adapter->netdev; 4162 int i = 0; 4163 4164 igc_get_hw_control(adapter); 4165 igc_set_rx_mode(netdev); 4166 4167 igc_restore_vlan(adapter); 4168 4169 igc_setup_tctl(adapter); 4170 igc_setup_mrqc(adapter); 4171 igc_setup_rctl(adapter); 4172 4173 igc_set_default_mac_filter(adapter); 4174 igc_restore_nfc_rules(adapter); 4175 4176 igc_configure_tx(adapter); 4177 igc_configure_rx(adapter); 4178 4179 igc_rx_fifo_flush_base(&adapter->hw); 4180 4181 /* call igc_desc_unused which always leaves 4182 * at least 1 descriptor unused to make sure 4183 * next_to_use != next_to_clean 4184 */ 4185 for (i = 0; i < adapter->num_rx_queues; i++) { 4186 struct igc_ring *ring = adapter->rx_ring[i]; 4187 4188 if (ring->xsk_pool) 4189 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 4190 else 4191 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 4192 } 4193 } 4194 4195 /** 4196 * igc_write_ivar - configure ivar for given MSI-X vector 4197 * @hw: pointer to the HW structure 4198 * @msix_vector: vector number we are allocating to a given ring 4199 * @index: row index of IVAR register to write within IVAR table 4200 * @offset: column offset of in IVAR, should be 
multiple of 8
 *
 * The IVAR table consists of 2 columns,
 * each containing a cause allocation for an Rx and Tx ring, and a
 * variable number of rows depending on the number of queues supported.
 */
static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(IGC_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | IGC_IVAR_VALID) << offset;

	array_wr32(IGC_IVAR0, index, ivar);
}

static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
{
	struct igc_adapter *adapter = q_vector->adapter;
	struct igc_hw *hw = &adapter->hw;
	int rx_queue = IGC_N0_QUEUE;
	int tx_queue = IGC_N0_QUEUE;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case igc_i225:
		if (rx_queue > IGC_N0_QUEUE)
			igc_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGC_N0_QUEUE)
			igc_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = BIT(msix_vector);
		break;
	default:
		WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}

/**
 * igc_configure_msix - Configure MSI-X hardware
 * @adapter: Pointer to adapter structure
 *
 * igc_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 */
static void igc_configure_msix(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	int i, vector = 0;
	u32 tmp;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case igc_i225:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick. And it will take days to debug.
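		 *
		 * The GPIE write below enables MSI-X mode together with the
		 * PBA, EIAME and NSICR bits (roughly: PBA support, extended
		 * interrupt auto-mask, and non-selective interrupt clear on
		 * read, following the usual Intel GPIE naming).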
4276 */ 4277 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 4278 IGC_GPIE_PBA | IGC_GPIE_EIAME | 4279 IGC_GPIE_NSICR); 4280 4281 /* enable msix_other interrupt */ 4282 adapter->eims_other = BIT(vector); 4283 tmp = (vector++ | IGC_IVAR_VALID) << 8; 4284 4285 wr32(IGC_IVAR_MISC, tmp); 4286 break; 4287 default: 4288 /* do nothing, since nothing else supports MSI-X */ 4289 break; 4290 } /* switch (hw->mac.type) */ 4291 4292 adapter->eims_enable_mask |= adapter->eims_other; 4293 4294 for (i = 0; i < adapter->num_q_vectors; i++) 4295 igc_assign_vector(adapter->q_vector[i], vector++); 4296 4297 wrfl(); 4298 } 4299 4300 /** 4301 * igc_irq_enable - Enable default interrupt generation settings 4302 * @adapter: board private structure 4303 */ 4304 static void igc_irq_enable(struct igc_adapter *adapter) 4305 { 4306 struct igc_hw *hw = &adapter->hw; 4307 4308 if (adapter->msix_entries) { 4309 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 4310 u32 regval = rd32(IGC_EIAC); 4311 4312 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 4313 regval = rd32(IGC_EIAM); 4314 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 4315 wr32(IGC_EIMS, adapter->eims_enable_mask); 4316 wr32(IGC_IMS, ims); 4317 } else { 4318 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4319 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4320 } 4321 } 4322 4323 /** 4324 * igc_irq_disable - Mask off interrupt generation on the NIC 4325 * @adapter: board private structure 4326 */ 4327 static void igc_irq_disable(struct igc_adapter *adapter) 4328 { 4329 struct igc_hw *hw = &adapter->hw; 4330 4331 if (adapter->msix_entries) { 4332 u32 regval = rd32(IGC_EIAM); 4333 4334 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 4335 wr32(IGC_EIMC, adapter->eims_enable_mask); 4336 regval = rd32(IGC_EIAC); 4337 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 4338 } 4339 4340 wr32(IGC_IAM, 0); 4341 wr32(IGC_IMC, ~0); 4342 wrfl(); 4343 4344 if (adapter->msix_entries) { 4345 int vector = 0, i; 4346 4347 synchronize_irq(adapter->msix_entries[vector++].vector); 4348 4349 for (i = 0; i < adapter->num_q_vectors; i++) 4350 synchronize_irq(adapter->msix_entries[vector++].vector); 4351 } else { 4352 synchronize_irq(adapter->pdev->irq); 4353 } 4354 } 4355 4356 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 4357 const u32 max_rss_queues) 4358 { 4359 /* Determine if we need to pair queues. */ 4360 /* If rss_queues > half of max_rss_queues, pair the queues in 4361 * order to conserve interrupts due to limited supply. 4362 */ 4363 if (adapter->rss_queues > (max_rss_queues / 2)) 4364 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4365 else 4366 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 4367 } 4368 4369 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 4370 { 4371 return IGC_MAX_RX_QUEUES; 4372 } 4373 4374 static void igc_init_queue_configuration(struct igc_adapter *adapter) 4375 { 4376 u32 max_rss_queues; 4377 4378 max_rss_queues = igc_get_max_rss_queues(adapter); 4379 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 4380 4381 igc_set_flag_queue_pairs(adapter, max_rss_queues); 4382 } 4383 4384 /** 4385 * igc_reset_q_vector - Reset config for interrupt vector 4386 * @adapter: board private structure to initialize 4387 * @v_idx: Index of vector to be reset 4388 * 4389 * If NAPI is enabled it will delete any references to the 4390 * NAPI struct. This is preparation for igc_free_q_vector. 
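 *
 * Only the adapter's ring pointers and the NAPI registration are torn
 * down here; the q_vector itself (including the rings embedded in it)
 * is freed afterwards by igc_free_q_vector().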
4391 */ 4392 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 4393 { 4394 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4395 4396 /* if we're coming from igc_set_interrupt_capability, the vectors are 4397 * not yet allocated 4398 */ 4399 if (!q_vector) 4400 return; 4401 4402 if (q_vector->tx.ring) 4403 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 4404 4405 if (q_vector->rx.ring) 4406 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 4407 4408 netif_napi_del(&q_vector->napi); 4409 } 4410 4411 /** 4412 * igc_free_q_vector - Free memory allocated for specific interrupt vector 4413 * @adapter: board private structure to initialize 4414 * @v_idx: Index of vector to be freed 4415 * 4416 * This function frees the memory allocated to the q_vector. 4417 */ 4418 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 4419 { 4420 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4421 4422 adapter->q_vector[v_idx] = NULL; 4423 4424 /* igc_get_stats64() might access the rings on this vector, 4425 * we must wait a grace period before freeing it. 4426 */ 4427 if (q_vector) 4428 kfree_rcu(q_vector, rcu); 4429 } 4430 4431 /** 4432 * igc_free_q_vectors - Free memory allocated for interrupt vectors 4433 * @adapter: board private structure to initialize 4434 * 4435 * This function frees the memory allocated to the q_vectors. In addition if 4436 * NAPI is enabled it will delete any references to the NAPI struct prior 4437 * to freeing the q_vector. 4438 */ 4439 static void igc_free_q_vectors(struct igc_adapter *adapter) 4440 { 4441 int v_idx = adapter->num_q_vectors; 4442 4443 adapter->num_tx_queues = 0; 4444 adapter->num_rx_queues = 0; 4445 adapter->num_q_vectors = 0; 4446 4447 while (v_idx--) { 4448 igc_reset_q_vector(adapter, v_idx); 4449 igc_free_q_vector(adapter, v_idx); 4450 } 4451 } 4452 4453 /** 4454 * igc_update_itr - update the dynamic ITR value based on statistics 4455 * @q_vector: pointer to q_vector 4456 * @ring_container: ring info to update the itr for 4457 * 4458 * Stores a new ITR value based on packets and byte 4459 * counts during the last interrupt. The advantage of per interrupt 4460 * computation is faster updates and more accurate ITR for the current 4461 * traffic pattern. Constants in this function were computed 4462 * based on theoretical maximum wire speed and thresholds were set based 4463 * on testing data as well as attempting to minimize response time 4464 * while increasing bulk throughput. 4465 * NOTE: These calculations are only valid when operating in a single- 4466 * queue environment. 
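 *
 * Example, using the thresholds below: a vector that saw 20 packets and
 * 30000 bytes since the last interrupt (1500 bytes/packet) while in
 * low_latency is moved to bulk_latency, because bytes/packets exceeds
 * 1200.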
4467 */ 4468 static void igc_update_itr(struct igc_q_vector *q_vector, 4469 struct igc_ring_container *ring_container) 4470 { 4471 unsigned int packets = ring_container->total_packets; 4472 unsigned int bytes = ring_container->total_bytes; 4473 u8 itrval = ring_container->itr; 4474 4475 /* no packets, exit with status unchanged */ 4476 if (packets == 0) 4477 return; 4478 4479 switch (itrval) { 4480 case lowest_latency: 4481 /* handle TSO and jumbo frames */ 4482 if (bytes / packets > 8000) 4483 itrval = bulk_latency; 4484 else if ((packets < 5) && (bytes > 512)) 4485 itrval = low_latency; 4486 break; 4487 case low_latency: /* 50 usec aka 20000 ints/s */ 4488 if (bytes > 10000) { 4489 /* this if handles the TSO accounting */ 4490 if (bytes / packets > 8000) 4491 itrval = bulk_latency; 4492 else if ((packets < 10) || ((bytes / packets) > 1200)) 4493 itrval = bulk_latency; 4494 else if ((packets > 35)) 4495 itrval = lowest_latency; 4496 } else if (bytes / packets > 2000) { 4497 itrval = bulk_latency; 4498 } else if (packets <= 2 && bytes < 512) { 4499 itrval = lowest_latency; 4500 } 4501 break; 4502 case bulk_latency: /* 250 usec aka 4000 ints/s */ 4503 if (bytes > 25000) { 4504 if (packets > 35) 4505 itrval = low_latency; 4506 } else if (bytes < 1500) { 4507 itrval = low_latency; 4508 } 4509 break; 4510 } 4511 4512 /* clear work counters since we have the values we need */ 4513 ring_container->total_bytes = 0; 4514 ring_container->total_packets = 0; 4515 4516 /* write updated itr to ring container */ 4517 ring_container->itr = itrval; 4518 } 4519 4520 static void igc_set_itr(struct igc_q_vector *q_vector) 4521 { 4522 struct igc_adapter *adapter = q_vector->adapter; 4523 u32 new_itr = q_vector->itr_val; 4524 u8 current_itr = 0; 4525 4526 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 4527 switch (adapter->link_speed) { 4528 case SPEED_10: 4529 case SPEED_100: 4530 current_itr = 0; 4531 new_itr = IGC_4K_ITR; 4532 goto set_itr_now; 4533 default: 4534 break; 4535 } 4536 4537 igc_update_itr(q_vector, &q_vector->tx); 4538 igc_update_itr(q_vector, &q_vector->rx); 4539 4540 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4541 4542 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4543 if (current_itr == lowest_latency && 4544 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4545 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4546 current_itr = low_latency; 4547 4548 switch (current_itr) { 4549 /* counts and packets in update_itr are dependent on these numbers */ 4550 case lowest_latency: 4551 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4552 break; 4553 case low_latency: 4554 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4555 break; 4556 case bulk_latency: 4557 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4558 break; 4559 default: 4560 break; 4561 } 4562 4563 set_itr_now: 4564 if (new_itr != q_vector->itr_val) { 4565 /* this attempts to bias the interrupt rate towards Bulk 4566 * by adding intermediate steps when interrupt rate is 4567 * increasing 4568 */ 4569 new_itr = new_itr > q_vector->itr_val ? 4570 max((new_itr * q_vector->itr_val) / 4571 (new_itr + (q_vector->itr_val >> 2)), 4572 new_itr) : new_itr; 4573 /* Don't write the value here; it resets the adapter's 4574 * internal timer, and causes us to delay far longer than 4575 * we should between interrupts. Instead, we write the ITR 4576 * value at the beginning of the next interrupt so the timing 4577 * ends up being correct. 
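		 * The deferred write is done by igc_write_itr(), which acts
		 * on the set_itr flag raised below.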
4578 */ 4579 q_vector->itr_val = new_itr; 4580 q_vector->set_itr = 1; 4581 } 4582 } 4583 4584 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 4585 { 4586 int v_idx = adapter->num_q_vectors; 4587 4588 if (adapter->msix_entries) { 4589 pci_disable_msix(adapter->pdev); 4590 kfree(adapter->msix_entries); 4591 adapter->msix_entries = NULL; 4592 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 4593 pci_disable_msi(adapter->pdev); 4594 } 4595 4596 while (v_idx--) 4597 igc_reset_q_vector(adapter, v_idx); 4598 } 4599 4600 /** 4601 * igc_set_interrupt_capability - set MSI or MSI-X if supported 4602 * @adapter: Pointer to adapter structure 4603 * @msix: boolean value for MSI-X capability 4604 * 4605 * Attempt to configure interrupts using the best available 4606 * capabilities of the hardware and kernel. 4607 */ 4608 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 4609 bool msix) 4610 { 4611 int numvecs, i; 4612 int err; 4613 4614 if (!msix) 4615 goto msi_only; 4616 adapter->flags |= IGC_FLAG_HAS_MSIX; 4617 4618 /* Number of supported queues. */ 4619 adapter->num_rx_queues = adapter->rss_queues; 4620 4621 adapter->num_tx_queues = adapter->rss_queues; 4622 4623 /* start with one vector for every Rx queue */ 4624 numvecs = adapter->num_rx_queues; 4625 4626 /* if Tx handler is separate add 1 for every Tx queue */ 4627 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 4628 numvecs += adapter->num_tx_queues; 4629 4630 /* store the number of vectors reserved for queues */ 4631 adapter->num_q_vectors = numvecs; 4632 4633 /* add 1 vector for link status interrupts */ 4634 numvecs++; 4635 4636 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 4637 GFP_KERNEL); 4638 4639 if (!adapter->msix_entries) 4640 return; 4641 4642 /* populate entry values */ 4643 for (i = 0; i < numvecs; i++) 4644 adapter->msix_entries[i].entry = i; 4645 4646 err = pci_enable_msix_range(adapter->pdev, 4647 adapter->msix_entries, 4648 numvecs, 4649 numvecs); 4650 if (err > 0) 4651 return; 4652 4653 kfree(adapter->msix_entries); 4654 adapter->msix_entries = NULL; 4655 4656 igc_reset_interrupt_capability(adapter); 4657 4658 msi_only: 4659 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 4660 4661 adapter->rss_queues = 1; 4662 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4663 adapter->num_rx_queues = 1; 4664 adapter->num_tx_queues = 1; 4665 adapter->num_q_vectors = 1; 4666 if (!pci_enable_msi(adapter->pdev)) 4667 adapter->flags |= IGC_FLAG_HAS_MSI; 4668 } 4669 4670 /** 4671 * igc_update_ring_itr - update the dynamic ITR value based on packet size 4672 * @q_vector: pointer to q_vector 4673 * 4674 * Stores a new ITR value based on strictly on packet size. This 4675 * algorithm is less sophisticated than that used in igc_update_itr, 4676 * due to the difficulty of synchronizing statistics across multiple 4677 * receive rings. The divisors and thresholds used by this function 4678 * were determined based on theoretical maximum wire speed and testing 4679 * data, in order to minimize response time while increasing bulk 4680 * throughput. 4681 * NOTE: This function is called only when operating in a multiqueue 4682 * receive environment. 4683 */ 4684 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4685 { 4686 struct igc_adapter *adapter = q_vector->adapter; 4687 int new_val = q_vector->itr_val; 4688 int avg_wire_size = 0; 4689 unsigned int packets; 4690 4691 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4692 * ints/sec - ITR timer value of 120 ticks. 
4693 */ 4694 switch (adapter->link_speed) { 4695 case SPEED_10: 4696 case SPEED_100: 4697 new_val = IGC_4K_ITR; 4698 goto set_itr_val; 4699 default: 4700 break; 4701 } 4702 4703 packets = q_vector->rx.total_packets; 4704 if (packets) 4705 avg_wire_size = q_vector->rx.total_bytes / packets; 4706 4707 packets = q_vector->tx.total_packets; 4708 if (packets) 4709 avg_wire_size = max_t(u32, avg_wire_size, 4710 q_vector->tx.total_bytes / packets); 4711 4712 /* if avg_wire_size isn't set no work was done */ 4713 if (!avg_wire_size) 4714 goto clear_counts; 4715 4716 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4717 avg_wire_size += 24; 4718 4719 /* Don't starve jumbo frames */ 4720 avg_wire_size = min(avg_wire_size, 3000); 4721 4722 /* Give a little boost to mid-size frames */ 4723 if (avg_wire_size > 300 && avg_wire_size < 1200) 4724 new_val = avg_wire_size / 3; 4725 else 4726 new_val = avg_wire_size / 2; 4727 4728 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4729 if (new_val < IGC_20K_ITR && 4730 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4731 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4732 new_val = IGC_20K_ITR; 4733 4734 set_itr_val: 4735 if (new_val != q_vector->itr_val) { 4736 q_vector->itr_val = new_val; 4737 q_vector->set_itr = 1; 4738 } 4739 clear_counts: 4740 q_vector->rx.total_bytes = 0; 4741 q_vector->rx.total_packets = 0; 4742 q_vector->tx.total_bytes = 0; 4743 q_vector->tx.total_packets = 0; 4744 } 4745 4746 static void igc_ring_irq_enable(struct igc_q_vector *q_vector) 4747 { 4748 struct igc_adapter *adapter = q_vector->adapter; 4749 struct igc_hw *hw = &adapter->hw; 4750 4751 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4752 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4753 if (adapter->num_q_vectors == 1) 4754 igc_set_itr(q_vector); 4755 else 4756 igc_update_ring_itr(q_vector); 4757 } 4758 4759 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4760 if (adapter->msix_entries) 4761 wr32(IGC_EIMS, q_vector->eims_value); 4762 else 4763 igc_irq_enable(adapter); 4764 } 4765 } 4766 4767 static void igc_add_ring(struct igc_ring *ring, 4768 struct igc_ring_container *head) 4769 { 4770 head->ring = ring; 4771 head->count++; 4772 } 4773 4774 /** 4775 * igc_cache_ring_register - Descriptor ring to register mapping 4776 * @adapter: board private structure to initialize 4777 * 4778 * Once we know the feature-set enabled for the device, we'll cache 4779 * the register offset the descriptor ring is assigned to. 4780 */ 4781 static void igc_cache_ring_register(struct igc_adapter *adapter) 4782 { 4783 int i = 0, j = 0; 4784 4785 switch (adapter->hw.mac.type) { 4786 case igc_i225: 4787 default: 4788 for (; i < adapter->num_rx_queues; i++) 4789 adapter->rx_ring[i]->reg_idx = i; 4790 for (; j < adapter->num_tx_queues; j++) 4791 adapter->tx_ring[j]->reg_idx = j; 4792 break; 4793 } 4794 } 4795 4796 /** 4797 * igc_poll - NAPI Rx polling callback 4798 * @napi: napi polling structure 4799 * @budget: count of how many packets we should handle 4800 */ 4801 static int igc_poll(struct napi_struct *napi, int budget) 4802 { 4803 struct igc_q_vector *q_vector = container_of(napi, 4804 struct igc_q_vector, 4805 napi); 4806 struct igc_ring *rx_ring = q_vector->rx.ring; 4807 bool clean_complete = true; 4808 int work_done = 0; 4809 4810 if (q_vector->tx.ring) 4811 clean_complete = igc_clean_tx_irq(q_vector, budget); 4812 4813 if (rx_ring) { 4814 int cleaned = rx_ring->xsk_pool ? 
4815 igc_clean_rx_irq_zc(q_vector, budget) : 4816 igc_clean_rx_irq(q_vector, budget); 4817 4818 work_done += cleaned; 4819 if (cleaned >= budget) 4820 clean_complete = false; 4821 } 4822 4823 /* If all work not completed, return budget and keep polling */ 4824 if (!clean_complete) 4825 return budget; 4826 4827 /* Exit the polling mode, but don't re-enable interrupts if stack might 4828 * poll us due to busy-polling 4829 */ 4830 if (likely(napi_complete_done(napi, work_done))) 4831 igc_ring_irq_enable(q_vector); 4832 4833 return min(work_done, budget - 1); 4834 } 4835 4836 /** 4837 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4838 * @adapter: board private structure to initialize 4839 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4840 * @v_idx: index of vector in adapter struct 4841 * @txr_count: total number of Tx rings to allocate 4842 * @txr_idx: index of first Tx ring to allocate 4843 * @rxr_count: total number of Rx rings to allocate 4844 * @rxr_idx: index of first Rx ring to allocate 4845 * 4846 * We allocate one q_vector. If allocation fails we return -ENOMEM. 4847 */ 4848 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4849 unsigned int v_count, unsigned int v_idx, 4850 unsigned int txr_count, unsigned int txr_idx, 4851 unsigned int rxr_count, unsigned int rxr_idx) 4852 { 4853 struct igc_q_vector *q_vector; 4854 struct igc_ring *ring; 4855 int ring_count; 4856 4857 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4858 if (txr_count > 1 || rxr_count > 1) 4859 return -ENOMEM; 4860 4861 ring_count = txr_count + rxr_count; 4862 4863 /* allocate q_vector and rings */ 4864 q_vector = adapter->q_vector[v_idx]; 4865 if (!q_vector) 4866 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4867 GFP_KERNEL); 4868 else 4869 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4870 if (!q_vector) 4871 return -ENOMEM; 4872 4873 /* initialize NAPI */ 4874 netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll); 4875 4876 /* tie q_vector and adapter together */ 4877 adapter->q_vector[v_idx] = q_vector; 4878 q_vector->adapter = adapter; 4879 4880 /* initialize work limits */ 4881 q_vector->tx.work_limit = adapter->tx_work_limit; 4882 4883 /* initialize ITR configuration */ 4884 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4885 q_vector->itr_val = IGC_START_ITR; 4886 4887 /* initialize pointer to rings */ 4888 ring = q_vector->ring; 4889 4890 /* initialize ITR */ 4891 if (rxr_count) { 4892 /* rx or rx/tx vector */ 4893 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4894 q_vector->itr_val = adapter->rx_itr_setting; 4895 } else { 4896 /* tx only vector */ 4897 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4898 q_vector->itr_val = adapter->tx_itr_setting; 4899 } 4900 4901 if (txr_count) { 4902 /* assign generic ring traits */ 4903 ring->dev = &adapter->pdev->dev; 4904 ring->netdev = adapter->netdev; 4905 4906 /* configure backlink on ring */ 4907 ring->q_vector = q_vector; 4908 4909 /* update q_vector Tx values */ 4910 igc_add_ring(ring, &q_vector->tx); 4911 4912 /* apply Tx specific ring traits */ 4913 ring->count = adapter->tx_ring_count; 4914 ring->queue_index = txr_idx; 4915 4916 /* assign ring to adapter */ 4917 adapter->tx_ring[txr_idx] = ring; 4918 4919 /* push pointer to next ring */ 4920 ring++; 4921 } 4922 4923 if (rxr_count) { 4924 /* assign generic ring traits */ 4925 ring->dev = &adapter->pdev->dev; 4926 ring->netdev = adapter->netdev; 4927 4928 /* configure 
backlink on ring */ 4929 ring->q_vector = q_vector; 4930 4931 /* update q_vector Rx values */ 4932 igc_add_ring(ring, &q_vector->rx); 4933 4934 /* apply Rx specific ring traits */ 4935 ring->count = adapter->rx_ring_count; 4936 ring->queue_index = rxr_idx; 4937 4938 /* assign ring to adapter */ 4939 adapter->rx_ring[rxr_idx] = ring; 4940 } 4941 4942 return 0; 4943 } 4944 4945 /** 4946 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4947 * @adapter: board private structure to initialize 4948 * 4949 * We allocate one q_vector per queue interrupt. If allocation fails we 4950 * return -ENOMEM. 4951 */ 4952 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4953 { 4954 int rxr_remaining = adapter->num_rx_queues; 4955 int txr_remaining = adapter->num_tx_queues; 4956 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4957 int q_vectors = adapter->num_q_vectors; 4958 int err; 4959 4960 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4961 for (; rxr_remaining; v_idx++) { 4962 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4963 0, 0, 1, rxr_idx); 4964 4965 if (err) 4966 goto err_out; 4967 4968 /* update counts and index */ 4969 rxr_remaining--; 4970 rxr_idx++; 4971 } 4972 } 4973 4974 for (; v_idx < q_vectors; v_idx++) { 4975 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4976 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4977 4978 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4979 tqpv, txr_idx, rqpv, rxr_idx); 4980 4981 if (err) 4982 goto err_out; 4983 4984 /* update counts and index */ 4985 rxr_remaining -= rqpv; 4986 txr_remaining -= tqpv; 4987 rxr_idx++; 4988 txr_idx++; 4989 } 4990 4991 return 0; 4992 4993 err_out: 4994 adapter->num_tx_queues = 0; 4995 adapter->num_rx_queues = 0; 4996 adapter->num_q_vectors = 0; 4997 4998 while (v_idx--) 4999 igc_free_q_vector(adapter, v_idx); 5000 5001 return -ENOMEM; 5002 } 5003 5004 /** 5005 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 5006 * @adapter: Pointer to adapter structure 5007 * @msix: boolean for MSI-X capability 5008 * 5009 * This function initializes the interrupts and allocates all of the queues. 5010 */ 5011 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 5012 { 5013 struct net_device *dev = adapter->netdev; 5014 int err = 0; 5015 5016 igc_set_interrupt_capability(adapter, msix); 5017 5018 err = igc_alloc_q_vectors(adapter); 5019 if (err) { 5020 netdev_err(dev, "Unable to allocate memory for vectors\n"); 5021 goto err_alloc_q_vectors; 5022 } 5023 5024 igc_cache_ring_register(adapter); 5025 5026 return 0; 5027 5028 err_alloc_q_vectors: 5029 igc_reset_interrupt_capability(adapter); 5030 return err; 5031 } 5032 5033 /** 5034 * igc_sw_init - Initialize general software structures (struct igc_adapter) 5035 * @adapter: board private structure to initialize 5036 * 5037 * igc_sw_init initializes the Adapter private data structure. 5038 * Fields are initialized based on PCI device information and 5039 * OS network device settings (MTU size). 
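 *
 * For example, with the default 1500 byte MTU the code below arrives at
 * a max_frame_size of 1522 bytes (MTU + 14 byte Ethernet header +
 * 4 byte FCS + 4 byte VLAN tag).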
5040 */ 5041 static int igc_sw_init(struct igc_adapter *adapter) 5042 { 5043 struct net_device *netdev = adapter->netdev; 5044 struct pci_dev *pdev = adapter->pdev; 5045 struct igc_hw *hw = &adapter->hw; 5046 5047 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 5048 5049 /* set default ring sizes */ 5050 adapter->tx_ring_count = IGC_DEFAULT_TXD; 5051 adapter->rx_ring_count = IGC_DEFAULT_RXD; 5052 5053 /* set default ITR values */ 5054 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 5055 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 5056 5057 /* set default work limits */ 5058 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 5059 5060 /* adjust max frame to be at least the size of a standard frame */ 5061 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 5062 VLAN_HLEN; 5063 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 5064 5065 mutex_init(&adapter->nfc_rule_lock); 5066 INIT_LIST_HEAD(&adapter->nfc_rule_list); 5067 adapter->nfc_rule_count = 0; 5068 5069 spin_lock_init(&adapter->stats64_lock); 5070 spin_lock_init(&adapter->qbv_tx_lock); 5071 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 5072 adapter->flags |= IGC_FLAG_HAS_MSIX; 5073 5074 igc_init_queue_configuration(adapter); 5075 5076 /* This call may decrease the number of queues */ 5077 if (igc_init_interrupt_scheme(adapter, true)) { 5078 netdev_err(netdev, "Unable to allocate memory for queues\n"); 5079 return -ENOMEM; 5080 } 5081 5082 /* Explicitly disable IRQ since the NIC can be in any state. */ 5083 igc_irq_disable(adapter); 5084 5085 set_bit(__IGC_DOWN, &adapter->state); 5086 5087 return 0; 5088 } 5089 5090 static void igc_set_queue_napi(struct igc_adapter *adapter, int vector, 5091 struct napi_struct *napi) 5092 { 5093 struct igc_q_vector *q_vector = adapter->q_vector[vector]; 5094 5095 if (q_vector->rx.ring) 5096 netif_queue_set_napi(adapter->netdev, 5097 q_vector->rx.ring->queue_index, 5098 NETDEV_QUEUE_TYPE_RX, napi); 5099 5100 if (q_vector->tx.ring) 5101 netif_queue_set_napi(adapter->netdev, 5102 q_vector->tx.ring->queue_index, 5103 NETDEV_QUEUE_TYPE_TX, napi); 5104 } 5105 5106 /** 5107 * igc_up - Open the interface and prepare it to handle traffic 5108 * @adapter: board private structure 5109 */ 5110 void igc_up(struct igc_adapter *adapter) 5111 { 5112 struct igc_hw *hw = &adapter->hw; 5113 struct napi_struct *napi; 5114 int i = 0; 5115 5116 /* hardware has been reset, we need to reload some things */ 5117 igc_configure(adapter); 5118 5119 clear_bit(__IGC_DOWN, &adapter->state); 5120 5121 for (i = 0; i < adapter->num_q_vectors; i++) { 5122 napi = &adapter->q_vector[i]->napi; 5123 napi_enable(napi); 5124 igc_set_queue_napi(adapter, i, napi); 5125 } 5126 5127 if (adapter->msix_entries) 5128 igc_configure_msix(adapter); 5129 else 5130 igc_assign_vector(adapter->q_vector[0], 0); 5131 5132 /* Clear any pending interrupts. */ 5133 rd32(IGC_ICR); 5134 igc_irq_enable(adapter); 5135 5136 netif_tx_start_all_queues(adapter->netdev); 5137 5138 /* start the watchdog. 
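	 * get_link_status is set so the first watchdog run re-checks the
	 * link via igc_has_link().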
*/ 5139 hw->mac.get_link_status = true; 5140 schedule_work(&adapter->watchdog_task); 5141 } 5142 5143 /** 5144 * igc_update_stats - Update the board statistics counters 5145 * @adapter: board private structure 5146 */ 5147 void igc_update_stats(struct igc_adapter *adapter) 5148 { 5149 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 5150 struct pci_dev *pdev = adapter->pdev; 5151 struct igc_hw *hw = &adapter->hw; 5152 u64 _bytes, _packets; 5153 u64 bytes, packets; 5154 unsigned int start; 5155 u32 mpc; 5156 int i; 5157 5158 /* Prevent stats update while adapter is being reset, or if the pci 5159 * connection is down. 5160 */ 5161 if (adapter->link_speed == 0) 5162 return; 5163 if (pci_channel_offline(pdev)) 5164 return; 5165 5166 packets = 0; 5167 bytes = 0; 5168 5169 rcu_read_lock(); 5170 for (i = 0; i < adapter->num_rx_queues; i++) { 5171 struct igc_ring *ring = adapter->rx_ring[i]; 5172 u32 rqdpc = rd32(IGC_RQDPC(i)); 5173 5174 if (hw->mac.type >= igc_i225) 5175 wr32(IGC_RQDPC(i), 0); 5176 5177 if (rqdpc) { 5178 ring->rx_stats.drops += rqdpc; 5179 net_stats->rx_fifo_errors += rqdpc; 5180 } 5181 5182 do { 5183 start = u64_stats_fetch_begin(&ring->rx_syncp); 5184 _bytes = ring->rx_stats.bytes; 5185 _packets = ring->rx_stats.packets; 5186 } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); 5187 bytes += _bytes; 5188 packets += _packets; 5189 } 5190 5191 net_stats->rx_bytes = bytes; 5192 net_stats->rx_packets = packets; 5193 5194 packets = 0; 5195 bytes = 0; 5196 for (i = 0; i < adapter->num_tx_queues; i++) { 5197 struct igc_ring *ring = adapter->tx_ring[i]; 5198 5199 do { 5200 start = u64_stats_fetch_begin(&ring->tx_syncp); 5201 _bytes = ring->tx_stats.bytes; 5202 _packets = ring->tx_stats.packets; 5203 } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); 5204 bytes += _bytes; 5205 packets += _packets; 5206 } 5207 net_stats->tx_bytes = bytes; 5208 net_stats->tx_packets = packets; 5209 rcu_read_unlock(); 5210 5211 /* read stats registers */ 5212 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 5213 adapter->stats.gprc += rd32(IGC_GPRC); 5214 adapter->stats.gorc += rd32(IGC_GORCL); 5215 rd32(IGC_GORCH); /* clear GORCL */ 5216 adapter->stats.bprc += rd32(IGC_BPRC); 5217 adapter->stats.mprc += rd32(IGC_MPRC); 5218 adapter->stats.roc += rd32(IGC_ROC); 5219 5220 adapter->stats.prc64 += rd32(IGC_PRC64); 5221 adapter->stats.prc127 += rd32(IGC_PRC127); 5222 adapter->stats.prc255 += rd32(IGC_PRC255); 5223 adapter->stats.prc511 += rd32(IGC_PRC511); 5224 adapter->stats.prc1023 += rd32(IGC_PRC1023); 5225 adapter->stats.prc1522 += rd32(IGC_PRC1522); 5226 adapter->stats.tlpic += rd32(IGC_TLPIC); 5227 adapter->stats.rlpic += rd32(IGC_RLPIC); 5228 adapter->stats.hgptc += rd32(IGC_HGPTC); 5229 5230 mpc = rd32(IGC_MPC); 5231 adapter->stats.mpc += mpc; 5232 net_stats->rx_fifo_errors += mpc; 5233 adapter->stats.scc += rd32(IGC_SCC); 5234 adapter->stats.ecol += rd32(IGC_ECOL); 5235 adapter->stats.mcc += rd32(IGC_MCC); 5236 adapter->stats.latecol += rd32(IGC_LATECOL); 5237 adapter->stats.dc += rd32(IGC_DC); 5238 adapter->stats.rlec += rd32(IGC_RLEC); 5239 adapter->stats.xonrxc += rd32(IGC_XONRXC); 5240 adapter->stats.xontxc += rd32(IGC_XONTXC); 5241 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 5242 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 5243 adapter->stats.fcruc += rd32(IGC_FCRUC); 5244 adapter->stats.gptc += rd32(IGC_GPTC); 5245 adapter->stats.gotc += rd32(IGC_GOTCL); 5246 rd32(IGC_GOTCH); /* clear GOTCL */ 5247 adapter->stats.rnbc += rd32(IGC_RNBC); 5248 adapter->stats.ruc += rd32(IGC_RUC); 
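	/* Like the counters above, the statistics registers read below are
	 * assumed to be clear-on-read, hence the accumulation into 64-bit
	 * software counters.
	 */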
5249 adapter->stats.rfc += rd32(IGC_RFC); 5250 adapter->stats.rjc += rd32(IGC_RJC); 5251 adapter->stats.tor += rd32(IGC_TORH); 5252 adapter->stats.tot += rd32(IGC_TOTH); 5253 adapter->stats.tpr += rd32(IGC_TPR); 5254 5255 adapter->stats.ptc64 += rd32(IGC_PTC64); 5256 adapter->stats.ptc127 += rd32(IGC_PTC127); 5257 adapter->stats.ptc255 += rd32(IGC_PTC255); 5258 adapter->stats.ptc511 += rd32(IGC_PTC511); 5259 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 5260 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 5261 5262 adapter->stats.mptc += rd32(IGC_MPTC); 5263 adapter->stats.bptc += rd32(IGC_BPTC); 5264 5265 adapter->stats.tpt += rd32(IGC_TPT); 5266 adapter->stats.colc += rd32(IGC_COLC); 5267 adapter->stats.colc += rd32(IGC_RERC); 5268 5269 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 5270 5271 adapter->stats.tsctc += rd32(IGC_TSCTC); 5272 5273 adapter->stats.iac += rd32(IGC_IAC); 5274 5275 /* Fill out the OS statistics structure */ 5276 net_stats->multicast = adapter->stats.mprc; 5277 net_stats->collisions = adapter->stats.colc; 5278 5279 /* Rx Errors */ 5280 5281 /* RLEC on some newer hardware can be incorrect so build 5282 * our own version based on RUC and ROC 5283 */ 5284 net_stats->rx_errors = adapter->stats.rxerrc + 5285 adapter->stats.crcerrs + adapter->stats.algnerrc + 5286 adapter->stats.ruc + adapter->stats.roc + 5287 adapter->stats.cexterr; 5288 net_stats->rx_length_errors = adapter->stats.ruc + 5289 adapter->stats.roc; 5290 net_stats->rx_crc_errors = adapter->stats.crcerrs; 5291 net_stats->rx_frame_errors = adapter->stats.algnerrc; 5292 net_stats->rx_missed_errors = adapter->stats.mpc; 5293 5294 /* Tx Errors */ 5295 net_stats->tx_errors = adapter->stats.ecol + 5296 adapter->stats.latecol; 5297 net_stats->tx_aborted_errors = adapter->stats.ecol; 5298 net_stats->tx_window_errors = adapter->stats.latecol; 5299 net_stats->tx_carrier_errors = adapter->stats.tncrs; 5300 5301 /* Tx Dropped */ 5302 net_stats->tx_dropped = adapter->stats.txdrop; 5303 5304 /* Management Stats */ 5305 adapter->stats.mgptc += rd32(IGC_MGTPTC); 5306 adapter->stats.mgprc += rd32(IGC_MGTPRC); 5307 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 5308 } 5309 5310 /** 5311 * igc_down - Close the interface 5312 * @adapter: board private structure 5313 */ 5314 void igc_down(struct igc_adapter *adapter) 5315 { 5316 struct net_device *netdev = adapter->netdev; 5317 struct igc_hw *hw = &adapter->hw; 5318 u32 tctl, rctl; 5319 int i = 0; 5320 5321 set_bit(__IGC_DOWN, &adapter->state); 5322 5323 igc_ptp_suspend(adapter); 5324 5325 if (pci_device_is_present(adapter->pdev)) { 5326 /* disable receives in the hardware */ 5327 rctl = rd32(IGC_RCTL); 5328 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 5329 /* flush and sleep below */ 5330 } 5331 /* set trans_start so we don't get spurious watchdogs during reset */ 5332 netif_trans_update(netdev); 5333 5334 netif_carrier_off(netdev); 5335 netif_tx_stop_all_queues(netdev); 5336 5337 if (pci_device_is_present(adapter->pdev)) { 5338 /* disable transmits in the hardware */ 5339 tctl = rd32(IGC_TCTL); 5340 tctl &= ~IGC_TCTL_EN; 5341 wr32(IGC_TCTL, tctl); 5342 /* flush both disables and wait for them to finish */ 5343 wrfl(); 5344 usleep_range(10000, 20000); 5345 5346 igc_irq_disable(adapter); 5347 } 5348 5349 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5350 5351 for (i = 0; i < adapter->num_q_vectors; i++) { 5352 if (adapter->q_vector[i]) { 5353 napi_synchronize(&adapter->q_vector[i]->napi); 5354 igc_set_queue_napi(adapter, i, NULL); 5355 napi_disable(&adapter->q_vector[i]->napi); 5356 } 5357 } 
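	/* NAPI is quiesced at this point; stop the timers before taking a
	 * final stats snapshot and resetting the MAC below.
	 */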
5358 5359 timer_delete_sync(&adapter->watchdog_timer); 5360 timer_delete_sync(&adapter->phy_info_timer); 5361 5362 /* record the stats before reset*/ 5363 spin_lock(&adapter->stats64_lock); 5364 igc_update_stats(adapter); 5365 spin_unlock(&adapter->stats64_lock); 5366 5367 adapter->link_speed = 0; 5368 adapter->link_duplex = 0; 5369 5370 if (!pci_channel_offline(adapter->pdev)) 5371 igc_reset(adapter); 5372 5373 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 5374 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 5375 5376 igc_disable_all_tx_rings_hw(adapter); 5377 igc_clean_all_tx_rings(adapter); 5378 igc_clean_all_rx_rings(adapter); 5379 5380 if (adapter->fpe.mmsv.pmac_enabled) 5381 ethtool_mmsv_stop(&adapter->fpe.mmsv); 5382 } 5383 5384 void igc_reinit_locked(struct igc_adapter *adapter) 5385 { 5386 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5387 usleep_range(1000, 2000); 5388 igc_down(adapter); 5389 igc_up(adapter); 5390 clear_bit(__IGC_RESETTING, &adapter->state); 5391 } 5392 5393 static void igc_reset_task(struct work_struct *work) 5394 { 5395 struct igc_adapter *adapter; 5396 5397 adapter = container_of(work, struct igc_adapter, reset_task); 5398 5399 rtnl_lock(); 5400 /* If we're already down or resetting, just bail */ 5401 if (test_bit(__IGC_DOWN, &adapter->state) || 5402 test_bit(__IGC_RESETTING, &adapter->state)) { 5403 rtnl_unlock(); 5404 return; 5405 } 5406 5407 igc_rings_dump(adapter); 5408 igc_regs_dump(adapter); 5409 netdev_err(adapter->netdev, "Reset adapter\n"); 5410 igc_reinit_locked(adapter); 5411 rtnl_unlock(); 5412 } 5413 5414 /** 5415 * igc_change_mtu - Change the Maximum Transfer Unit 5416 * @netdev: network interface device structure 5417 * @new_mtu: new value for maximum frame size 5418 * 5419 * Returns 0 on success, negative on failure 5420 */ 5421 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 5422 { 5423 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 5424 struct igc_adapter *adapter = netdev_priv(netdev); 5425 5426 if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { 5427 netdev_dbg(netdev, "Jumbo frames not supported with XDP"); 5428 return -EINVAL; 5429 } 5430 5431 /* adjust max frame to be at least the size of a standard frame */ 5432 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 5433 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 5434 5435 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5436 usleep_range(1000, 2000); 5437 5438 /* igc_down has a dependency on max_frame_size */ 5439 adapter->max_frame_size = max_frame; 5440 5441 if (netif_running(netdev)) 5442 igc_down(adapter); 5443 5444 netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); 5445 WRITE_ONCE(netdev->mtu, new_mtu); 5446 5447 if (netif_running(netdev)) 5448 igc_up(adapter); 5449 else 5450 igc_reset(adapter); 5451 5452 clear_bit(__IGC_RESETTING, &adapter->state); 5453 5454 return 0; 5455 } 5456 5457 /** 5458 * igc_tx_timeout - Respond to a Tx Hang 5459 * @netdev: network interface device structure 5460 * @txqueue: queue number that timed out 5461 **/ 5462 static void igc_tx_timeout(struct net_device *netdev, 5463 unsigned int __always_unused txqueue) 5464 { 5465 struct igc_adapter *adapter = netdev_priv(netdev); 5466 struct igc_hw *hw = &adapter->hw; 5467 5468 /* Do the reset outside of interrupt context */ 5469 adapter->tx_timeout_count++; 5470 schedule_work(&adapter->reset_task); 5471 wr32(IGC_EICS, 5472 (adapter->eims_enable_mask & ~adapter->eims_other)); 5473 } 5474 5475 /** 5476 * igc_get_stats64 - 
Get System Network Statistics 5477 * @netdev: network interface device structure 5478 * @stats: rtnl_link_stats64 pointer 5479 * 5480 * Returns the address of the device statistics structure. 5481 * The statistics are updated here and also from the timer callback. 5482 */ 5483 static void igc_get_stats64(struct net_device *netdev, 5484 struct rtnl_link_stats64 *stats) 5485 { 5486 struct igc_adapter *adapter = netdev_priv(netdev); 5487 5488 spin_lock(&adapter->stats64_lock); 5489 if (!test_bit(__IGC_RESETTING, &adapter->state)) 5490 igc_update_stats(adapter); 5491 memcpy(stats, &adapter->stats64, sizeof(*stats)); 5492 spin_unlock(&adapter->stats64_lock); 5493 } 5494 5495 static netdev_features_t igc_fix_features(struct net_device *netdev, 5496 netdev_features_t features) 5497 { 5498 /* Since there is no support for separate Rx/Tx vlan accel 5499 * enable/disable make sure Tx flag is always in same state as Rx. 5500 */ 5501 if (features & NETIF_F_HW_VLAN_CTAG_RX) 5502 features |= NETIF_F_HW_VLAN_CTAG_TX; 5503 else 5504 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 5505 5506 return features; 5507 } 5508 5509 static int igc_set_features(struct net_device *netdev, 5510 netdev_features_t features) 5511 { 5512 netdev_features_t changed = netdev->features ^ features; 5513 struct igc_adapter *adapter = netdev_priv(netdev); 5514 5515 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 5516 igc_vlan_mode(netdev, features); 5517 5518 /* Add VLAN support */ 5519 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 5520 return 0; 5521 5522 if (!(features & NETIF_F_NTUPLE)) 5523 igc_flush_nfc_rules(adapter); 5524 5525 netdev->features = features; 5526 5527 if (netif_running(netdev)) 5528 igc_reinit_locked(adapter); 5529 else 5530 igc_reset(adapter); 5531 5532 return 1; 5533 } 5534 5535 static netdev_features_t 5536 igc_features_check(struct sk_buff *skb, struct net_device *dev, 5537 netdev_features_t features) 5538 { 5539 unsigned int network_hdr_len, mac_hdr_len; 5540 5541 /* Make certain the headers can be described by a context descriptor */ 5542 mac_hdr_len = skb_network_offset(skb); 5543 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 5544 return features & ~(NETIF_F_HW_CSUM | 5545 NETIF_F_SCTP_CRC | 5546 NETIF_F_HW_VLAN_CTAG_TX | 5547 NETIF_F_TSO | 5548 NETIF_F_TSO6); 5549 5550 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 5551 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 5552 return features & ~(NETIF_F_HW_CSUM | 5553 NETIF_F_SCTP_CRC | 5554 NETIF_F_TSO | 5555 NETIF_F_TSO6); 5556 5557 /* We can only support IPv4 TSO in tunnels if we can mangle the 5558 * inner IP ID field, so strip TSO if MANGLEID is not supported. 
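	 * Only NETIF_F_TSO is dropped here; NETIF_F_TSO6 stays set because
	 * IPv6 has no IP ID field to mangle.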
5559 */ 5560 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 5561 features &= ~NETIF_F_TSO; 5562 5563 return features; 5564 } 5565 5566 static void igc_tsync_interrupt(struct igc_adapter *adapter) 5567 { 5568 struct igc_hw *hw = &adapter->hw; 5569 u32 tsauxc, sec, nsec, tsicr; 5570 struct ptp_clock_event event; 5571 struct timespec64 ts; 5572 5573 tsicr = rd32(IGC_TSICR); 5574 5575 if (tsicr & IGC_TSICR_SYS_WRAP) { 5576 event.type = PTP_CLOCK_PPS; 5577 if (adapter->ptp_caps.pps) 5578 ptp_clock_event(adapter->ptp_clock, &event); 5579 } 5580 5581 if (tsicr & IGC_TSICR_TXTS) { 5582 /* retrieve hardware timestamp */ 5583 igc_ptp_tx_tstamp_event(adapter); 5584 } 5585 5586 if (tsicr & IGC_TSICR_TT0) { 5587 spin_lock(&adapter->tmreg_lock); 5588 ts = timespec64_add(adapter->perout[0].start, 5589 adapter->perout[0].period); 5590 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5591 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5592 tsauxc = rd32(IGC_TSAUXC); 5593 tsauxc |= IGC_TSAUXC_EN_TT0; 5594 wr32(IGC_TSAUXC, tsauxc); 5595 adapter->perout[0].start = ts; 5596 spin_unlock(&adapter->tmreg_lock); 5597 } 5598 5599 if (tsicr & IGC_TSICR_TT1) { 5600 spin_lock(&adapter->tmreg_lock); 5601 ts = timespec64_add(adapter->perout[1].start, 5602 adapter->perout[1].period); 5603 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5604 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5605 tsauxc = rd32(IGC_TSAUXC); 5606 tsauxc |= IGC_TSAUXC_EN_TT1; 5607 wr32(IGC_TSAUXC, tsauxc); 5608 adapter->perout[1].start = ts; 5609 spin_unlock(&adapter->tmreg_lock); 5610 } 5611 5612 if (tsicr & IGC_TSICR_AUTT0) { 5613 nsec = rd32(IGC_AUXSTMPL0); 5614 sec = rd32(IGC_AUXSTMPH0); 5615 event.type = PTP_CLOCK_EXTTS; 5616 event.index = 0; 5617 event.timestamp = sec * NSEC_PER_SEC + nsec; 5618 ptp_clock_event(adapter->ptp_clock, &event); 5619 } 5620 5621 if (tsicr & IGC_TSICR_AUTT1) { 5622 nsec = rd32(IGC_AUXSTMPL1); 5623 sec = rd32(IGC_AUXSTMPH1); 5624 event.type = PTP_CLOCK_EXTTS; 5625 event.index = 1; 5626 event.timestamp = sec * NSEC_PER_SEC + nsec; 5627 ptp_clock_event(adapter->ptp_clock, &event); 5628 } 5629 } 5630 5631 /** 5632 * igc_msix_other - msix other interrupt handler 5633 * @irq: interrupt number 5634 * @data: pointer to a q_vector 5635 */ 5636 static irqreturn_t igc_msix_other(int irq, void *data) 5637 { 5638 struct igc_adapter *adapter = data; 5639 struct igc_hw *hw = &adapter->hw; 5640 u32 icr = rd32(IGC_ICR); 5641 5642 /* reading ICR causes bit 31 of EICR to be cleared */ 5643 if (icr & IGC_ICR_DRSTA) 5644 schedule_work(&adapter->reset_task); 5645 5646 if (icr & IGC_ICR_DOUTSYNC) { 5647 /* HW is reporting DMA is out of sync */ 5648 adapter->stats.doosync++; 5649 } 5650 5651 if (icr & IGC_ICR_LSC) { 5652 hw->mac.get_link_status = true; 5653 /* guard against interrupt when we're going down */ 5654 if (!test_bit(__IGC_DOWN, &adapter->state)) 5655 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5656 } 5657 5658 if (icr & IGC_ICR_TS) 5659 igc_tsync_interrupt(adapter); 5660 5661 wr32(IGC_EIMS, adapter->eims_other); 5662 5663 return IRQ_HANDLED; 5664 } 5665 5666 static void igc_write_itr(struct igc_q_vector *q_vector) 5667 { 5668 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5669 5670 if (!q_vector->set_itr) 5671 return; 5672 5673 if (!itr_val) 5674 itr_val = IGC_ITR_VAL_MASK; 5675 5676 itr_val |= IGC_EITR_CNT_IGNR; 5677 5678 writel(itr_val, q_vector->itr_register); 5679 q_vector->set_itr = 0; 5680 } 5681 5682 static irqreturn_t igc_msix_ring(int irq, void *data) 5683 { 5684 struct igc_q_vector 
*q_vector = data; 5685 5686 /* Write the ITR value calculated from the previous interrupt. */ 5687 igc_write_itr(q_vector); 5688 5689 napi_schedule(&q_vector->napi); 5690 5691 return IRQ_HANDLED; 5692 } 5693 5694 /** 5695 * igc_request_msix - Initialize MSI-X interrupts 5696 * @adapter: Pointer to adapter structure 5697 * 5698 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5699 * kernel. 5700 */ 5701 static int igc_request_msix(struct igc_adapter *adapter) 5702 { 5703 unsigned int num_q_vectors = adapter->num_q_vectors; 5704 int i = 0, err = 0, vector = 0, free_vector = 0; 5705 struct net_device *netdev = adapter->netdev; 5706 5707 err = request_irq(adapter->msix_entries[vector].vector, 5708 &igc_msix_other, 0, netdev->name, adapter); 5709 if (err) 5710 goto err_out; 5711 5712 if (num_q_vectors > MAX_Q_VECTORS) { 5713 num_q_vectors = MAX_Q_VECTORS; 5714 dev_warn(&adapter->pdev->dev, 5715 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5716 adapter->num_q_vectors, MAX_Q_VECTORS); 5717 } 5718 for (i = 0; i < num_q_vectors; i++) { 5719 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5720 5721 vector++; 5722 5723 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5724 5725 if (q_vector->rx.ring && q_vector->tx.ring) 5726 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5727 q_vector->rx.ring->queue_index); 5728 else if (q_vector->tx.ring) 5729 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5730 q_vector->tx.ring->queue_index); 5731 else if (q_vector->rx.ring) 5732 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5733 q_vector->rx.ring->queue_index); 5734 else 5735 sprintf(q_vector->name, "%s-unused", netdev->name); 5736 5737 err = request_irq(adapter->msix_entries[vector].vector, 5738 igc_msix_ring, 0, q_vector->name, 5739 q_vector); 5740 if (err) 5741 goto err_free; 5742 5743 netif_napi_set_irq(&q_vector->napi, 5744 adapter->msix_entries[vector].vector); 5745 } 5746 5747 igc_configure_msix(adapter); 5748 return 0; 5749 5750 err_free: 5751 /* free already assigned IRQs */ 5752 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5753 5754 vector--; 5755 for (i = 0; i < vector; i++) { 5756 free_irq(adapter->msix_entries[free_vector++].vector, 5757 adapter->q_vector[i]); 5758 } 5759 err_out: 5760 return err; 5761 } 5762 5763 /** 5764 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5765 * @adapter: Pointer to adapter structure 5766 * 5767 * This function resets the device so that it has 0 rx queues, tx queues, and 5768 * MSI-X interrupts allocated. 5769 */ 5770 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5771 { 5772 igc_free_q_vectors(adapter); 5773 igc_reset_interrupt_capability(adapter); 5774 } 5775 5776 /* Need to wait a few seconds after link up to get diagnostic information from 5777 * the phy 5778 */ 5779 static void igc_update_phy_info(struct timer_list *t) 5780 { 5781 struct igc_adapter *adapter = timer_container_of(adapter, t, 5782 phy_info_timer); 5783 5784 igc_get_phy_info(&adapter->hw); 5785 } 5786 5787 /** 5788 * igc_has_link - check shared code for link and determine up/down 5789 * @adapter: pointer to driver private info 5790 */ 5791 bool igc_has_link(struct igc_adapter *adapter) 5792 { 5793 struct igc_hw *hw = &adapter->hw; 5794 bool link_active = false; 5795 5796 /* get_link_status is set on LSC (link status) interrupt or 5797 * rx sequence error interrupt. 
get_link_status will stay 5798 * false until the igc_check_for_link establishes link 5799 * for copper adapters ONLY 5800 */ 5801 if (!hw->mac.get_link_status) 5802 return true; 5803 hw->mac.ops.check_for_link(hw); 5804 link_active = !hw->mac.get_link_status; 5805 5806 if (hw->mac.type == igc_i225) { 5807 if (!netif_carrier_ok(adapter->netdev)) { 5808 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5809 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5810 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5811 adapter->link_check_timeout = jiffies; 5812 } 5813 } 5814 5815 return link_active; 5816 } 5817 5818 /** 5819 * igc_watchdog - Timer Call-back 5820 * @t: timer for the watchdog 5821 */ 5822 static void igc_watchdog(struct timer_list *t) 5823 { 5824 struct igc_adapter *adapter = timer_container_of(adapter, t, 5825 watchdog_timer); 5826 /* Do the rest outside of interrupt context */ 5827 schedule_work(&adapter->watchdog_task); 5828 } 5829 5830 static void igc_watchdog_task(struct work_struct *work) 5831 { 5832 struct igc_adapter *adapter = container_of(work, 5833 struct igc_adapter, 5834 watchdog_task); 5835 struct net_device *netdev = adapter->netdev; 5836 struct igc_hw *hw = &adapter->hw; 5837 struct igc_phy_info *phy = &hw->phy; 5838 u16 phy_data, retry_count = 20; 5839 u32 link; 5840 int i; 5841 5842 link = igc_has_link(adapter); 5843 5844 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5845 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5846 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5847 else 5848 link = false; 5849 } 5850 5851 if (link) { 5852 /* Cancel scheduled suspend requests. */ 5853 pm_runtime_resume(netdev->dev.parent); 5854 5855 if (!netif_carrier_ok(netdev)) { 5856 u32 ctrl; 5857 5858 hw->mac.ops.get_speed_and_duplex(hw, 5859 &adapter->link_speed, 5860 &adapter->link_duplex); 5861 5862 ctrl = rd32(IGC_CTRL); 5863 /* Link status message must follow this format */ 5864 netdev_info(netdev, 5865 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5866 adapter->link_speed, 5867 adapter->link_duplex == FULL_DUPLEX ? 5868 "Full" : "Half", 5869 (ctrl & IGC_CTRL_TFCE) && 5870 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5871 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5872 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5873 5874 /* disable EEE if enabled */ 5875 if ((adapter->flags & IGC_FLAG_EEE) && 5876 adapter->link_duplex == HALF_DUPLEX) { 5877 netdev_info(netdev, 5878 "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n"); 5879 adapter->hw.dev_spec._base.eee_enable = false; 5880 adapter->flags &= ~IGC_FLAG_EEE; 5881 } 5882 5883 /* check if SmartSpeed worked */ 5884 igc_check_downshift(hw); 5885 if (phy->speed_downgraded) 5886 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5887 5888 /* adjust timeout factor according to speed/duplex */ 5889 adapter->tx_timeout_factor = 1; 5890 switch (adapter->link_speed) { 5891 case SPEED_10: 5892 adapter->tx_timeout_factor = 14; 5893 break; 5894 case SPEED_100: 5895 case SPEED_1000: 5896 case SPEED_2500: 5897 adapter->tx_timeout_factor = 1; 5898 break; 5899 } 5900 5901 /* Once the launch time has been set on the wire, there 5902 * is a delay before the link speed can be determined 5903 * based on link-up activity. Write into the register 5904 * as soon as we know the correct link speed. 
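			 * igc_tsn_adjust_txtime_offset() below reprograms the
			 * speed-dependent Tx time offset accordingly.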
5905 */ 5906 igc_tsn_adjust_txtime_offset(adapter); 5907 5908 if (adapter->fpe.mmsv.pmac_enabled) 5909 ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, 5910 true); 5911 5912 if (adapter->link_speed != SPEED_1000) 5913 goto no_wait; 5914 5915 /* wait for Remote receiver status OK */ 5916 retry_read_status: 5917 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5918 &phy_data)) { 5919 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5920 retry_count) { 5921 msleep(100); 5922 retry_count--; 5923 goto retry_read_status; 5924 } else if (!retry_count) { 5925 netdev_err(netdev, "exceed max 2 second\n"); 5926 } 5927 } else { 5928 netdev_err(netdev, "read 1000Base-T Status Reg\n"); 5929 } 5930 no_wait: 5931 netif_carrier_on(netdev); 5932 5933 /* link state has changed, schedule phy info update */ 5934 if (!test_bit(__IGC_DOWN, &adapter->state)) 5935 mod_timer(&adapter->phy_info_timer, 5936 round_jiffies(jiffies + 2 * HZ)); 5937 } 5938 } else { 5939 if (netif_carrier_ok(netdev)) { 5940 adapter->link_speed = 0; 5941 adapter->link_duplex = 0; 5942 5943 /* Links status message must follow this format */ 5944 netdev_info(netdev, "NIC Link is Down\n"); 5945 netif_carrier_off(netdev); 5946 5947 if (adapter->fpe.mmsv.pmac_enabled) 5948 ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, 5949 false); 5950 5951 /* link state has changed, schedule phy info update */ 5952 if (!test_bit(__IGC_DOWN, &adapter->state)) 5953 mod_timer(&adapter->phy_info_timer, 5954 round_jiffies(jiffies + 2 * HZ)); 5955 5956 pm_schedule_suspend(netdev->dev.parent, 5957 MSEC_PER_SEC * 5); 5958 } 5959 } 5960 5961 spin_lock(&adapter->stats64_lock); 5962 igc_update_stats(adapter); 5963 spin_unlock(&adapter->stats64_lock); 5964 5965 for (i = 0; i < adapter->num_tx_queues; i++) { 5966 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5967 5968 if (!netif_carrier_ok(netdev)) { 5969 /* We've lost link, so the controller stops DMA, 5970 * but we've got queued Tx work that's never going 5971 * to get done, so reset controller to flush Tx. 5972 * (Do the reset outside of interrupt context). 
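* A ring that still has descriptors in use while the carrier is off is
* the sign of that stuck work; that is what the check below looks for.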
5973 */ 5974 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5975 adapter->tx_timeout_count++; 5976 schedule_work(&adapter->reset_task); 5977 /* return immediately since reset is imminent */ 5978 return; 5979 } 5980 } 5981 5982 /* Force detection of hung controller every watchdog period */ 5983 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5984 } 5985 5986 /* Cause software interrupt to ensure Rx ring is cleaned */ 5987 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5988 u32 eics = 0; 5989 5990 for (i = 0; i < adapter->num_q_vectors; i++) { 5991 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5992 struct igc_ring *rx_ring; 5993 5994 if (!q_vector->rx.ring) 5995 continue; 5996 5997 rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; 5998 5999 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 6000 eics |= q_vector->eims_value; 6001 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 6002 } 6003 } 6004 if (eics) 6005 wr32(IGC_EICS, eics); 6006 } else { 6007 struct igc_ring *rx_ring = adapter->rx_ring[0]; 6008 6009 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 6010 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 6011 wr32(IGC_ICS, IGC_ICS_RXDMT0); 6012 } 6013 } 6014 6015 igc_ptp_tx_hang(adapter); 6016 6017 /* Reset the timer */ 6018 if (!test_bit(__IGC_DOWN, &adapter->state)) { 6019 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 6020 mod_timer(&adapter->watchdog_timer, 6021 round_jiffies(jiffies + HZ)); 6022 else 6023 mod_timer(&adapter->watchdog_timer, 6024 round_jiffies(jiffies + 2 * HZ)); 6025 } 6026 } 6027 6028 /** 6029 * igc_intr_msi - Interrupt Handler 6030 * @irq: interrupt number 6031 * @data: pointer to a network interface device structure 6032 */ 6033 static irqreturn_t igc_intr_msi(int irq, void *data) 6034 { 6035 struct igc_adapter *adapter = data; 6036 struct igc_q_vector *q_vector = adapter->q_vector[0]; 6037 struct igc_hw *hw = &adapter->hw; 6038 /* read ICR disables interrupts using IAM */ 6039 u32 icr = rd32(IGC_ICR); 6040 6041 igc_write_itr(q_vector); 6042 6043 if (icr & IGC_ICR_DRSTA) 6044 schedule_work(&adapter->reset_task); 6045 6046 if (icr & IGC_ICR_DOUTSYNC) { 6047 /* HW is reporting DMA is out of sync */ 6048 adapter->stats.doosync++; 6049 } 6050 6051 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6052 hw->mac.get_link_status = true; 6053 if (!test_bit(__IGC_DOWN, &adapter->state)) 6054 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6055 } 6056 6057 if (icr & IGC_ICR_TS) 6058 igc_tsync_interrupt(adapter); 6059 6060 napi_schedule(&q_vector->napi); 6061 6062 return IRQ_HANDLED; 6063 } 6064 6065 /** 6066 * igc_intr - Legacy Interrupt Handler 6067 * @irq: interrupt number 6068 * @data: pointer to a network interface device structure 6069 */ 6070 static irqreturn_t igc_intr(int irq, void *data) 6071 { 6072 struct igc_adapter *adapter = data; 6073 struct igc_q_vector *q_vector = adapter->q_vector[0]; 6074 struct igc_hw *hw = &adapter->hw; 6075 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
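* (the ICR read itself acts as the acknowledge)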
No 6076 * need for the IMC write 6077 */ 6078 u32 icr = rd32(IGC_ICR); 6079 6080 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 6081 * not set, then the adapter didn't send an interrupt 6082 */ 6083 if (!(icr & IGC_ICR_INT_ASSERTED)) 6084 return IRQ_NONE; 6085 6086 igc_write_itr(q_vector); 6087 6088 if (icr & IGC_ICR_DRSTA) 6089 schedule_work(&adapter->reset_task); 6090 6091 if (icr & IGC_ICR_DOUTSYNC) { 6092 /* HW is reporting DMA is out of sync */ 6093 adapter->stats.doosync++; 6094 } 6095 6096 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6097 hw->mac.get_link_status = true; 6098 /* guard against interrupt when we're going down */ 6099 if (!test_bit(__IGC_DOWN, &adapter->state)) 6100 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6101 } 6102 6103 if (icr & IGC_ICR_TS) 6104 igc_tsync_interrupt(adapter); 6105 6106 napi_schedule(&q_vector->napi); 6107 6108 return IRQ_HANDLED; 6109 } 6110 6111 static void igc_free_irq(struct igc_adapter *adapter) 6112 { 6113 if (adapter->msix_entries) { 6114 int vector = 0, i; 6115 6116 free_irq(adapter->msix_entries[vector++].vector, adapter); 6117 6118 for (i = 0; i < adapter->num_q_vectors; i++) 6119 free_irq(adapter->msix_entries[vector++].vector, 6120 adapter->q_vector[i]); 6121 } else { 6122 free_irq(adapter->pdev->irq, adapter); 6123 } 6124 } 6125 6126 /** 6127 * igc_request_irq - initialize interrupts 6128 * @adapter: Pointer to adapter structure 6129 * 6130 * Attempts to configure interrupts using the best available 6131 * capabilities of the hardware and kernel. 6132 */ 6133 static int igc_request_irq(struct igc_adapter *adapter) 6134 { 6135 struct net_device *netdev = adapter->netdev; 6136 struct pci_dev *pdev = adapter->pdev; 6137 int err = 0; 6138 6139 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 6140 err = igc_request_msix(adapter); 6141 if (!err) 6142 goto request_done; 6143 /* fall back to MSI */ 6144 igc_free_all_tx_resources(adapter); 6145 igc_free_all_rx_resources(adapter); 6146 6147 igc_clear_interrupt_scheme(adapter); 6148 err = igc_init_interrupt_scheme(adapter, false); 6149 if (err) 6150 goto request_done; 6151 igc_setup_all_tx_resources(adapter); 6152 igc_setup_all_rx_resources(adapter); 6153 igc_configure(adapter); 6154 } 6155 6156 igc_assign_vector(adapter->q_vector[0], 0); 6157 6158 if (adapter->flags & IGC_FLAG_HAS_MSI) { 6159 err = request_irq(pdev->irq, &igc_intr_msi, 0, 6160 netdev->name, adapter); 6161 if (!err) 6162 goto request_done; 6163 6164 /* fall back to legacy interrupts */ 6165 igc_reset_interrupt_capability(adapter); 6166 adapter->flags &= ~IGC_FLAG_HAS_MSI; 6167 } 6168 6169 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 6170 netdev->name, adapter); 6171 6172 if (err) 6173 netdev_err(netdev, "Error %d getting interrupt\n", err); 6174 6175 request_done: 6176 return err; 6177 } 6178 6179 /** 6180 * __igc_open - Called when a network interface is made active 6181 * @netdev: network interface device structure 6182 * @resuming: boolean indicating if the device is resuming 6183 * 6184 * Returns 0 on success, negative value on failure 6185 * 6186 * The open entry point is called when a network interface is made 6187 * active by the system (IFF_UP). At this point all resources needed 6188 * for transmit and receive operations are allocated, the interrupt 6189 * handler is registered with the OS, the watchdog timer is started, 6190 * and the stack is notified that the interface is ready. 
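* Resources are brought up in dependency order: Tx and Rx descriptor
* rings first, then the PHY link, the hardware configuration, the IRQs
* and NAPI instances, and finally interrupts and the Tx queues.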
6191 */ 6192 static int __igc_open(struct net_device *netdev, bool resuming) 6193 { 6194 struct igc_adapter *adapter = netdev_priv(netdev); 6195 struct pci_dev *pdev = adapter->pdev; 6196 struct igc_hw *hw = &adapter->hw; 6197 struct napi_struct *napi; 6198 int err = 0; 6199 int i = 0; 6200 6201 /* disallow open during test */ 6202 6203 if (test_bit(__IGC_TESTING, &adapter->state)) { 6204 WARN_ON(resuming); 6205 return -EBUSY; 6206 } 6207 6208 if (!resuming) 6209 pm_runtime_get_sync(&pdev->dev); 6210 6211 netif_carrier_off(netdev); 6212 6213 /* allocate transmit descriptors */ 6214 err = igc_setup_all_tx_resources(adapter); 6215 if (err) 6216 goto err_setup_tx; 6217 6218 /* allocate receive descriptors */ 6219 err = igc_setup_all_rx_resources(adapter); 6220 if (err) 6221 goto err_setup_rx; 6222 6223 igc_power_up_link(adapter); 6224 6225 igc_configure(adapter); 6226 6227 err = igc_request_irq(adapter); 6228 if (err) 6229 goto err_req_irq; 6230 6231 clear_bit(__IGC_DOWN, &adapter->state); 6232 6233 for (i = 0; i < adapter->num_q_vectors; i++) { 6234 napi = &adapter->q_vector[i]->napi; 6235 napi_enable(napi); 6236 igc_set_queue_napi(adapter, i, napi); 6237 } 6238 6239 /* Clear any pending interrupts. */ 6240 rd32(IGC_ICR); 6241 igc_irq_enable(adapter); 6242 6243 if (!resuming) 6244 pm_runtime_put(&pdev->dev); 6245 6246 netif_tx_start_all_queues(netdev); 6247 6248 /* start the watchdog. */ 6249 hw->mac.get_link_status = true; 6250 schedule_work(&adapter->watchdog_task); 6251 6252 return IGC_SUCCESS; 6253 6254 err_req_irq: 6255 igc_release_hw_control(adapter); 6256 igc_power_down_phy_copper_base(&adapter->hw); 6257 igc_free_all_rx_resources(adapter); 6258 err_setup_rx: 6259 igc_free_all_tx_resources(adapter); 6260 err_setup_tx: 6261 igc_reset(adapter); 6262 if (!resuming) 6263 pm_runtime_put(&pdev->dev); 6264 6265 return err; 6266 } 6267 6268 int igc_open(struct net_device *netdev) 6269 { 6270 struct igc_adapter *adapter = netdev_priv(netdev); 6271 int err; 6272 6273 /* Notify the stack of the actual queue counts. */ 6274 err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, 6275 adapter->num_rx_queues); 6276 if (err) { 6277 netdev_err(netdev, "error setting real queue count\n"); 6278 return err; 6279 } 6280 6281 return __igc_open(netdev, false); 6282 } 6283 6284 /** 6285 * __igc_close - Disables a network interface 6286 * @netdev: network interface device structure 6287 * @suspending: boolean indicating the device is suspending 6288 * 6289 * Returns 0, this is not allowed to fail 6290 * 6291 * The close entry point is called when an interface is de-activated 6292 * by the OS. The hardware is still under the driver's control, but 6293 * needs to be disabled. A global MAC reset is issued to stop the 6294 * hardware, and all transmit and receive resources are freed. 
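* Teardown is roughly the reverse of __igc_open: the interface is
* brought down, hardware control is returned to the firmware, the IRQs
* are freed and the Tx/Rx descriptor resources are released.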
6295 */ 6296 static int __igc_close(struct net_device *netdev, bool suspending) 6297 { 6298 struct igc_adapter *adapter = netdev_priv(netdev); 6299 struct pci_dev *pdev = adapter->pdev; 6300 6301 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 6302 6303 if (!suspending) 6304 pm_runtime_get_sync(&pdev->dev); 6305 6306 igc_down(adapter); 6307 6308 igc_release_hw_control(adapter); 6309 6310 igc_free_irq(adapter); 6311 6312 igc_free_all_tx_resources(adapter); 6313 igc_free_all_rx_resources(adapter); 6314 6315 if (!suspending) 6316 pm_runtime_put_sync(&pdev->dev); 6317 6318 return 0; 6319 } 6320 6321 int igc_close(struct net_device *netdev) 6322 { 6323 if (netif_device_present(netdev) || netdev->dismantle) 6324 return __igc_close(netdev, false); 6325 return 0; 6326 } 6327 6328 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, 6329 bool enable) 6330 { 6331 struct igc_ring *ring; 6332 6333 if (queue < 0 || queue >= adapter->num_tx_queues) 6334 return -EINVAL; 6335 6336 ring = adapter->tx_ring[queue]; 6337 ring->launchtime_enable = enable; 6338 6339 return 0; 6340 } 6341 6342 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) 6343 { 6344 struct timespec64 b; 6345 6346 b = ktime_to_timespec64(base_time); 6347 6348 return timespec64_compare(now, &b) > 0; 6349 } 6350 6351 static bool validate_schedule(struct igc_adapter *adapter, 6352 const struct tc_taprio_qopt_offload *qopt) 6353 { 6354 int queue_uses[IGC_MAX_TX_QUEUES] = { }; 6355 struct igc_hw *hw = &adapter->hw; 6356 struct timespec64 now; 6357 size_t n; 6358 6359 if (qopt->cycle_time_extension) 6360 return false; 6361 6362 igc_ptp_read(adapter, &now); 6363 6364 /* If we program the controller's BASET registers with a time 6365 * in the future, it will hold all the packets until that 6366 * time, causing a lot of TX Hangs, so to avoid that, we 6367 * reject schedules that would start in the future. 6368 * Note: Limitation above is no longer in i226. 6369 */ 6370 if (!is_base_time_past(qopt->base_time, &now) && 6371 igc_is_device_id_i225(hw)) 6372 return false; 6373 6374 for (n = 0; n < qopt->num_entries; n++) { 6375 const struct tc_taprio_sched_entry *e, *prev; 6376 int i; 6377 6378 prev = n ? &qopt->entries[n - 1] : NULL; 6379 e = &qopt->entries[n]; 6380 6381 /* i225 only supports "global" frame preemption 6382 * settings. 6383 */ 6384 if (e->command != TC_TAPRIO_CMD_SET_GATES) 6385 return false; 6386 6387 for (i = 0; i < adapter->num_tx_queues; i++) 6388 if (e->gate_mask & BIT(i)) { 6389 queue_uses[i]++; 6390 6391 /* There are limitations: A single queue cannot 6392 * be opened and closed multiple times per cycle 6393 * unless the gate stays open. Check for it. 
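* Example: gate masks of open/closed/open for one queue across three
* entries count two uses without the middle entry keeping the gate
* open, so such a schedule is rejected.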
6394 */ 6395 if (queue_uses[i] > 1 && 6396 !(prev->gate_mask & BIT(i))) 6397 return false; 6398 } 6399 } 6400 6401 return true; 6402 } 6403 6404 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, 6405 struct tc_etf_qopt_offload *qopt) 6406 { 6407 struct igc_hw *hw = &adapter->hw; 6408 int err; 6409 6410 if (hw->mac.type != igc_i225) 6411 return -EOPNOTSUPP; 6412 6413 err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable); 6414 if (err) 6415 return err; 6416 6417 return igc_tsn_offload_apply(adapter); 6418 } 6419 6420 static int igc_qbv_clear_schedule(struct igc_adapter *adapter) 6421 { 6422 unsigned long flags; 6423 int i; 6424 6425 adapter->base_time = 0; 6426 adapter->cycle_time = NSEC_PER_SEC; 6427 adapter->taprio_offload_enable = false; 6428 adapter->qbv_config_change_errors = 0; 6429 adapter->qbv_count = 0; 6430 6431 for (i = 0; i < adapter->num_tx_queues; i++) { 6432 struct igc_ring *ring = adapter->tx_ring[i]; 6433 6434 ring->start_time = 0; 6435 ring->end_time = NSEC_PER_SEC; 6436 ring->max_sdu = 0; 6437 ring->preemptible = false; 6438 } 6439 6440 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6441 6442 adapter->qbv_transition = false; 6443 6444 for (i = 0; i < adapter->num_tx_queues; i++) { 6445 struct igc_ring *ring = adapter->tx_ring[i]; 6446 6447 ring->oper_gate_closed = false; 6448 ring->admin_gate_closed = false; 6449 } 6450 6451 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6452 6453 return 0; 6454 } 6455 6456 static int igc_tsn_clear_schedule(struct igc_adapter *adapter) 6457 { 6458 igc_qbv_clear_schedule(adapter); 6459 6460 return 0; 6461 } 6462 6463 static void igc_taprio_stats(struct net_device *dev, 6464 struct tc_taprio_qopt_stats *stats) 6465 { 6466 /* When Strict_End is enabled, the tx_overruns counter 6467 * will always be zero. 6468 */ 6469 stats->tx_overruns = 0; 6470 } 6471 6472 static void igc_taprio_queue_stats(struct net_device *dev, 6473 struct tc_taprio_qopt_queue_stats *queue_stats) 6474 { 6475 struct tc_taprio_qopt_stats *stats = &queue_stats->stats; 6476 6477 /* When Strict_End is enabled, the tx_overruns counter 6478 * will always be zero. 
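* (with strict end, a frame is only started if it can complete before
* the gate closes, so nothing can run past its window)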
6479 */ 6480 stats->tx_overruns = 0; 6481 } 6482 6483 static int igc_save_qbv_schedule(struct igc_adapter *adapter, 6484 struct tc_taprio_qopt_offload *qopt) 6485 { 6486 bool queue_configured[IGC_MAX_TX_QUEUES] = { }; 6487 struct igc_hw *hw = &adapter->hw; 6488 u32 start_time = 0, end_time = 0; 6489 struct timespec64 now; 6490 unsigned long flags; 6491 size_t n; 6492 int i; 6493 6494 if (qopt->base_time < 0) 6495 return -ERANGE; 6496 6497 if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) 6498 return -EALREADY; 6499 6500 if (!validate_schedule(adapter, qopt)) 6501 return -EINVAL; 6502 6503 if (qopt->mqprio.preemptible_tcs && 6504 !(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) { 6505 NL_SET_ERR_MSG_MOD(qopt->extack, 6506 "reverse-tsn-txq-prio private flag must be enabled before setting preemptible tc"); 6507 return -ENODEV; 6508 } 6509 6510 igc_ptp_read(adapter, &now); 6511 6512 if (igc_tsn_is_taprio_activated_by_user(adapter) && 6513 is_base_time_past(qopt->base_time, &now)) 6514 adapter->qbv_config_change_errors++; 6515 6516 adapter->cycle_time = qopt->cycle_time; 6517 adapter->base_time = qopt->base_time; 6518 adapter->taprio_offload_enable = true; 6519 6520 for (n = 0; n < qopt->num_entries; n++) { 6521 struct tc_taprio_sched_entry *e = &qopt->entries[n]; 6522 6523 end_time += e->interval; 6524 6525 /* If any of the conditions below are true, we need to manually 6526 * control the end time of the cycle. 6527 * 1. Qbv users can specify a cycle time that is not equal 6528 * to the total GCL intervals. Hence, recalculation is 6529 * necessary here to exclude the time interval that 6530 * exceeds the cycle time. 6531 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, 6532 * once the end of the list is reached, it will switch 6533 * to the END_OF_CYCLE state and leave the gates in the 6534 * same state until the next cycle is started. 6535 */ 6536 if (end_time > adapter->cycle_time || 6537 n + 1 == qopt->num_entries) 6538 end_time = adapter->cycle_time; 6539 6540 for (i = 0; i < adapter->num_tx_queues; i++) { 6541 struct igc_ring *ring = adapter->tx_ring[i]; 6542 6543 if (!(e->gate_mask & BIT(i))) 6544 continue; 6545 6546 /* Check whether a queue stays open for more than one 6547 * entry. If so, keep the start and advance the end 6548 * time. 6549 */ 6550 if (!queue_configured[i]) 6551 ring->start_time = start_time; 6552 ring->end_time = end_time; 6553 6554 if (ring->start_time >= adapter->cycle_time) 6555 queue_configured[i] = false; 6556 else 6557 queue_configured[i] = true; 6558 } 6559 6560 start_time += e->interval; 6561 } 6562 6563 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6564 6565 /* Check whether a queue gets configured. 6566 * If not, set the start and end time to be end time. 
6567 */ 6568 for (i = 0; i < adapter->num_tx_queues; i++) { 6569 struct igc_ring *ring = adapter->tx_ring[i]; 6570 6571 if (!is_base_time_past(qopt->base_time, &now)) { 6572 ring->admin_gate_closed = false; 6573 } else { 6574 ring->oper_gate_closed = false; 6575 ring->admin_gate_closed = false; 6576 } 6577 6578 if (!queue_configured[i]) { 6579 if (!is_base_time_past(qopt->base_time, &now)) 6580 ring->admin_gate_closed = true; 6581 else 6582 ring->oper_gate_closed = true; 6583 6584 ring->start_time = end_time; 6585 ring->end_time = end_time; 6586 } 6587 } 6588 6589 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6590 6591 for (i = 0; i < adapter->num_tx_queues; i++) { 6592 struct igc_ring *ring = adapter->tx_ring[i]; 6593 struct net_device *dev = adapter->netdev; 6594 6595 if (qopt->max_sdu[i]) 6596 ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; 6597 else 6598 ring->max_sdu = 0; 6599 } 6600 6601 igc_fpe_save_preempt_queue(adapter, &qopt->mqprio); 6602 6603 return 0; 6604 } 6605 6606 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, 6607 struct tc_taprio_qopt_offload *qopt) 6608 { 6609 struct igc_hw *hw = &adapter->hw; 6610 int err; 6611 6612 if (hw->mac.type != igc_i225) 6613 return -EOPNOTSUPP; 6614 6615 switch (qopt->cmd) { 6616 case TAPRIO_CMD_REPLACE: 6617 err = igc_save_qbv_schedule(adapter, qopt); 6618 break; 6619 case TAPRIO_CMD_DESTROY: 6620 err = igc_tsn_clear_schedule(adapter); 6621 break; 6622 case TAPRIO_CMD_STATS: 6623 igc_taprio_stats(adapter->netdev, &qopt->stats); 6624 return 0; 6625 case TAPRIO_CMD_QUEUE_STATS: 6626 igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); 6627 return 0; 6628 default: 6629 return -EOPNOTSUPP; 6630 } 6631 6632 if (err) 6633 return err; 6634 6635 return igc_tsn_offload_apply(adapter); 6636 } 6637 6638 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue, 6639 bool enable, int idleslope, int sendslope, 6640 int hicredit, int locredit) 6641 { 6642 bool cbs_status[IGC_MAX_SR_QUEUES] = { false }; 6643 struct net_device *netdev = adapter->netdev; 6644 struct igc_ring *ring; 6645 int i; 6646 6647 /* i225 has two sets of credit-based shaper logic. 6648 * Supporting it only on the top two priority queues 6649 */ 6650 if (queue < 0 || queue > 1) 6651 return -EINVAL; 6652 6653 ring = adapter->tx_ring[queue]; 6654 6655 for (i = 0; i < IGC_MAX_SR_QUEUES; i++) 6656 if (adapter->tx_ring[i]) 6657 cbs_status[i] = adapter->tx_ring[i]->cbs_enable; 6658 6659 /* CBS should be enabled on the highest priority queue first in order 6660 * for the CBS algorithm to operate as intended. 
6661 */ 6662 if (enable) { 6663 if (queue == 1 && !cbs_status[0]) { 6664 netdev_err(netdev, 6665 "Enabling CBS on queue1 before queue0\n"); 6666 return -EINVAL; 6667 } 6668 } else { 6669 if (queue == 0 && cbs_status[1]) { 6670 netdev_err(netdev, 6671 "Disabling CBS on queue0 before queue1\n"); 6672 return -EINVAL; 6673 } 6674 } 6675 6676 ring->cbs_enable = enable; 6677 ring->idleslope = idleslope; 6678 ring->sendslope = sendslope; 6679 ring->hicredit = hicredit; 6680 ring->locredit = locredit; 6681 6682 return 0; 6683 } 6684 6685 static int igc_tsn_enable_cbs(struct igc_adapter *adapter, 6686 struct tc_cbs_qopt_offload *qopt) 6687 { 6688 struct igc_hw *hw = &adapter->hw; 6689 int err; 6690 6691 if (hw->mac.type != igc_i225) 6692 return -EOPNOTSUPP; 6693 6694 if (qopt->queue < 0 || qopt->queue > 1) 6695 return -EINVAL; 6696 6697 err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, 6698 qopt->idleslope, qopt->sendslope, 6699 qopt->hicredit, qopt->locredit); 6700 if (err) 6701 return err; 6702 6703 return igc_tsn_offload_apply(adapter); 6704 } 6705 6706 static int igc_tc_query_caps(struct igc_adapter *adapter, 6707 struct tc_query_caps_base *base) 6708 { 6709 struct igc_hw *hw = &adapter->hw; 6710 6711 switch (base->type) { 6712 case TC_SETUP_QDISC_MQPRIO: { 6713 struct tc_mqprio_caps *caps = base->caps; 6714 6715 caps->validate_queue_counts = true; 6716 6717 return 0; 6718 } 6719 case TC_SETUP_QDISC_TAPRIO: { 6720 struct tc_taprio_caps *caps = base->caps; 6721 6722 if (!(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) 6723 caps->broken_mqprio = true; 6724 6725 if (hw->mac.type == igc_i225) { 6726 caps->supports_queue_max_sdu = true; 6727 caps->gate_mask_per_txq = true; 6728 } 6729 6730 return 0; 6731 } 6732 default: 6733 return -EOPNOTSUPP; 6734 } 6735 } 6736 6737 static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, 6738 u16 *offset) 6739 { 6740 int i; 6741 6742 adapter->strict_priority_enable = true; 6743 adapter->num_tc = num_tc; 6744 6745 for (i = 0; i < num_tc; i++) 6746 adapter->queue_per_tc[i] = offset[i]; 6747 } 6748 6749 static bool 6750 igc_tsn_is_tc_to_queue_priority_ordered(struct tc_mqprio_qopt_offload *mqprio) 6751 { 6752 int num_tc = mqprio->qopt.num_tc; 6753 int i; 6754 6755 for (i = 1; i < num_tc; i++) { 6756 if (mqprio->qopt.offset[i - 1] > mqprio->qopt.offset[i]) 6757 return false; 6758 } 6759 6760 return true; 6761 } 6762 6763 static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, 6764 struct tc_mqprio_qopt_offload *mqprio) 6765 { 6766 struct igc_hw *hw = &adapter->hw; 6767 int err, i; 6768 6769 if (hw->mac.type != igc_i225) 6770 return -EOPNOTSUPP; 6771 6772 if (!mqprio->qopt.num_tc) { 6773 adapter->strict_priority_enable = false; 6774 igc_fpe_clear_preempt_queue(adapter); 6775 netdev_reset_tc(adapter->netdev); 6776 goto apply; 6777 } 6778 6779 /* There are as many TCs as Tx queues. */ 6780 if (mqprio->qopt.num_tc != adapter->num_tx_queues) { 6781 NL_SET_ERR_MSG_FMT_MOD(mqprio->extack, 6782 "Only %d traffic classes supported", 6783 adapter->num_tx_queues); 6784 return -EOPNOTSUPP; 6785 } 6786 6787 /* Only one queue per TC is supported. 
*/ 6788 for (i = 0; i < mqprio->qopt.num_tc; i++) { 6789 if (mqprio->qopt.count[i] != 1) { 6790 NL_SET_ERR_MSG_MOD(mqprio->extack, 6791 "Only one queue per TC supported"); 6792 return -EOPNOTSUPP; 6793 } 6794 } 6795 6796 if (!igc_tsn_is_tc_to_queue_priority_ordered(mqprio)) { 6797 NL_SET_ERR_MSG_MOD(mqprio->extack, 6798 "tc to queue mapping must preserve increasing priority (higher tc -> higher queue)"); 6799 return -EOPNOTSUPP; 6800 } 6801 6802 igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, 6803 mqprio->qopt.offset); 6804 6805 err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); 6806 if (err) 6807 return err; 6808 6809 for (i = 0; i < adapter->num_tc; i++) { 6810 err = netdev_set_tc_queue(adapter->netdev, i, 1, 6811 adapter->queue_per_tc[i]); 6812 if (err) 6813 return err; 6814 } 6815 6816 /* In case the card is configured with less than four queues. */ 6817 for (; i < IGC_MAX_TX_QUEUES; i++) 6818 adapter->queue_per_tc[i] = i; 6819 6820 mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; 6821 igc_fpe_save_preempt_queue(adapter, mqprio); 6822 6823 apply: 6824 return igc_tsn_offload_apply(adapter); 6825 } 6826 6827 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 6828 void *type_data) 6829 { 6830 struct igc_adapter *adapter = netdev_priv(dev); 6831 6832 adapter->tc_setup_type = type; 6833 6834 switch (type) { 6835 case TC_QUERY_CAPS: 6836 return igc_tc_query_caps(adapter, type_data); 6837 case TC_SETUP_QDISC_TAPRIO: 6838 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 6839 6840 case TC_SETUP_QDISC_ETF: 6841 return igc_tsn_enable_launchtime(adapter, type_data); 6842 6843 case TC_SETUP_QDISC_CBS: 6844 return igc_tsn_enable_cbs(adapter, type_data); 6845 6846 case TC_SETUP_QDISC_MQPRIO: 6847 return igc_tsn_enable_mqprio(adapter, type_data); 6848 6849 default: 6850 return -EOPNOTSUPP; 6851 } 6852 } 6853 6854 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 6855 { 6856 struct igc_adapter *adapter = netdev_priv(dev); 6857 6858 switch (bpf->command) { 6859 case XDP_SETUP_PROG: 6860 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 6861 case XDP_SETUP_XSK_POOL: 6862 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 6863 bpf->xsk.queue_id); 6864 default: 6865 return -EOPNOTSUPP; 6866 } 6867 } 6868 6869 static int igc_xdp_xmit(struct net_device *dev, int num_frames, 6870 struct xdp_frame **frames, u32 flags) 6871 { 6872 struct igc_adapter *adapter = netdev_priv(dev); 6873 int cpu = smp_processor_id(); 6874 struct netdev_queue *nq; 6875 struct igc_ring *ring; 6876 int i, nxmit; 6877 6878 if (unlikely(!netif_carrier_ok(dev))) 6879 return -ENETDOWN; 6880 6881 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 6882 return -EINVAL; 6883 6884 ring = igc_get_tx_ring(adapter, cpu); 6885 nq = txring_txq(ring); 6886 6887 __netif_tx_lock(nq, cpu); 6888 6889 /* Avoid transmit queue timeout since we share it with the slow path */ 6890 txq_trans_cond_update(nq); 6891 6892 nxmit = 0; 6893 for (i = 0; i < num_frames; i++) { 6894 int err; 6895 struct xdp_frame *xdpf = frames[i]; 6896 6897 err = igc_xdp_init_tx_descriptor(ring, xdpf); 6898 if (err) 6899 break; 6900 nxmit++; 6901 } 6902 6903 if (flags & XDP_XMIT_FLUSH) 6904 igc_flush_tx_descriptors(ring); 6905 6906 __netif_tx_unlock(nq); 6907 6908 return nxmit; 6909 } 6910 6911 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6912 struct igc_q_vector *q_vector) 6913 { 6914 struct igc_hw *hw = &adapter->hw; 6915 u32 eics = 0; 6916 6917 eics |= q_vector->eims_value; 6918 wr32(IGC_EICS, eics); 
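/* The EICS write above raises a software-triggered interrupt on this queue
* pair's MSI-X vector, so its NAPI poll gets scheduled even when no new
* descriptor event is pending.
*/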
6919 } 6920 6921 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6922 { 6923 struct igc_adapter *adapter = netdev_priv(dev); 6924 struct igc_q_vector *q_vector; 6925 struct igc_ring *ring; 6926 6927 if (test_bit(__IGC_DOWN, &adapter->state)) 6928 return -ENETDOWN; 6929 6930 if (!igc_xdp_is_enabled(adapter)) 6931 return -ENXIO; 6932 6933 if (queue_id >= adapter->num_rx_queues) 6934 return -EINVAL; 6935 6936 ring = adapter->rx_ring[queue_id]; 6937 6938 if (!ring->xsk_pool) 6939 return -ENXIO; 6940 6941 q_vector = adapter->q_vector[queue_id]; 6942 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6943 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6944 6945 return 0; 6946 } 6947 6948 static ktime_t igc_get_tstamp(struct net_device *dev, 6949 const struct skb_shared_hwtstamps *hwtstamps, 6950 bool cycles) 6951 { 6952 struct igc_adapter *adapter = netdev_priv(dev); 6953 struct igc_inline_rx_tstamps *tstamp; 6954 ktime_t timestamp; 6955 6956 tstamp = hwtstamps->netdev_data; 6957 6958 if (cycles) 6959 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); 6960 else 6961 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6962 6963 return timestamp; 6964 } 6965 6966 static const struct net_device_ops igc_netdev_ops = { 6967 .ndo_open = igc_open, 6968 .ndo_stop = igc_close, 6969 .ndo_start_xmit = igc_xmit_frame, 6970 .ndo_set_rx_mode = igc_set_rx_mode, 6971 .ndo_set_mac_address = igc_set_mac, 6972 .ndo_change_mtu = igc_change_mtu, 6973 .ndo_tx_timeout = igc_tx_timeout, 6974 .ndo_get_stats64 = igc_get_stats64, 6975 .ndo_fix_features = igc_fix_features, 6976 .ndo_set_features = igc_set_features, 6977 .ndo_features_check = igc_features_check, 6978 .ndo_setup_tc = igc_setup_tc, 6979 .ndo_bpf = igc_bpf, 6980 .ndo_xdp_xmit = igc_xdp_xmit, 6981 .ndo_xsk_wakeup = igc_xsk_wakeup, 6982 .ndo_get_tstamp = igc_get_tstamp, 6983 .ndo_hwtstamp_get = igc_ptp_hwtstamp_get, 6984 .ndo_hwtstamp_set = igc_ptp_hwtstamp_set, 6985 }; 6986 6987 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6988 { 6989 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6990 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6991 u32 value = 0; 6992 6993 if (IGC_REMOVED(hw_addr)) 6994 return ~value; 6995 6996 value = readl(&hw_addr[reg]); 6997 6998 /* reads should not return all F's */ 6999 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 7000 struct net_device *netdev = igc->netdev; 7001 7002 hw->hw_addr = NULL; 7003 netif_device_detach(netdev); 7004 netdev_err(netdev, "PCIe link lost, device now detached\n"); 7005 WARN(pci_device_is_present(igc->pdev), 7006 "igc: Failed to read reg 0x%x!\n", reg); 7007 } 7008 7009 return value; 7010 } 7011 7012 /* Mapping HW RSS Type to enum xdp_rss_hash_type */ 7013 static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { 7014 [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, 7015 [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, 7016 [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, 7017 [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, 7018 [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 7019 [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, 7020 [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 7021 [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, 7022 [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, 7023 [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, 7024 [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ 7025 [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ 
7026 [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ 7027 [13] = XDP_RSS_TYPE_NONE, 7028 [14] = XDP_RSS_TYPE_NONE, 7029 [15] = XDP_RSS_TYPE_NONE, 7030 }; 7031 7032 static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 7033 enum xdp_rss_hash_type *rss_type) 7034 { 7035 const struct igc_xdp_buff *ctx = (void *)_ctx; 7036 7037 if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) 7038 return -ENODATA; 7039 7040 *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); 7041 *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; 7042 7043 return 0; 7044 } 7045 7046 static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) 7047 { 7048 const struct igc_xdp_buff *ctx = (void *)_ctx; 7049 struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); 7050 struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; 7051 7052 if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { 7053 *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 7054 7055 return 0; 7056 } 7057 7058 return -ENODATA; 7059 } 7060 7061 static const struct xdp_metadata_ops igc_xdp_metadata_ops = { 7062 .xmo_rx_hash = igc_xdp_rx_hash, 7063 .xmo_rx_timestamp = igc_xdp_rx_timestamp, 7064 }; 7065 7066 static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) 7067 { 7068 struct igc_adapter *adapter = container_of(timer, struct igc_adapter, 7069 hrtimer); 7070 unsigned long flags; 7071 unsigned int i; 7072 7073 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 7074 7075 adapter->qbv_transition = true; 7076 for (i = 0; i < adapter->num_tx_queues; i++) { 7077 struct igc_ring *tx_ring = adapter->tx_ring[i]; 7078 7079 if (tx_ring->admin_gate_closed) { 7080 tx_ring->admin_gate_closed = false; 7081 tx_ring->oper_gate_closed = true; 7082 } else { 7083 tx_ring->oper_gate_closed = false; 7084 } 7085 } 7086 adapter->qbv_transition = false; 7087 7088 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 7089 7090 return HRTIMER_NORESTART; 7091 } 7092 7093 /** 7094 * igc_probe - Device Initialization Routine 7095 * @pdev: PCI device information struct 7096 * @ent: entry in igc_pci_tbl 7097 * 7098 * Returns 0 on success, negative on failure 7099 * 7100 * igc_probe initializes an adapter identified by a pci_dev structure. 7101 * The OS initialization, configuring the adapter private structure, 7102 * and a hardware reset occur. 
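* Roughly: the PCI device and 64-bit DMA mask are set up, BAR 0 is
* mapped, the MAC/PHY operations and invariants are installed, the MAC
* address is taken from the platform or the NVM, timers and work items
* are prepared and the net_device is registered.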
7103 */ 7104 static int igc_probe(struct pci_dev *pdev, 7105 const struct pci_device_id *ent) 7106 { 7107 struct igc_adapter *adapter; 7108 struct net_device *netdev; 7109 struct igc_hw *hw; 7110 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 7111 int err; 7112 7113 err = pci_enable_device_mem(pdev); 7114 if (err) 7115 return err; 7116 7117 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 7118 if (err) { 7119 dev_err(&pdev->dev, 7120 "No usable DMA configuration, aborting\n"); 7121 goto err_dma; 7122 } 7123 7124 err = pci_request_mem_regions(pdev, igc_driver_name); 7125 if (err) 7126 goto err_pci_reg; 7127 7128 err = pci_enable_ptm(pdev, NULL); 7129 if (err < 0) 7130 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); 7131 7132 pci_set_master(pdev); 7133 7134 err = -ENOMEM; 7135 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 7136 IGC_MAX_TX_QUEUES); 7137 7138 if (!netdev) 7139 goto err_alloc_etherdev; 7140 7141 SET_NETDEV_DEV(netdev, &pdev->dev); 7142 7143 pci_set_drvdata(pdev, netdev); 7144 adapter = netdev_priv(netdev); 7145 adapter->netdev = netdev; 7146 adapter->pdev = pdev; 7147 hw = &adapter->hw; 7148 hw->back = adapter; 7149 adapter->port_num = hw->bus.func; 7150 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 7151 7152 /* PCI config space info */ 7153 hw->vendor_id = pdev->vendor; 7154 hw->device_id = pdev->device; 7155 hw->revision_id = pdev->revision; 7156 hw->subsystem_vendor_id = pdev->subsystem_vendor; 7157 hw->subsystem_device_id = pdev->subsystem_device; 7158 7159 /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ 7160 if (igc_is_device_id_i226(hw)) 7161 pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); 7162 7163 err = pci_save_state(pdev); 7164 if (err) 7165 goto err_ioremap; 7166 7167 err = -EIO; 7168 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 7169 pci_resource_len(pdev, 0)); 7170 if (!adapter->io_addr) 7171 goto err_ioremap; 7172 7173 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 7174 hw->hw_addr = adapter->io_addr; 7175 7176 netdev->netdev_ops = &igc_netdev_ops; 7177 netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; 7178 netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; 7179 igc_ethtool_set_ops(netdev); 7180 netdev->watchdog_timeo = 5 * HZ; 7181 7182 netdev->mem_start = pci_resource_start(pdev, 0); 7183 netdev->mem_end = pci_resource_end(pdev, 0); 7184 7185 /* Copy the default MAC and PHY function pointers */ 7186 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 7187 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 7188 7189 /* Initialize skew-specific constants */ 7190 err = ei->get_invariants(hw); 7191 if (err) 7192 goto err_sw_init; 7193 7194 /* Add supported features to the features list*/ 7195 netdev->features |= NETIF_F_SG; 7196 netdev->features |= NETIF_F_TSO; 7197 netdev->features |= NETIF_F_TSO6; 7198 netdev->features |= NETIF_F_TSO_ECN; 7199 netdev->features |= NETIF_F_RXHASH; 7200 netdev->features |= NETIF_F_RXCSUM; 7201 netdev->features |= NETIF_F_HW_CSUM; 7202 netdev->features |= NETIF_F_SCTP_CRC; 7203 netdev->features |= NETIF_F_HW_TC; 7204 7205 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 7206 NETIF_F_GSO_GRE_CSUM | \ 7207 NETIF_F_GSO_IPXIP4 | \ 7208 NETIF_F_GSO_IPXIP6 | \ 7209 NETIF_F_GSO_UDP_TUNNEL | \ 7210 NETIF_F_GSO_UDP_TUNNEL_CSUM) 7211 7212 netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES; 7213 netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES; 7214 7215 /* setup the private 
structure */ 7216 err = igc_sw_init(adapter); 7217 if (err) 7218 goto err_sw_init; 7219 7220 /* copy netdev features into list of user selectable features */ 7221 netdev->hw_features |= NETIF_F_NTUPLE; 7222 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 7223 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 7224 netdev->hw_features |= netdev->features; 7225 7226 netdev->features |= NETIF_F_HIGHDMA; 7227 7228 netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; 7229 netdev->mpls_features |= NETIF_F_HW_CSUM; 7230 netdev->hw_enc_features |= netdev->vlan_features; 7231 7232 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 7233 NETDEV_XDP_ACT_XSK_ZEROCOPY; 7234 7235 /* enable HW vlan tag insertion/stripping by default */ 7236 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; 7237 7238 /* MTU range: 68 - 9216 */ 7239 netdev->min_mtu = ETH_MIN_MTU; 7240 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 7241 7242 /* before reading the NVM, reset the controller to put the device in a 7243 * known good starting state 7244 */ 7245 hw->mac.ops.reset_hw(hw); 7246 7247 if (igc_get_flash_presence_i225(hw)) { 7248 if (hw->nvm.ops.validate(hw) < 0) { 7249 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 7250 err = -EIO; 7251 goto err_eeprom; 7252 } 7253 } 7254 7255 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 7256 /* copy the MAC address out of the NVM */ 7257 if (hw->mac.ops.read_mac_addr(hw)) 7258 dev_err(&pdev->dev, "NVM Read Error\n"); 7259 } 7260 7261 eth_hw_addr_set(netdev, hw->mac.addr); 7262 7263 if (!is_valid_ether_addr(netdev->dev_addr)) { 7264 dev_err(&pdev->dev, "Invalid MAC Address\n"); 7265 err = -EIO; 7266 goto err_eeprom; 7267 } 7268 7269 /* configure RXPBSIZE and TXPBSIZE */ 7270 wr32(IGC_RXPBS, IGC_RXPBSIZE_EXP_BMC_DEFAULT); 7271 wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); 7272 7273 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 7274 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 7275 7276 INIT_WORK(&adapter->reset_task, igc_reset_task); 7277 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 7278 7279 hrtimer_setup(&adapter->hrtimer, &igc_qbv_scheduling_timer, CLOCK_MONOTONIC, 7280 HRTIMER_MODE_REL); 7281 7282 /* Initialize link properties that are user-changeable */ 7283 adapter->fc_autoneg = true; 7284 hw->phy.autoneg_advertised = 0xaf; 7285 7286 hw->fc.requested_mode = igc_fc_default; 7287 hw->fc.current_mode = igc_fc_default; 7288 7289 /* By default, support wake on port A */ 7290 adapter->flags |= IGC_FLAG_WOL_SUPPORTED; 7291 7292 /* initialize the wol settings based on the eeprom settings */ 7293 if (adapter->flags & IGC_FLAG_WOL_SUPPORTED) 7294 adapter->wol |= IGC_WUFC_MAG; 7295 7296 device_set_wakeup_enable(&adapter->pdev->dev, 7297 adapter->flags & IGC_FLAG_WOL_SUPPORTED); 7298 7299 igc_ptp_init(adapter); 7300 7301 igc_tsn_clear_schedule(adapter); 7302 7303 igc_fpe_init(adapter); 7304 7305 /* reset the hardware with the new settings */ 7306 igc_reset(adapter); 7307 7308 /* let the f/w know that the h/w is now under the control of the 7309 * driver. 
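* (paired with igc_release_hw_control() in the error path and in
* igc_remove())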
7310 */ 7311 igc_get_hw_control(adapter); 7312 7313 strscpy(netdev->name, "eth%d", sizeof(netdev->name)); 7314 err = register_netdev(netdev); 7315 if (err) 7316 goto err_register; 7317 7318 /* carrier off reporting is important to ethtool even BEFORE open */ 7319 netif_carrier_off(netdev); 7320 7321 /* Check if Media Autosense is enabled */ 7322 adapter->ei = *ei; 7323 7324 /* print pcie link status and MAC address */ 7325 pcie_print_link_status(pdev); 7326 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 7327 7328 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 7329 /* Disable EEE for internal PHY devices */ 7330 hw->dev_spec._base.eee_enable = false; 7331 adapter->flags &= ~IGC_FLAG_EEE; 7332 igc_set_eee_i225(hw, false, false, false); 7333 7334 pm_runtime_put_noidle(&pdev->dev); 7335 7336 if (IS_ENABLED(CONFIG_IGC_LEDS)) { 7337 err = igc_led_setup(adapter); 7338 if (err) 7339 goto err_register; 7340 } 7341 7342 return 0; 7343 7344 err_register: 7345 igc_release_hw_control(adapter); 7346 igc_ptp_stop(adapter); 7347 err_eeprom: 7348 if (!igc_check_reset_block(hw)) 7349 igc_reset_phy(hw); 7350 err_sw_init: 7351 igc_clear_interrupt_scheme(adapter); 7352 iounmap(adapter->io_addr); 7353 err_ioremap: 7354 free_netdev(netdev); 7355 err_alloc_etherdev: 7356 pci_release_mem_regions(pdev); 7357 err_pci_reg: 7358 err_dma: 7359 pci_disable_device(pdev); 7360 return err; 7361 } 7362 7363 /** 7364 * igc_remove - Device Removal Routine 7365 * @pdev: PCI device information struct 7366 * 7367 * igc_remove is called by the PCI subsystem to alert the driver 7368 * that it should release a PCI device. This could be caused by a 7369 * Hot-Plug event, or because the driver is going to be removed from 7370 * memory. 7371 */ 7372 static void igc_remove(struct pci_dev *pdev) 7373 { 7374 struct net_device *netdev = pci_get_drvdata(pdev); 7375 struct igc_adapter *adapter = netdev_priv(netdev); 7376 7377 pm_runtime_get_noresume(&pdev->dev); 7378 7379 igc_flush_nfc_rules(adapter); 7380 7381 igc_ptp_stop(adapter); 7382 7383 pci_disable_ptm(pdev); 7384 pci_clear_master(pdev); 7385 7386 set_bit(__IGC_DOWN, &adapter->state); 7387 7388 timer_delete_sync(&adapter->watchdog_timer); 7389 timer_delete_sync(&adapter->phy_info_timer); 7390 7391 cancel_work_sync(&adapter->reset_task); 7392 cancel_work_sync(&adapter->watchdog_task); 7393 hrtimer_cancel(&adapter->hrtimer); 7394 7395 if (IS_ENABLED(CONFIG_IGC_LEDS)) 7396 igc_led_free(adapter); 7397 7398 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7399 * would have already happened in close and is redundant. 7400 */ 7401 igc_release_hw_control(adapter); 7402 unregister_netdev(netdev); 7403 7404 igc_clear_interrupt_scheme(adapter); 7405 pci_iounmap(pdev, adapter->io_addr); 7406 pci_release_mem_regions(pdev); 7407 7408 free_netdev(netdev); 7409 7410 pci_disable_device(pdev); 7411 } 7412 7413 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 7414 bool runtime) 7415 { 7416 struct net_device *netdev = pci_get_drvdata(pdev); 7417 struct igc_adapter *adapter = netdev_priv(netdev); 7418 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 7419 struct igc_hw *hw = &adapter->hw; 7420 u32 ctrl, rctl, status; 7421 bool wake; 7422 7423 rtnl_lock(); 7424 netif_device_detach(netdev); 7425 7426 if (netif_running(netdev)) 7427 __igc_close(netdev, true); 7428 7429 igc_ptp_suspend(adapter); 7430 7431 igc_clear_interrupt_scheme(adapter); 7432 rtnl_unlock(); 7433 7434 status = rd32(IGC_STATUS); 7435 if (status & IGC_STATUS_LU) 7436 wufc &= ~IGC_WUFC_LNKC; 7437 7438 if (wufc) { 7439 igc_setup_rctl(adapter); 7440 igc_set_rx_mode(netdev); 7441 7442 /* turn on all-multi mode if wake on multicast is enabled */ 7443 if (wufc & IGC_WUFC_MC) { 7444 rctl = rd32(IGC_RCTL); 7445 rctl |= IGC_RCTL_MPE; 7446 wr32(IGC_RCTL, rctl); 7447 } 7448 7449 ctrl = rd32(IGC_CTRL); 7450 ctrl |= IGC_CTRL_ADVD3WUC; 7451 wr32(IGC_CTRL, ctrl); 7452 7453 /* Allow time for pending master requests to run */ 7454 igc_disable_pcie_master(hw); 7455 7456 wr32(IGC_WUC, IGC_WUC_PME_EN); 7457 wr32(IGC_WUFC, wufc); 7458 } else { 7459 wr32(IGC_WUC, 0); 7460 wr32(IGC_WUFC, 0); 7461 } 7462 7463 wake = wufc || adapter->en_mng_pt; 7464 if (!wake) 7465 igc_power_down_phy_copper_base(&adapter->hw); 7466 else 7467 igc_power_up_link(adapter); 7468 7469 if (enable_wake) 7470 *enable_wake = wake; 7471 7472 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7473 * would have already happened in close and is redundant. 7474 */ 7475 igc_release_hw_control(adapter); 7476 7477 pci_disable_device(pdev); 7478 7479 return 0; 7480 } 7481 7482 static int igc_runtime_suspend(struct device *dev) 7483 { 7484 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 7485 } 7486 7487 static void igc_deliver_wake_packet(struct net_device *netdev) 7488 { 7489 struct igc_adapter *adapter = netdev_priv(netdev); 7490 struct igc_hw *hw = &adapter->hw; 7491 struct sk_buff *skb; 7492 u32 wupl; 7493 7494 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 7495 7496 /* WUPM stores only the first 128 bytes of the wake packet. 7497 * Read the packet only if we have the whole thing. 7498 */ 7499 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 7500 return; 7501 7502 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 7503 if (!skb) 7504 return; 7505 7506 skb_put(skb, wupl); 7507 7508 /* Ensure reads are 32-bit aligned */ 7509 wupl = roundup(wupl, 4); 7510 7511 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 7512 7513 skb->protocol = eth_type_trans(skb, netdev); 7514 netif_rx(skb); 7515 } 7516 7517 static int __igc_resume(struct device *dev, bool rpm) 7518 { 7519 struct pci_dev *pdev = to_pci_dev(dev); 7520 struct net_device *netdev = pci_get_drvdata(pdev); 7521 struct igc_adapter *adapter = netdev_priv(netdev); 7522 struct igc_hw *hw = &adapter->hw; 7523 u32 err, val; 7524 7525 pci_set_power_state(pdev, PCI_D0); 7526 pci_restore_state(pdev); 7527 pci_save_state(pdev); 7528 7529 if (!pci_device_is_present(pdev)) 7530 return -ENODEV; 7531 err = pci_enable_device_mem(pdev); 7532 if (err) { 7533 netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 7534 return err; 7535 } 7536 pci_set_master(pdev); 7537 7538 pci_enable_wake(pdev, PCI_D3hot, 0); 7539 pci_enable_wake(pdev, PCI_D3cold, 0); 7540 7541 if (igc_is_device_id_i226(hw)) 7542 pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); 7543 7544 if (igc_init_interrupt_scheme(adapter, true)) { 7545 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7546 return -ENOMEM; 7547 } 7548 7549 igc_reset(adapter); 7550 7551 /* let the f/w know that the h/w is now under the control of the 7552 * driver. 
7553 */ 7554 igc_get_hw_control(adapter); 7555 7556 val = rd32(IGC_WUS); 7557 if (val & WAKE_PKT_WUS) 7558 igc_deliver_wake_packet(netdev); 7559 7560 wr32(IGC_WUS, ~0); 7561 7562 if (netif_running(netdev)) { 7563 if (!rpm) 7564 rtnl_lock(); 7565 err = __igc_open(netdev, true); 7566 if (!rpm) 7567 rtnl_unlock(); 7568 if (!err) 7569 netif_device_attach(netdev); 7570 } 7571 7572 return err; 7573 } 7574 7575 static int igc_resume(struct device *dev) 7576 { 7577 return __igc_resume(dev, false); 7578 } 7579 7580 static int igc_runtime_resume(struct device *dev) 7581 { 7582 return __igc_resume(dev, true); 7583 } 7584 7585 static int igc_suspend(struct device *dev) 7586 { 7587 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 7588 } 7589 7590 static int __maybe_unused igc_runtime_idle(struct device *dev) 7591 { 7592 struct net_device *netdev = dev_get_drvdata(dev); 7593 struct igc_adapter *adapter = netdev_priv(netdev); 7594 7595 if (!igc_has_link(adapter)) 7596 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 7597 7598 return -EBUSY; 7599 } 7600 7601 static void igc_shutdown(struct pci_dev *pdev) 7602 { 7603 bool wake; 7604 7605 __igc_shutdown(pdev, &wake, 0); 7606 7607 if (system_state == SYSTEM_POWER_OFF) { 7608 pci_wake_from_d3(pdev, wake); 7609 pci_set_power_state(pdev, PCI_D3hot); 7610 } 7611 } 7612 7613 /** 7614 * igc_io_error_detected - called when PCI error is detected 7615 * @pdev: Pointer to PCI device 7616 * @state: The current PCI connection state 7617 * 7618 * This function is called after a PCI bus error affecting 7619 * this device has been detected. 7620 **/ 7621 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, 7622 pci_channel_state_t state) 7623 { 7624 struct net_device *netdev = pci_get_drvdata(pdev); 7625 struct igc_adapter *adapter = netdev_priv(netdev); 7626 7627 rtnl_lock(); 7628 netif_device_detach(netdev); 7629 7630 if (state == pci_channel_io_perm_failure) { 7631 rtnl_unlock(); 7632 return PCI_ERS_RESULT_DISCONNECT; 7633 } 7634 7635 if (netif_running(netdev)) 7636 igc_down(adapter); 7637 pci_disable_device(pdev); 7638 rtnl_unlock(); 7639 7640 /* Request a slot reset. */ 7641 return PCI_ERS_RESULT_NEED_RESET; 7642 } 7643 7644 /** 7645 * igc_io_slot_reset - called after the PCI bus has been reset. 7646 * @pdev: Pointer to PCI device 7647 * 7648 * Restart the card from scratch, as if from a cold-boot. Implementation 7649 * resembles the first-half of the __igc_resume routine. 7650 **/ 7651 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) 7652 { 7653 struct net_device *netdev = pci_get_drvdata(pdev); 7654 struct igc_adapter *adapter = netdev_priv(netdev); 7655 struct igc_hw *hw = &adapter->hw; 7656 pci_ers_result_t result; 7657 7658 if (pci_enable_device_mem(pdev)) { 7659 netdev_err(netdev, "Could not re-enable PCI device after reset\n"); 7660 result = PCI_ERS_RESULT_DISCONNECT; 7661 } else { 7662 pci_set_master(pdev); 7663 pci_restore_state(pdev); 7664 pci_save_state(pdev); 7665 7666 pci_enable_wake(pdev, PCI_D3hot, 0); 7667 pci_enable_wake(pdev, PCI_D3cold, 0); 7668 7669 if (igc_is_device_id_i226(hw)) 7670 pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); 7671 7672 /* In case of PCI error, adapter loses its HW address 7673 * so we should re-assign it here. 7674 */ 7675 hw->hw_addr = adapter->io_addr; 7676 7677 igc_reset(adapter); 7678 wr32(IGC_WUS, ~0); 7679 result = PCI_ERS_RESULT_RECOVERED; 7680 } 7681 7682 return result; 7683 } 7684 7685 /** 7686 * igc_io_resume - called when traffic can start to flow again. 
7687 * @pdev: Pointer to PCI device 7688 * 7689 * This callback is called when the error recovery driver tells us that 7690 * its OK to resume normal operation. Implementation resembles the 7691 * second-half of the __igc_resume routine. 7692 */ 7693 static void igc_io_resume(struct pci_dev *pdev) 7694 { 7695 struct net_device *netdev = pci_get_drvdata(pdev); 7696 struct igc_adapter *adapter = netdev_priv(netdev); 7697 7698 rtnl_lock(); 7699 if (netif_running(netdev)) { 7700 if (igc_open(netdev)) { 7701 rtnl_unlock(); 7702 netdev_err(netdev, "igc_open failed after reset\n"); 7703 return; 7704 } 7705 } 7706 7707 netif_device_attach(netdev); 7708 7709 /* let the f/w know that the h/w is now under the control of the 7710 * driver. 7711 */ 7712 igc_get_hw_control(adapter); 7713 rtnl_unlock(); 7714 } 7715 7716 static const struct pci_error_handlers igc_err_handler = { 7717 .error_detected = igc_io_error_detected, 7718 .slot_reset = igc_io_slot_reset, 7719 .resume = igc_io_resume, 7720 }; 7721 7722 static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, 7723 igc_runtime_suspend, igc_runtime_resume, 7724 igc_runtime_idle); 7725 7726 static struct pci_driver igc_driver = { 7727 .name = igc_driver_name, 7728 .id_table = igc_pci_tbl, 7729 .probe = igc_probe, 7730 .remove = igc_remove, 7731 .driver.pm = pm_ptr(&igc_pm_ops), 7732 .shutdown = igc_shutdown, 7733 .err_handler = &igc_err_handler, 7734 }; 7735 7736 /** 7737 * igc_reinit_queues - return error 7738 * @adapter: pointer to adapter structure 7739 */ 7740 int igc_reinit_queues(struct igc_adapter *adapter) 7741 { 7742 struct net_device *netdev = adapter->netdev; 7743 int err = 0; 7744 7745 if (netif_running(netdev)) 7746 igc_close(netdev); 7747 7748 igc_reset_interrupt_capability(adapter); 7749 7750 if (igc_init_interrupt_scheme(adapter, true)) { 7751 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7752 return -ENOMEM; 7753 } 7754 7755 if (netif_running(netdev)) 7756 err = igc_open(netdev); 7757 7758 return err; 7759 } 7760 7761 /** 7762 * igc_get_hw_dev - return device 7763 * @hw: pointer to hardware structure 7764 * 7765 * used by hardware layer to print debugging information 7766 */ 7767 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 7768 { 7769 struct igc_adapter *adapter = hw->back; 7770 7771 return adapter->netdev; 7772 } 7773 7774 static void igc_disable_rx_ring_hw(struct igc_ring *ring) 7775 { 7776 struct igc_hw *hw = &ring->q_vector->adapter->hw; 7777 u8 idx = ring->reg_idx; 7778 u32 rxdctl; 7779 7780 rxdctl = rd32(IGC_RXDCTL(idx)); 7781 rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; 7782 rxdctl |= IGC_RXDCTL_SWFLUSH; 7783 wr32(IGC_RXDCTL(idx), rxdctl); 7784 } 7785 7786 void igc_disable_rx_ring(struct igc_ring *ring) 7787 { 7788 igc_disable_rx_ring_hw(ring); 7789 igc_clean_rx_ring(ring); 7790 } 7791 7792 void igc_enable_rx_ring(struct igc_ring *ring) 7793 { 7794 struct igc_adapter *adapter = ring->q_vector->adapter; 7795 7796 igc_configure_rx_ring(adapter, ring); 7797 7798 if (ring->xsk_pool) 7799 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 7800 else 7801 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 7802 } 7803 7804 void igc_disable_tx_ring(struct igc_ring *ring) 7805 { 7806 igc_disable_tx_ring_hw(ring); 7807 igc_clean_tx_ring(ring); 7808 } 7809 7810 void igc_enable_tx_ring(struct igc_ring *ring) 7811 { 7812 struct igc_adapter *adapter = ring->q_vector->adapter; 7813 7814 igc_configure_tx_ring(adapter, ring); 7815 } 7816 7817 /** 7818 * igc_init_module - Driver Registration Routine 7819 * 7820 * 
igc_init_module is the first routine called when the driver is 7821 * loaded. All it does is register with the PCI subsystem. 7822 */ 7823 static int __init igc_init_module(void) 7824 { 7825 int ret; 7826 7827 pr_info("%s\n", igc_driver_string); 7828 pr_info("%s\n", igc_copyright); 7829 7830 ret = pci_register_driver(&igc_driver); 7831 return ret; 7832 } 7833 7834 module_init(igc_init_module); 7835 7836 /** 7837 * igc_exit_module - Driver Exit Cleanup Routine 7838 * 7839 * igc_exit_module is called just before the driver is removed 7840 * from memory. 7841 */ 7842 static void __exit igc_exit_module(void) 7843 { 7844 pci_unregister_driver(&igc_driver); 7845 } 7846 7847 module_exit(igc_exit_module); 7848 /* igc_main.c */ 7849
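/* Example usage (illustrative only): the module normally autoloads by PCI ID,
* but for bring-up it can be exercised by hand, e.g.:
*   modprobe igc debug=16
*   ethtool -i <iface>     (should report "driver: igc")
*   rmmod igc
*/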