1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 Intel Corporation */ 3 4 #include <linux/module.h> 5 #include <linux/types.h> 6 #include <linux/if_vlan.h> 7 #include <linux/tcp.h> 8 #include <linux/udp.h> 9 #include <linux/ip.h> 10 #include <linux/pm_runtime.h> 11 #include <net/pkt_sched.h> 12 #include <linux/bpf_trace.h> 13 #include <net/xdp_sock_drv.h> 14 #include <linux/pci.h> 15 #include <linux/mdio.h> 16 17 #include <net/ipv6.h> 18 19 #include "igc.h" 20 #include "igc_hw.h" 21 #include "igc_tsn.h" 22 #include "igc_xdp.h" 23 24 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" 25 26 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) 27 28 #define IGC_XDP_PASS 0 29 #define IGC_XDP_CONSUMED BIT(0) 30 #define IGC_XDP_TX BIT(1) 31 #define IGC_XDP_REDIRECT BIT(2) 32 33 static int debug = -1; 34 35 MODULE_DESCRIPTION(DRV_SUMMARY); 36 MODULE_LICENSE("GPL v2"); 37 module_param(debug, int, 0); 38 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 39 40 char igc_driver_name[] = "igc"; 41 static const char igc_driver_string[] = DRV_SUMMARY; 42 static const char igc_copyright[] = 43 "Copyright(c) 2018 Intel Corporation."; 44 45 static const struct igc_info *igc_info_tbl[] = { 46 [board_base] = &igc_base_info, 47 }; 48 49 static const struct pci_device_id igc_pci_tbl[] = { 50 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, 51 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, 52 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, 53 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, 54 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, 55 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, 56 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, 57 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, 58 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base }, 59 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, 60 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, 61 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, 62 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base }, 63 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base }, 64 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base }, 65 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, 66 /* required last entry */ 67 {0, } 68 }; 69 70 MODULE_DEVICE_TABLE(pci, igc_pci_tbl); 71 72 enum latency_range { 73 lowest_latency = 0, 74 low_latency = 1, 75 bulk_latency = 2, 76 latency_invalid = 255 77 }; 78 79 void igc_reset(struct igc_adapter *adapter) 80 { 81 struct net_device *dev = adapter->netdev; 82 struct igc_hw *hw = &adapter->hw; 83 struct igc_fc_info *fc = &hw->fc; 84 u32 pba, hwm; 85 86 /* Repartition PBA for greater than 9k MTU if required */ 87 pba = IGC_PBA_34K; 88 89 /* flow control settings 90 * The high water mark must be low enough to fit one full frame 91 * after transmitting the pause frame. As such we must have enough 92 * space to allow for us to complete our current transmit and then 93 * receive the frame that is in progress from the link partner. 
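	 * (For example, with the 34KB packet buffer selected above and the
	 * default MTU, that means keeping roughly one maximum-size frame plus
	 * one jumbo-size frame of headroom free below the top of the Rx FIFO.)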
94 * Set it to: 95 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 96 */ 97 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 98 99 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 100 fc->low_water = fc->high_water - 16; 101 fc->pause_time = 0xFFFF; 102 fc->send_xon = 1; 103 fc->current_mode = fc->requested_mode; 104 105 hw->mac.ops.reset_hw(hw); 106 107 if (hw->mac.ops.init_hw(hw)) 108 netdev_err(dev, "Error on hardware initialization\n"); 109 110 /* Re-establish EEE setting */ 111 igc_set_eee_i225(hw, true, true, true); 112 113 if (!netif_running(adapter->netdev)) 114 igc_power_down_phy_copper_base(&adapter->hw); 115 116 /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ 117 wr32(IGC_VET, ETH_P_8021Q); 118 119 /* Re-enable PTP, where applicable. */ 120 igc_ptp_reset(adapter); 121 122 /* Re-enable TSN offloading, where applicable. */ 123 igc_tsn_reset(adapter); 124 125 igc_get_phy_info(hw); 126 } 127 128 /** 129 * igc_power_up_link - Power up the phy link 130 * @adapter: address of board private structure 131 */ 132 static void igc_power_up_link(struct igc_adapter *adapter) 133 { 134 igc_reset_phy(&adapter->hw); 135 136 igc_power_up_phy_copper(&adapter->hw); 137 138 igc_setup_link(&adapter->hw); 139 } 140 141 /** 142 * igc_release_hw_control - release control of the h/w to f/w 143 * @adapter: address of board private structure 144 * 145 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 146 * For ASF and Pass Through versions of f/w this means that the 147 * driver is no longer loaded. 148 */ 149 static void igc_release_hw_control(struct igc_adapter *adapter) 150 { 151 struct igc_hw *hw = &adapter->hw; 152 u32 ctrl_ext; 153 154 if (!pci_device_is_present(adapter->pdev)) 155 return; 156 157 /* Let firmware take over control of h/w */ 158 ctrl_ext = rd32(IGC_CTRL_EXT); 159 wr32(IGC_CTRL_EXT, 160 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 161 } 162 163 /** 164 * igc_get_hw_control - get control of the h/w from f/w 165 * @adapter: address of board private structure 166 * 167 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 168 * For ASF and Pass Through versions of f/w this means that 169 * the driver is loaded. 
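 * This is the counterpart of igc_release_hw_control(): the two calls
 * bracket the periods during which the driver, rather than the firmware,
 * owns the hardware.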
170 */ 171 static void igc_get_hw_control(struct igc_adapter *adapter) 172 { 173 struct igc_hw *hw = &adapter->hw; 174 u32 ctrl_ext; 175 176 /* Let firmware know the driver has taken over */ 177 ctrl_ext = rd32(IGC_CTRL_EXT); 178 wr32(IGC_CTRL_EXT, 179 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 180 } 181 182 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 183 { 184 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 185 dma_unmap_len(buf, len), DMA_TO_DEVICE); 186 187 dma_unmap_len_set(buf, len, 0); 188 } 189 190 /** 191 * igc_clean_tx_ring - Free Tx Buffers 192 * @tx_ring: ring to be cleaned 193 */ 194 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 195 { 196 u16 i = tx_ring->next_to_clean; 197 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 198 u32 xsk_frames = 0; 199 200 while (i != tx_ring->next_to_use) { 201 union igc_adv_tx_desc *eop_desc, *tx_desc; 202 203 switch (tx_buffer->type) { 204 case IGC_TX_BUFFER_TYPE_XSK: 205 xsk_frames++; 206 break; 207 case IGC_TX_BUFFER_TYPE_XDP: 208 xdp_return_frame(tx_buffer->xdpf); 209 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 210 break; 211 case IGC_TX_BUFFER_TYPE_SKB: 212 dev_kfree_skb_any(tx_buffer->skb); 213 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 214 break; 215 default: 216 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 217 break; 218 } 219 220 /* check for eop_desc to determine the end of the packet */ 221 eop_desc = tx_buffer->next_to_watch; 222 tx_desc = IGC_TX_DESC(tx_ring, i); 223 224 /* unmap remaining buffers */ 225 while (tx_desc != eop_desc) { 226 tx_buffer++; 227 tx_desc++; 228 i++; 229 if (unlikely(i == tx_ring->count)) { 230 i = 0; 231 tx_buffer = tx_ring->tx_buffer_info; 232 tx_desc = IGC_TX_DESC(tx_ring, 0); 233 } 234 235 /* unmap any remaining paged data */ 236 if (dma_unmap_len(tx_buffer, len)) 237 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 238 } 239 240 tx_buffer->next_to_watch = NULL; 241 242 /* move us one more past the eop_desc for start of next pkt */ 243 tx_buffer++; 244 i++; 245 if (unlikely(i == tx_ring->count)) { 246 i = 0; 247 tx_buffer = tx_ring->tx_buffer_info; 248 } 249 } 250 251 if (tx_ring->xsk_pool && xsk_frames) 252 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 253 254 /* reset BQL for queue */ 255 netdev_tx_reset_queue(txring_txq(tx_ring)); 256 257 /* Zero out the buffer ring */ 258 memset(tx_ring->tx_buffer_info, 0, 259 sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); 260 261 /* Zero out the descriptor ring */ 262 memset(tx_ring->desc, 0, tx_ring->size); 263 264 /* reset next_to_use and next_to_clean */ 265 tx_ring->next_to_use = 0; 266 tx_ring->next_to_clean = 0; 267 } 268 269 /** 270 * igc_free_tx_resources - Free Tx Resources per Queue 271 * @tx_ring: Tx descriptor ring for a specific queue 272 * 273 * Free all transmit software resources 274 */ 275 void igc_free_tx_resources(struct igc_ring *tx_ring) 276 { 277 igc_disable_tx_ring(tx_ring); 278 279 vfree(tx_ring->tx_buffer_info); 280 tx_ring->tx_buffer_info = NULL; 281 282 /* if not set, then don't free */ 283 if (!tx_ring->desc) 284 return; 285 286 dma_free_coherent(tx_ring->dev, tx_ring->size, 287 tx_ring->desc, tx_ring->dma); 288 289 tx_ring->desc = NULL; 290 } 291 292 /** 293 * igc_free_all_tx_resources - Free Tx Resources for All Queues 294 * @adapter: board private structure 295 * 296 * Free all transmit software resources 297 */ 298 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 299 { 300 int i; 301 302 for (i = 0; i < adapter->num_tx_queues; i++) 303 
igc_free_tx_resources(adapter->tx_ring[i]); 304 } 305 306 /** 307 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 308 * @adapter: board private structure 309 */ 310 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 311 { 312 int i; 313 314 for (i = 0; i < adapter->num_tx_queues; i++) 315 if (adapter->tx_ring[i]) 316 igc_clean_tx_ring(adapter->tx_ring[i]); 317 } 318 319 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 320 { 321 struct igc_hw *hw = &ring->q_vector->adapter->hw; 322 u8 idx = ring->reg_idx; 323 u32 txdctl; 324 325 txdctl = rd32(IGC_TXDCTL(idx)); 326 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 327 txdctl |= IGC_TXDCTL_SWFLUSH; 328 wr32(IGC_TXDCTL(idx), txdctl); 329 } 330 331 /** 332 * igc_disable_all_tx_rings_hw - Disable all transmit queue operation 333 * @adapter: board private structure 334 */ 335 static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) 336 { 337 int i; 338 339 for (i = 0; i < adapter->num_tx_queues; i++) { 340 struct igc_ring *tx_ring = adapter->tx_ring[i]; 341 342 igc_disable_tx_ring_hw(tx_ring); 343 } 344 } 345 346 /** 347 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 348 * @tx_ring: tx descriptor ring (for a specific queue) to setup 349 * 350 * Return 0 on success, negative on failure 351 */ 352 int igc_setup_tx_resources(struct igc_ring *tx_ring) 353 { 354 struct net_device *ndev = tx_ring->netdev; 355 struct device *dev = tx_ring->dev; 356 int size = 0; 357 358 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 359 tx_ring->tx_buffer_info = vzalloc(size); 360 if (!tx_ring->tx_buffer_info) 361 goto err; 362 363 /* round up to nearest 4K */ 364 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 365 tx_ring->size = ALIGN(tx_ring->size, 4096); 366 367 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 368 &tx_ring->dma, GFP_KERNEL); 369 370 if (!tx_ring->desc) 371 goto err; 372 373 tx_ring->next_to_use = 0; 374 tx_ring->next_to_clean = 0; 375 376 return 0; 377 378 err: 379 vfree(tx_ring->tx_buffer_info); 380 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 381 return -ENOMEM; 382 } 383 384 /** 385 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 386 * @adapter: board private structure 387 * 388 * Return 0 on success, negative on failure 389 */ 390 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 391 { 392 struct net_device *dev = adapter->netdev; 393 int i, err = 0; 394 395 for (i = 0; i < adapter->num_tx_queues; i++) { 396 err = igc_setup_tx_resources(adapter->tx_ring[i]); 397 if (err) { 398 netdev_err(dev, "Error on Tx queue %u setup\n", i); 399 for (i--; i >= 0; i--) 400 igc_free_tx_resources(adapter->tx_ring[i]); 401 break; 402 } 403 } 404 405 return err; 406 } 407 408 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 409 { 410 u16 i = rx_ring->next_to_clean; 411 412 dev_kfree_skb(rx_ring->skb); 413 rx_ring->skb = NULL; 414 415 /* Free all the Rx ring sk_buffs */ 416 while (i != rx_ring->next_to_alloc) { 417 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 418 419 /* Invalidate cache lines that may have been written to by 420 * device so that we avoid corrupting memory. 
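		 * (The sync below covers igc_rx_bufsz() bytes, i.e. the whole
		 * portion of the buffer the device may have written.)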
421 */ 422 dma_sync_single_range_for_cpu(rx_ring->dev, 423 buffer_info->dma, 424 buffer_info->page_offset, 425 igc_rx_bufsz(rx_ring), 426 DMA_FROM_DEVICE); 427 428 /* free resources associated with mapping */ 429 dma_unmap_page_attrs(rx_ring->dev, 430 buffer_info->dma, 431 igc_rx_pg_size(rx_ring), 432 DMA_FROM_DEVICE, 433 IGC_RX_DMA_ATTR); 434 __page_frag_cache_drain(buffer_info->page, 435 buffer_info->pagecnt_bias); 436 437 i++; 438 if (i == rx_ring->count) 439 i = 0; 440 } 441 } 442 443 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 444 { 445 struct igc_rx_buffer *bi; 446 u16 i; 447 448 for (i = 0; i < ring->count; i++) { 449 bi = &ring->rx_buffer_info[i]; 450 if (!bi->xdp) 451 continue; 452 453 xsk_buff_free(bi->xdp); 454 bi->xdp = NULL; 455 } 456 } 457 458 /** 459 * igc_clean_rx_ring - Free Rx Buffers per Queue 460 * @ring: ring to free buffers from 461 */ 462 static void igc_clean_rx_ring(struct igc_ring *ring) 463 { 464 if (ring->xsk_pool) 465 igc_clean_rx_ring_xsk_pool(ring); 466 else 467 igc_clean_rx_ring_page_shared(ring); 468 469 clear_ring_uses_large_buffer(ring); 470 471 ring->next_to_alloc = 0; 472 ring->next_to_clean = 0; 473 ring->next_to_use = 0; 474 } 475 476 /** 477 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 478 * @adapter: board private structure 479 */ 480 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 481 { 482 int i; 483 484 for (i = 0; i < adapter->num_rx_queues; i++) 485 if (adapter->rx_ring[i]) 486 igc_clean_rx_ring(adapter->rx_ring[i]); 487 } 488 489 /** 490 * igc_free_rx_resources - Free Rx Resources 491 * @rx_ring: ring to clean the resources from 492 * 493 * Free all receive software resources 494 */ 495 void igc_free_rx_resources(struct igc_ring *rx_ring) 496 { 497 igc_clean_rx_ring(rx_ring); 498 499 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 500 501 vfree(rx_ring->rx_buffer_info); 502 rx_ring->rx_buffer_info = NULL; 503 504 /* if not set, then don't free */ 505 if (!rx_ring->desc) 506 return; 507 508 dma_free_coherent(rx_ring->dev, rx_ring->size, 509 rx_ring->desc, rx_ring->dma); 510 511 rx_ring->desc = NULL; 512 } 513 514 /** 515 * igc_free_all_rx_resources - Free Rx Resources for All Queues 516 * @adapter: board private structure 517 * 518 * Free all receive software resources 519 */ 520 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 521 { 522 int i; 523 524 for (i = 0; i < adapter->num_rx_queues; i++) 525 igc_free_rx_resources(adapter->rx_ring[i]); 526 } 527 528 /** 529 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 530 * @rx_ring: rx descriptor ring (for a specific queue) to setup 531 * 532 * Returns 0 on success, negative on failure 533 */ 534 int igc_setup_rx_resources(struct igc_ring *rx_ring) 535 { 536 struct net_device *ndev = rx_ring->netdev; 537 struct device *dev = rx_ring->dev; 538 u8 index = rx_ring->queue_index; 539 int size, desc_len, res; 540 541 /* XDP RX-queue info */ 542 if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 543 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 544 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 545 rx_ring->q_vector->napi.napi_id); 546 if (res < 0) { 547 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 548 index); 549 return res; 550 } 551 552 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 553 rx_ring->rx_buffer_info = vzalloc(size); 554 if (!rx_ring->rx_buffer_info) 555 goto err; 556 557 desc_len = sizeof(union igc_adv_rx_desc); 558 559 /* Round up to nearest 4K */ 560 rx_ring->size = rx_ring->count * desc_len; 561 
rx_ring->size = ALIGN(rx_ring->size, 4096); 562 563 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 564 &rx_ring->dma, GFP_KERNEL); 565 566 if (!rx_ring->desc) 567 goto err; 568 569 rx_ring->next_to_alloc = 0; 570 rx_ring->next_to_clean = 0; 571 rx_ring->next_to_use = 0; 572 573 return 0; 574 575 err: 576 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 577 vfree(rx_ring->rx_buffer_info); 578 rx_ring->rx_buffer_info = NULL; 579 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 580 return -ENOMEM; 581 } 582 583 /** 584 * igc_setup_all_rx_resources - wrapper to allocate Rx resources 585 * (Descriptors) for all queues 586 * @adapter: board private structure 587 * 588 * Return 0 on success, negative on failure 589 */ 590 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 591 { 592 struct net_device *dev = adapter->netdev; 593 int i, err = 0; 594 595 for (i = 0; i < adapter->num_rx_queues; i++) { 596 err = igc_setup_rx_resources(adapter->rx_ring[i]); 597 if (err) { 598 netdev_err(dev, "Error on Rx queue %u setup\n", i); 599 for (i--; i >= 0; i--) 600 igc_free_rx_resources(adapter->rx_ring[i]); 601 break; 602 } 603 } 604 605 return err; 606 } 607 608 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 609 struct igc_ring *ring) 610 { 611 if (!igc_xdp_is_enabled(adapter) || 612 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 613 return NULL; 614 615 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 616 } 617 618 /** 619 * igc_configure_rx_ring - Configure a receive ring after Reset 620 * @adapter: board private structure 621 * @ring: receive ring to be configured 622 * 623 * Configure the Rx unit of the MAC after a reset. 624 */ 625 static void igc_configure_rx_ring(struct igc_adapter *adapter, 626 struct igc_ring *ring) 627 { 628 struct igc_hw *hw = &adapter->hw; 629 union igc_adv_rx_desc *rx_desc; 630 int reg_idx = ring->reg_idx; 631 u32 srrctl = 0, rxdctl = 0; 632 u64 rdba = ring->dma; 633 u32 buf_size; 634 635 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 636 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 637 if (ring->xsk_pool) { 638 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 639 MEM_TYPE_XSK_BUFF_POOL, 640 NULL)); 641 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 642 } else { 643 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 644 MEM_TYPE_PAGE_SHARED, 645 NULL)); 646 } 647 648 if (igc_xdp_is_enabled(adapter)) 649 set_ring_uses_large_buffer(ring); 650 651 /* disable the queue */ 652 wr32(IGC_RXDCTL(reg_idx), 0); 653 654 /* Set DMA base address registers */ 655 wr32(IGC_RDBAL(reg_idx), 656 rdba & 0x00000000ffffffffULL); 657 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 658 wr32(IGC_RDLEN(reg_idx), 659 ring->count * sizeof(union igc_adv_rx_desc)); 660 661 /* initialize head and tail */ 662 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 663 wr32(IGC_RDH(reg_idx), 0); 664 writel(0, ring->tail); 665 666 /* reset next-to- use/clean to place SW in sync with hardware */ 667 ring->next_to_clean = 0; 668 ring->next_to_use = 0; 669 670 if (ring->xsk_pool) 671 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); 672 else if (ring_uses_large_buffer(ring)) 673 buf_size = IGC_RXBUFFER_3072; 674 else 675 buf_size = IGC_RXBUFFER_2048; 676 677 srrctl = rd32(IGC_SRRCTL(reg_idx)); 678 srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | 679 IGC_SRRCTL_DESCTYPE_MASK); 680 srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); 681 srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); 682 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 
683 684 wr32(IGC_SRRCTL(reg_idx), srrctl); 685 686 rxdctl |= IGC_RXDCTL_PTHRESH; 687 rxdctl |= IGC_RXDCTL_HTHRESH << 8; 688 rxdctl |= IGC_RXDCTL_WTHRESH << 16; 689 690 /* initialize rx_buffer_info */ 691 memset(ring->rx_buffer_info, 0, 692 sizeof(struct igc_rx_buffer) * ring->count); 693 694 /* initialize Rx descriptor 0 */ 695 rx_desc = IGC_RX_DESC(ring, 0); 696 rx_desc->wb.upper.length = 0; 697 698 /* enable receive descriptor fetching */ 699 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 700 701 wr32(IGC_RXDCTL(reg_idx), rxdctl); 702 } 703 704 /** 705 * igc_configure_rx - Configure receive Unit after Reset 706 * @adapter: board private structure 707 * 708 * Configure the Rx unit of the MAC after a reset. 709 */ 710 static void igc_configure_rx(struct igc_adapter *adapter) 711 { 712 int i; 713 714 /* Setup the HW Rx Head and Tail Descriptor Pointers and 715 * the Base and Length of the Rx Descriptor Ring 716 */ 717 for (i = 0; i < adapter->num_rx_queues; i++) 718 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 719 } 720 721 /** 722 * igc_configure_tx_ring - Configure transmit ring after Reset 723 * @adapter: board private structure 724 * @ring: tx ring to configure 725 * 726 * Configure a transmit ring after a reset. 727 */ 728 static void igc_configure_tx_ring(struct igc_adapter *adapter, 729 struct igc_ring *ring) 730 { 731 struct igc_hw *hw = &adapter->hw; 732 int reg_idx = ring->reg_idx; 733 u64 tdba = ring->dma; 734 u32 txdctl = 0; 735 736 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 737 738 /* disable the queue */ 739 wr32(IGC_TXDCTL(reg_idx), 0); 740 wrfl(); 741 742 wr32(IGC_TDLEN(reg_idx), 743 ring->count * sizeof(union igc_adv_tx_desc)); 744 wr32(IGC_TDBAL(reg_idx), 745 tdba & 0x00000000ffffffffULL); 746 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 747 748 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 749 wr32(IGC_TDH(reg_idx), 0); 750 writel(0, ring->tail); 751 752 txdctl |= IGC_TXDCTL_PTHRESH(8) | IGC_TXDCTL_HTHRESH(1) | 753 IGC_TXDCTL_WTHRESH(16) | IGC_TXDCTL_QUEUE_ENABLE; 754 755 wr32(IGC_TXDCTL(reg_idx), txdctl); 756 } 757 758 /** 759 * igc_configure_tx - Configure transmit Unit after Reset 760 * @adapter: board private structure 761 * 762 * Configure the Tx unit of the MAC after a reset. 763 */ 764 static void igc_configure_tx(struct igc_adapter *adapter) 765 { 766 int i; 767 768 for (i = 0; i < adapter->num_tx_queues; i++) 769 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 770 } 771 772 /** 773 * igc_setup_mrqc - configure the multiple receive queue control registers 774 * @adapter: Board private structure 775 */ 776 static void igc_setup_mrqc(struct igc_adapter *adapter) 777 { 778 struct igc_hw *hw = &adapter->hw; 779 u32 j, num_rx_queues; 780 u32 mrqc, rxcsum; 781 u32 rss_key[10]; 782 783 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 784 for (j = 0; j < 10; j++) 785 wr32(IGC_RSSRK(j), rss_key[j]); 786 787 num_rx_queues = adapter->rss_queues; 788 789 if (adapter->rss_indir_tbl_init != num_rx_queues) { 790 for (j = 0; j < IGC_RETA_SIZE; j++) 791 adapter->rss_indir_tbl[j] = 792 (j * num_rx_queues) / IGC_RETA_SIZE; 793 adapter->rss_indir_tbl_init = num_rx_queues; 794 } 795 igc_write_rss_indir_tbl(adapter); 796 797 /* Disable raw packet checksumming so that RSS hash is placed in 798 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(IGC_RXCSUM);
	rxcsum |= IGC_RXCSUM_PCSD;

	/* Enable Receive Checksum Offload for SCTP */
	rxcsum |= IGC_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(IGC_RXCSUM, rxcsum);

	/* Generate RSS hash based on packet types, TCP/UDP
	 * port numbers and/or IPv4/v6 src and dst addresses
	 */
	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6 |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;

	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;

	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;

	wr32(IGC_MRQC, mrqc);
}

/**
 * igc_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 */
static void igc_setup_rctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(IGC_RCTL);

	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);

	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);

	/* enable stripping of CRC. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= IGC_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);

	/* enable LPE to allow for reception of jumbo frames */
	rctl |= IGC_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(IGC_RXDCTL(0), 0);

	/* This is useful for sniffing bad packets. */
	if (adapter->netdev->features & NETIF_F_RXALL) {
		/* UPE and MPE will be handled by normal PROMISC logic
		 * in set_rx_mode
		 */
		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */

		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
	}

	wr32(IGC_RCTL, rctl);
}

/**
 * igc_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 */
static void igc_setup_tctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which could be enabled by default */
	wr32(IGC_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(IGC_TCTL);
	tctl &= ~IGC_TCTL_CT;
	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);

	/* Enable transmits */
	tctl |= IGC_TCTL_EN;

	wr32(IGC_TCTL, tctl);
}

/**
 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 * @adapter: Pointer to adapter where the filter should be set
 * @index: Filter index
 * @type: MAC address filter type (source or destination)
 * @addr: MAC address
 * @queue: If non-negative, queue assignment feature is enabled and frames
 *	   matching the filter are enqueued onto 'queue'. Otherwise, queue
 *	   assignment is disabled.
909 */ 910 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 911 enum igc_mac_filter_type type, 912 const u8 *addr, int queue) 913 { 914 struct net_device *dev = adapter->netdev; 915 struct igc_hw *hw = &adapter->hw; 916 u32 ral, rah; 917 918 if (WARN_ON(index >= hw->mac.rar_entry_count)) 919 return; 920 921 ral = le32_to_cpup((__le32 *)(addr)); 922 rah = le16_to_cpup((__le16 *)(addr + 4)); 923 924 if (type == IGC_MAC_FILTER_TYPE_SRC) { 925 rah &= ~IGC_RAH_ASEL_MASK; 926 rah |= IGC_RAH_ASEL_SRC_ADDR; 927 } 928 929 if (queue >= 0) { 930 rah &= ~IGC_RAH_QSEL_MASK; 931 rah |= (queue << IGC_RAH_QSEL_SHIFT); 932 rah |= IGC_RAH_QSEL_ENABLE; 933 } 934 935 rah |= IGC_RAH_AV; 936 937 wr32(IGC_RAL(index), ral); 938 wr32(IGC_RAH(index), rah); 939 940 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 941 } 942 943 /** 944 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 945 * @adapter: Pointer to adapter where the filter should be cleared 946 * @index: Filter index 947 */ 948 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 949 { 950 struct net_device *dev = adapter->netdev; 951 struct igc_hw *hw = &adapter->hw; 952 953 if (WARN_ON(index >= hw->mac.rar_entry_count)) 954 return; 955 956 wr32(IGC_RAL(index), 0); 957 wr32(IGC_RAH(index), 0); 958 959 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 960 } 961 962 /* Set default MAC address for the PF in the first RAR entry */ 963 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 964 { 965 struct net_device *dev = adapter->netdev; 966 u8 *addr = adapter->hw.mac.addr; 967 968 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 969 970 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 971 } 972 973 /** 974 * igc_set_mac - Change the Ethernet Address of the NIC 975 * @netdev: network interface device structure 976 * @p: pointer to an address structure 977 * 978 * Returns 0 on success, negative on failure 979 */ 980 static int igc_set_mac(struct net_device *netdev, void *p) 981 { 982 struct igc_adapter *adapter = netdev_priv(netdev); 983 struct igc_hw *hw = &adapter->hw; 984 struct sockaddr *addr = p; 985 986 if (!is_valid_ether_addr(addr->sa_data)) 987 return -EADDRNOTAVAIL; 988 989 eth_hw_addr_set(netdev, addr->sa_data); 990 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 991 992 /* set the correct pool for the new PF MAC address in entry 0 */ 993 igc_set_default_mac_filter(adapter); 994 995 return 0; 996 } 997 998 /** 999 * igc_write_mc_addr_list - write multicast addresses to MTA 1000 * @netdev: network interface device structure 1001 * 1002 * Writes multicast address list to the MTA hash table. 1003 * Returns: -ENOMEM on failure 1004 * 0 on no addresses written 1005 * X on writing X addresses to MTA 1006 **/ 1007 static int igc_write_mc_addr_list(struct net_device *netdev) 1008 { 1009 struct igc_adapter *adapter = netdev_priv(netdev); 1010 struct igc_hw *hw = &adapter->hw; 1011 struct netdev_hw_addr *ha; 1012 u8 *mta_list; 1013 int i; 1014 1015 if (netdev_mc_empty(netdev)) { 1016 /* nothing to program, so clear mc list */ 1017 igc_update_mc_addr_list(hw, NULL, 0); 1018 return 0; 1019 } 1020 1021 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 1022 if (!mta_list) 1023 return -ENOMEM; 1024 1025 /* The shared function expects a packed array of only addresses. 
*/
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	igc_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}

static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
				bool *first_flag, bool *insert_empty)
{
	struct igc_adapter *adapter = netdev_priv(ring->netdev);
	ktime_t cycle_time = adapter->cycle_time;
	ktime_t base_time = adapter->base_time;
	ktime_t now = ktime_get_clocktai();
	ktime_t baset_est, end_of_cycle;
	s32 launchtime;
	s64 n;

	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);

	baset_est = ktime_add_ns(base_time, cycle_time * (n));
	end_of_cycle = ktime_add_ns(baset_est, cycle_time);

	if (ktime_compare(txtime, end_of_cycle) >= 0) {
		if (baset_est != ring->last_ff_cycle) {
			*first_flag = true;
			ring->last_ff_cycle = baset_est;

			if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
				*insert_empty = true;
		}
	}

	/* Leave a window at the end of the cycle in which a packet's
	 * launchtime may not be honored: 5 usec is reserved to give
	 * software time to update the tail pointer and for the frame
	 * to be DMA'd into the packet buffer.
	 */
	if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
		netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
			    txtime);

	ring->last_tx_cycle = end_of_cycle;

	launchtime = ktime_sub_ns(txtime, baset_est);
	if (launchtime > 0)
		div_s64_rem(launchtime, cycle_time, &launchtime);
	else
		launchtime = 0;

	return cpu_to_le32(launchtime);
}

static int igc_init_empty_frame(struct igc_ring *ring,
				struct igc_tx_buffer *buffer,
				struct sk_buff *skb)
{
	unsigned int size;
	dma_addr_t dma;

	size = skb_headlen(skb);

	dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ring->dev, dma)) {
		net_err_ratelimited("%s: DMA mapping error for empty frame\n",
				    netdev_name(ring->netdev));
		return -ENOMEM;
	}

	buffer->type = IGC_TX_BUFFER_TYPE_SKB;
	buffer->skb = skb;
	buffer->protocol = 0;
	buffer->bytecount = skb->len;
	buffer->gso_segs = 1;
	buffer->time_stamp = jiffies;
	dma_unmap_len_set(buffer, len, skb->len);
	dma_unmap_addr_set(buffer, dma, dma);

	return 0;
}

static void igc_init_tx_empty_descriptor(struct igc_ring *ring,
					 struct sk_buff *skb,
					 struct igc_tx_buffer *first)
{
	union igc_adv_tx_desc *desc;
	u32 cmd_type, olinfo_status;

	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
		   first->bytecount;
	olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;

	desc = IGC_TX_DESC(ring, ring->next_to_use);
	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));

	netdev_tx_sent_queue(txring_txq(ring), skb->len);

	first->next_to_watch = desc;

	ring->next_to_use++;
	if (ring->next_to_use == ring->count)
		ring->next_to_use = 0;
}

#define IGC_EMPTY_FRAME_SIZE 60

static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
			    __le32 launch_time, bool first_flag,
			    u32 vlan_macip_lens, u32 type_tucmd,
			    u32 mss_l4len_idx)
{
	struct igc_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IGC_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

	/* For i225, context index must be unique per ring. */
	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	if (first_flag)
		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;

	context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
	context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->launch_time = launch_time;
}

static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
			__le32 launch_time, bool first_flag)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_failed:
		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
		    !tx_ring->launchtime_enable)
			return;
		goto no_csum;
	}

	switch (skb->csum_offset) {
	case offsetof(struct tcphdr, check):
		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
		fallthrough;
	case offsetof(struct udphdr, check):
		break;
	case offsetof(struct sctphdr, checksum):
		/* validate that this is actually an SCTP request */
		if (skb_csum_is_sctp(skb)) {
			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
			break;
		}
		fallthrough;
	default:
		skb_checksum_help(skb);
		goto csum_failed;
	}

	/* update TX checksum flag */
	first->tx_flags |= IGC_TX_FLAGS_CSUM;
	vlan_macip_lens = skb_checksum_start_offset(skb) -
			  skb_network_offset(skb);
no_csum:
	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;

	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
			vlan_macip_lens, type_tucmd, 0);
}

static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Memory barrier: the queue stop above must be visible before we
	 * re-read the number of unused descriptors below.
	 */
	smp_mb();

	/* We need to check again in case another CPU has just
	 * made room available.
	 */
	if (igc_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	if (igc_desc_unused(tx_ring) >= size)
		return 0;
	return __igc_maybe_stop_tx(tx_ring, size);
}

#define IGC_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ?
\ 1246 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1247 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1248 1249 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1250 { 1251 /* set type for advanced descriptor with frame checksum insertion */ 1252 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1253 IGC_ADVTXD_DCMD_DEXT | 1254 IGC_ADVTXD_DCMD_IFCS; 1255 1256 /* set HW vlan bit if vlan is present */ 1257 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1258 IGC_ADVTXD_DCMD_VLE); 1259 1260 /* set segmentation bits for TSO */ 1261 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1262 (IGC_ADVTXD_DCMD_TSE)); 1263 1264 /* set timestamp bit if present, will select the register set 1265 * based on the _TSTAMP(_X) bit. 1266 */ 1267 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1268 (IGC_ADVTXD_MAC_TSTAMP)); 1269 1270 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, 1271 (IGC_ADVTXD_TSTAMP_REG_1)); 1272 1273 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, 1274 (IGC_ADVTXD_TSTAMP_REG_2)); 1275 1276 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, 1277 (IGC_ADVTXD_TSTAMP_REG_3)); 1278 1279 /* insert frame checksum */ 1280 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1281 1282 return cmd_type; 1283 } 1284 1285 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1286 union igc_adv_tx_desc *tx_desc, 1287 u32 tx_flags, unsigned int paylen) 1288 { 1289 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1290 1291 /* insert L4 checksum */ 1292 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, 1293 (IGC_TXD_POPTS_TXSM << 8)); 1294 1295 /* insert IPv4 checksum */ 1296 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, 1297 (IGC_TXD_POPTS_IXSM << 8)); 1298 1299 /* Use the second timer (free running, in general) for the timestamp */ 1300 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, 1301 IGC_TXD_PTP2_TIMER_1); 1302 1303 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1304 } 1305 1306 static int igc_tx_map(struct igc_ring *tx_ring, 1307 struct igc_tx_buffer *first, 1308 const u8 hdr_len) 1309 { 1310 struct sk_buff *skb = first->skb; 1311 struct igc_tx_buffer *tx_buffer; 1312 union igc_adv_tx_desc *tx_desc; 1313 u32 tx_flags = first->tx_flags; 1314 skb_frag_t *frag; 1315 u16 i = tx_ring->next_to_use; 1316 unsigned int data_len, size; 1317 dma_addr_t dma; 1318 u32 cmd_type; 1319 1320 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1321 tx_desc = IGC_TX_DESC(tx_ring, i); 1322 1323 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1324 1325 size = skb_headlen(skb); 1326 data_len = skb->data_len; 1327 1328 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1329 1330 tx_buffer = first; 1331 1332 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1333 if (dma_mapping_error(tx_ring->dev, dma)) 1334 goto dma_error; 1335 1336 /* record length, and DMA address */ 1337 dma_unmap_len_set(tx_buffer, len, size); 1338 dma_unmap_addr_set(tx_buffer, dma, dma); 1339 1340 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1341 1342 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1343 tx_desc->read.cmd_type_len = 1344 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1345 1346 i++; 1347 tx_desc++; 1348 if (i == tx_ring->count) { 1349 tx_desc = IGC_TX_DESC(tx_ring, 0); 1350 i = 0; 1351 } 1352 tx_desc->read.olinfo_status = 0; 1353 1354 dma += IGC_MAX_DATA_PER_TXD; 1355 size -= IGC_MAX_DATA_PER_TXD; 1356 1357 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1358 } 1359 
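		/* The loop above split any chunk larger than
		 * IGC_MAX_DATA_PER_TXD across several descriptors; once the
		 * linear data and every frag have been mapped, data_len hits
		 * zero and we drop out to write the final descriptor.
		 */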
1360 if (likely(!data_len)) 1361 break; 1362 1363 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1364 1365 i++; 1366 tx_desc++; 1367 if (i == tx_ring->count) { 1368 tx_desc = IGC_TX_DESC(tx_ring, 0); 1369 i = 0; 1370 } 1371 tx_desc->read.olinfo_status = 0; 1372 1373 size = skb_frag_size(frag); 1374 data_len -= size; 1375 1376 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 1377 size, DMA_TO_DEVICE); 1378 1379 tx_buffer = &tx_ring->tx_buffer_info[i]; 1380 } 1381 1382 /* write last descriptor with RS and EOP bits */ 1383 cmd_type |= size | IGC_TXD_DCMD; 1384 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1385 1386 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1387 1388 /* set the timestamp */ 1389 first->time_stamp = jiffies; 1390 1391 skb_tx_timestamp(skb); 1392 1393 /* Force memory writes to complete before letting h/w know there 1394 * are new descriptors to fetch. (Only applicable for weak-ordered 1395 * memory model archs, such as IA-64). 1396 * 1397 * We also need this memory barrier to make certain all of the 1398 * status bits have been updated before next_to_watch is written. 1399 */ 1400 wmb(); 1401 1402 /* set next_to_watch value indicating a packet is present */ 1403 first->next_to_watch = tx_desc; 1404 1405 i++; 1406 if (i == tx_ring->count) 1407 i = 0; 1408 1409 tx_ring->next_to_use = i; 1410 1411 /* Make sure there is space in the ring for the next send. */ 1412 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1413 1414 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1415 writel(i, tx_ring->tail); 1416 } 1417 1418 return 0; 1419 dma_error: 1420 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1421 tx_buffer = &tx_ring->tx_buffer_info[i]; 1422 1423 /* clear dma mappings for failed tx_buffer_info map */ 1424 while (tx_buffer != first) { 1425 if (dma_unmap_len(tx_buffer, len)) 1426 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1427 1428 if (i-- == 0) 1429 i += tx_ring->count; 1430 tx_buffer = &tx_ring->tx_buffer_info[i]; 1431 } 1432 1433 if (dma_unmap_len(tx_buffer, len)) 1434 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1435 1436 dev_kfree_skb_any(tx_buffer->skb); 1437 tx_buffer->skb = NULL; 1438 1439 tx_ring->next_to_use = i; 1440 1441 return -1; 1442 } 1443 1444 static int igc_tso(struct igc_ring *tx_ring, 1445 struct igc_tx_buffer *first, 1446 __le32 launch_time, bool first_flag, 1447 u8 *hdr_len) 1448 { 1449 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1450 struct sk_buff *skb = first->skb; 1451 union { 1452 struct iphdr *v4; 1453 struct ipv6hdr *v6; 1454 unsigned char *hdr; 1455 } ip; 1456 union { 1457 struct tcphdr *tcp; 1458 struct udphdr *udp; 1459 unsigned char *hdr; 1460 } l4; 1461 u32 paylen, l4_offset; 1462 int err; 1463 1464 if (skb->ip_summed != CHECKSUM_PARTIAL) 1465 return 0; 1466 1467 if (!skb_is_gso(skb)) 1468 return 0; 1469 1470 err = skb_cow_head(skb, 0); 1471 if (err < 0) 1472 return err; 1473 1474 ip.hdr = skb_network_header(skb); 1475 l4.hdr = skb_checksum_start(skb); 1476 1477 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1478 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1479 1480 /* initialize outer IP header fields */ 1481 if (ip.v4->version == 4) { 1482 unsigned char *csum_start = skb_checksum_start(skb); 1483 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1484 1485 /* IP header will have to cancel out any data that 1486 * is not a part of the outer IP header 1487 */ 1488 ip.v4->check = csum_fold(csum_partial(trans_start, 1489 csum_start - trans_start, 1490 0)); 1491 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 
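		/* Clear tot_len: with TSO the hardware fills in the IP total
		 * length for each segment it emits.
		 */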
1492 1493 ip.v4->tot_len = 0; 1494 first->tx_flags |= IGC_TX_FLAGS_TSO | 1495 IGC_TX_FLAGS_CSUM | 1496 IGC_TX_FLAGS_IPV4; 1497 } else { 1498 ip.v6->payload_len = 0; 1499 first->tx_flags |= IGC_TX_FLAGS_TSO | 1500 IGC_TX_FLAGS_CSUM; 1501 } 1502 1503 /* determine offset of inner transport header */ 1504 l4_offset = l4.hdr - skb->data; 1505 1506 /* remove payload length from inner checksum */ 1507 paylen = skb->len - l4_offset; 1508 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1509 /* compute length of segmentation header */ 1510 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1511 csum_replace_by_diff(&l4.tcp->check, 1512 (__force __wsum)htonl(paylen)); 1513 } else { 1514 /* compute length of segmentation header */ 1515 *hdr_len = sizeof(*l4.udp) + l4_offset; 1516 csum_replace_by_diff(&l4.udp->check, 1517 (__force __wsum)htonl(paylen)); 1518 } 1519 1520 /* update gso size and bytecount with header size */ 1521 first->gso_segs = skb_shinfo(skb)->gso_segs; 1522 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1523 1524 /* MSS L4LEN IDX */ 1525 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1526 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1527 1528 /* VLAN MACLEN IPLEN */ 1529 vlan_macip_lens = l4.hdr - ip.hdr; 1530 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1531 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1532 1533 igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, 1534 vlan_macip_lens, type_tucmd, mss_l4len_idx); 1535 1536 return 1; 1537 } 1538 1539 static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) 1540 { 1541 int i; 1542 1543 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 1544 struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; 1545 1546 if (tstamp->skb) 1547 continue; 1548 1549 tstamp->skb = skb_get(skb); 1550 tstamp->start = jiffies; 1551 *flags = tstamp->flags; 1552 1553 return true; 1554 } 1555 1556 return false; 1557 } 1558 1559 static int igc_insert_empty_frame(struct igc_ring *tx_ring) 1560 { 1561 struct igc_tx_buffer *empty_info; 1562 struct sk_buff *empty_skb; 1563 void *data; 1564 int ret; 1565 1566 empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1567 empty_skb = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); 1568 if (unlikely(!empty_skb)) { 1569 net_err_ratelimited("%s: skb alloc error for empty frame\n", 1570 netdev_name(tx_ring->netdev)); 1571 return -ENOMEM; 1572 } 1573 1574 data = skb_put(empty_skb, IGC_EMPTY_FRAME_SIZE); 1575 memset(data, 0, IGC_EMPTY_FRAME_SIZE); 1576 1577 /* Prepare DMA mapping and Tx buffer information */ 1578 ret = igc_init_empty_frame(tx_ring, empty_info, empty_skb); 1579 if (unlikely(ret)) { 1580 dev_kfree_skb_any(empty_skb); 1581 return ret; 1582 } 1583 1584 /* Prepare advanced context descriptor for empty packet */ 1585 igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); 1586 1587 /* Prepare advanced data descriptor for empty packet */ 1588 igc_init_tx_empty_descriptor(tx_ring, empty_skb, empty_info); 1589 1590 return 0; 1591 } 1592 1593 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1594 struct igc_ring *tx_ring) 1595 { 1596 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1597 bool first_flag = false, insert_empty = false; 1598 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1599 __be16 protocol = vlan_get_protocol(skb); 1600 struct igc_tx_buffer *first; 1601 __le32 launch_time = 0; 1602 u32 tx_flags = 0; 1603 unsigned short f; 1604 ktime_t txtime; 1605 u8 hdr_len = 0; 1606 int tso = 
0; 1607 1608 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1609 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1610 * + 2 desc gap to keep tail from touching head, 1611 * + 1 desc for context descriptor, 1612 * + 2 desc for inserting an empty packet for launch time, 1613 * otherwise try next time 1614 */ 1615 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1616 count += TXD_USE_COUNT(skb_frag_size( 1617 &skb_shinfo(skb)->frags[f])); 1618 1619 if (igc_maybe_stop_tx(tx_ring, count + 5)) { 1620 /* this is a hard error */ 1621 return NETDEV_TX_BUSY; 1622 } 1623 1624 if (!tx_ring->launchtime_enable) 1625 goto done; 1626 1627 txtime = skb->tstamp; 1628 skb->tstamp = ktime_set(0, 0); 1629 launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); 1630 1631 if (insert_empty) { 1632 /* Reset the launch time if the required empty frame fails to 1633 * be inserted. However, this packet is not dropped, so it 1634 * "dirties" the current Qbv cycle. This ensures that the 1635 * upcoming packet, which is scheduled in the next Qbv cycle, 1636 * does not require an empty frame. This way, the launch time 1637 * continues to function correctly despite the current failure 1638 * to insert the empty frame. 1639 */ 1640 if (igc_insert_empty_frame(tx_ring)) 1641 launch_time = 0; 1642 } 1643 1644 done: 1645 /* record the location of the first descriptor for this packet */ 1646 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1647 first->type = IGC_TX_BUFFER_TYPE_SKB; 1648 first->skb = skb; 1649 first->bytecount = skb->len; 1650 first->gso_segs = 1; 1651 1652 if (adapter->qbv_transition || tx_ring->oper_gate_closed) 1653 goto out_drop; 1654 1655 if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { 1656 adapter->stats.txdrop++; 1657 goto out_drop; 1658 } 1659 1660 if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && 1661 skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1662 unsigned long flags; 1663 u32 tstamp_flags; 1664 1665 spin_lock_irqsave(&adapter->ptp_tx_lock, flags); 1666 if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { 1667 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1668 tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; 1669 if (skb->sk && 1670 READ_ONCE(skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC) 1671 tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; 1672 } else { 1673 adapter->tx_hwtstamp_skipped++; 1674 } 1675 1676 spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); 1677 } 1678 1679 if (skb_vlan_tag_present(skb)) { 1680 tx_flags |= IGC_TX_FLAGS_VLAN; 1681 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1682 } 1683 1684 /* record initial flags and protocol */ 1685 first->tx_flags = tx_flags; 1686 first->protocol = protocol; 1687 1688 /* For preemptible queue, manually pad the skb so that HW includes 1689 * padding bytes in mCRC calculation 1690 */ 1691 if (tx_ring->preemptible && skb->len < ETH_ZLEN) { 1692 if (skb_padto(skb, ETH_ZLEN)) 1693 goto out_drop; 1694 skb_put(skb, ETH_ZLEN - skb->len); 1695 } 1696 1697 tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); 1698 if (tso < 0) 1699 goto out_drop; 1700 else if (!tso) 1701 igc_tx_csum(tx_ring, first, launch_time, first_flag); 1702 1703 igc_tx_map(tx_ring, first, hdr_len); 1704 1705 return NETDEV_TX_OK; 1706 1707 out_drop: 1708 dev_kfree_skb_any(first->skb); 1709 first->skb = NULL; 1710 1711 return NETDEV_TX_OK; 1712 } 1713 1714 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1715 struct sk_buff *skb) 1716 
{
	unsigned int r_idx = skb->queue_mapping;

	if (r_idx >= adapter->num_tx_queues)
		r_idx = r_idx % adapter->num_tx_queues;

	return adapter->tx_ring[r_idx];
}

static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct igc_adapter *adapter = netdev_priv(netdev);

	/* The minimum packet size with TCTL.PSP set is 17 bytes, so pad
	 * the skb in order to meet this minimum size requirement.
	 */
	if (skb->len < 17) {
		if (skb_padto(skb, 17))
			return NETDEV_TX_OK;
		skb->len = 17;
	}

	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
}

static void igc_rx_checksum(struct igc_ring *ring,
			    union igc_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Ignore Checksum bit is set */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
		return;

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (igc_test_staterr(rx_desc,
			     IGC_RXDEXT_STATERR_L4E |
			     IGC_RXDEXT_STATERR_IPE)) {
		/* work around errata with sctp packets where the TCPE aka
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets (aka let the stack check the crc32c)
		 */
		if (!(skb->len == 60 &&
		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		}
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
				      IGC_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
		   le32_to_cpu(rx_desc->wb.upper.status_error));
}

/* Mapping HW RSS Type to enum pkt_hash_types */
static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
	[IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2,
	[IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */
	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */
	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions */
	[13] = PKT_HASH_TYPE_NONE,
	[14] = PKT_HASH_TYPE_NONE,
	[15] = PKT_HASH_TYPE_NONE,
};

static inline void igc_rx_hash(struct igc_ring *ring,
			       union igc_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	if (ring->netdev->features & NETIF_F_RXHASH) {
		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
		u32 rss_type = igc_rss_type(rx_desc);

		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
	}
}

static void igc_rx_vlan(struct igc_ring *rx_ring,
			union igc_adv_rx_desc *rx_desc,
			struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;
1819 u16 vid; 1820 1821 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && 1822 igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) { 1823 if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) && 1824 test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) 1825 vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); 1826 else 1827 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 1828 1829 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 1830 } 1831 } 1832 1833 /** 1834 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1835 * @rx_ring: rx descriptor ring packet is being transacted on 1836 * @rx_desc: pointer to the EOP Rx descriptor 1837 * @skb: pointer to current skb being populated 1838 * 1839 * This function checks the ring, descriptor, and packet information in order 1840 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1841 * skb. 1842 */ 1843 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1844 union igc_adv_rx_desc *rx_desc, 1845 struct sk_buff *skb) 1846 { 1847 igc_rx_hash(rx_ring, rx_desc, skb); 1848 1849 igc_rx_checksum(rx_ring, rx_desc, skb); 1850 1851 igc_rx_vlan(rx_ring, rx_desc, skb); 1852 1853 skb_record_rx_queue(skb, rx_ring->queue_index); 1854 1855 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1856 } 1857 1858 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1859 { 1860 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1861 struct igc_adapter *adapter = netdev_priv(netdev); 1862 struct igc_hw *hw = &adapter->hw; 1863 u32 ctrl; 1864 1865 ctrl = rd32(IGC_CTRL); 1866 1867 if (enable) { 1868 /* enable VLAN tag insert/strip */ 1869 ctrl |= IGC_CTRL_VME; 1870 } else { 1871 /* disable VLAN tag insert/strip */ 1872 ctrl &= ~IGC_CTRL_VME; 1873 } 1874 wr32(IGC_CTRL, ctrl); 1875 } 1876 1877 static void igc_restore_vlan(struct igc_adapter *adapter) 1878 { 1879 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1880 } 1881 1882 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, 1883 const unsigned int size, 1884 int *rx_buffer_pgcnt) 1885 { 1886 struct igc_rx_buffer *rx_buffer; 1887 1888 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1889 *rx_buffer_pgcnt = 1890 #if (PAGE_SIZE < 8192) 1891 page_count(rx_buffer->page); 1892 #else 1893 0; 1894 #endif 1895 prefetchw(rx_buffer->page); 1896 1897 /* we are reusing so sync this buffer for CPU use */ 1898 dma_sync_single_range_for_cpu(rx_ring->dev, 1899 rx_buffer->dma, 1900 rx_buffer->page_offset, 1901 size, 1902 DMA_FROM_DEVICE); 1903 1904 rx_buffer->pagecnt_bias--; 1905 1906 return rx_buffer; 1907 } 1908 1909 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1910 unsigned int truesize) 1911 { 1912 #if (PAGE_SIZE < 8192) 1913 buffer->page_offset ^= truesize; 1914 #else 1915 buffer->page_offset += truesize; 1916 #endif 1917 } 1918 1919 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1920 unsigned int size) 1921 { 1922 unsigned int truesize; 1923 1924 #if (PAGE_SIZE < 8192) 1925 truesize = igc_rx_pg_size(ring) / 2; 1926 #else 1927 truesize = ring_uses_build_skb(ring) ? 
1928 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1929 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1930 SKB_DATA_ALIGN(size); 1931 #endif 1932 return truesize; 1933 } 1934 1935 /** 1936 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1937 * @rx_ring: rx descriptor ring to transact packets on 1938 * @rx_buffer: buffer containing page to add 1939 * @skb: sk_buff to place the data into 1940 * @size: size of buffer to be added 1941 * 1942 * This function will add the data contained in rx_buffer->page to the skb. 1943 */ 1944 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1945 struct igc_rx_buffer *rx_buffer, 1946 struct sk_buff *skb, 1947 unsigned int size) 1948 { 1949 unsigned int truesize; 1950 1951 #if (PAGE_SIZE < 8192) 1952 truesize = igc_rx_pg_size(rx_ring) / 2; 1953 #else 1954 truesize = ring_uses_build_skb(rx_ring) ? 1955 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1956 SKB_DATA_ALIGN(size); 1957 #endif 1958 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1959 rx_buffer->page_offset, size, truesize); 1960 1961 igc_rx_buffer_flip(rx_buffer, truesize); 1962 } 1963 1964 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1965 struct igc_rx_buffer *rx_buffer, 1966 struct xdp_buff *xdp) 1967 { 1968 unsigned int size = xdp->data_end - xdp->data; 1969 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1970 unsigned int metasize = xdp->data - xdp->data_meta; 1971 struct sk_buff *skb; 1972 1973 /* prefetch first cache line of first page */ 1974 net_prefetch(xdp->data_meta); 1975 1976 /* build an skb around the page buffer */ 1977 skb = napi_build_skb(xdp->data_hard_start, truesize); 1978 if (unlikely(!skb)) 1979 return NULL; 1980 1981 /* update pointers within the skb to store the data */ 1982 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1983 __skb_put(skb, size); 1984 if (metasize) 1985 skb_metadata_set(skb, metasize); 1986 1987 igc_rx_buffer_flip(rx_buffer, truesize); 1988 return skb; 1989 } 1990 1991 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1992 struct igc_rx_buffer *rx_buffer, 1993 struct igc_xdp_buff *ctx) 1994 { 1995 struct xdp_buff *xdp = &ctx->xdp; 1996 unsigned int metasize = xdp->data - xdp->data_meta; 1997 unsigned int size = xdp->data_end - xdp->data; 1998 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1999 void *va = xdp->data; 2000 unsigned int headlen; 2001 struct sk_buff *skb; 2002 2003 /* prefetch first cache line of first page */ 2004 net_prefetch(xdp->data_meta); 2005 2006 /* allocate a skb to store the frags */ 2007 skb = napi_alloc_skb(&rx_ring->q_vector->napi, 2008 IGC_RX_HDR_LEN + metasize); 2009 if (unlikely(!skb)) 2010 return NULL; 2011 2012 if (ctx->rx_ts) { 2013 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2014 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2015 } 2016 2017 /* Determine available headroom for copy */ 2018 headlen = size; 2019 if (headlen > IGC_RX_HDR_LEN) 2020 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 2021 2022 /* align pull length to size of long to optimize memcpy performance */ 2023 memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 2024 ALIGN(headlen + metasize, sizeof(long))); 2025 2026 if (metasize) { 2027 skb_metadata_set(skb, metasize); 2028 __skb_pull(skb, metasize); 2029 } 2030 2031 /* update all of the pointers */ 2032 size -= headlen; 2033 if (size) { 2034 skb_add_rx_frag(skb, 0, rx_buffer->page, 2035 (va + headlen) - page_address(rx_buffer->page), 2036 size, truesize); 2037 igc_rx_buffer_flip(rx_buffer, 
truesize); 2038 } else { 2039 rx_buffer->pagecnt_bias++; 2040 } 2041 2042 return skb; 2043 } 2044 2045 /** 2046 * igc_reuse_rx_page - page flip buffer and store it back on the ring 2047 * @rx_ring: rx descriptor ring to store buffers on 2048 * @old_buff: donor buffer to have page reused 2049 * 2050 * Synchronizes page for reuse by the adapter 2051 */ 2052 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 2053 struct igc_rx_buffer *old_buff) 2054 { 2055 u16 nta = rx_ring->next_to_alloc; 2056 struct igc_rx_buffer *new_buff; 2057 2058 new_buff = &rx_ring->rx_buffer_info[nta]; 2059 2060 /* update, and store next to alloc */ 2061 nta++; 2062 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 2063 2064 /* Transfer page from old buffer to new buffer. 2065 * Move each member individually to avoid possible store 2066 * forwarding stalls. 2067 */ 2068 new_buff->dma = old_buff->dma; 2069 new_buff->page = old_buff->page; 2070 new_buff->page_offset = old_buff->page_offset; 2071 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 2072 } 2073 2074 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 2075 int rx_buffer_pgcnt) 2076 { 2077 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 2078 struct page *page = rx_buffer->page; 2079 2080 /* avoid re-using remote and pfmemalloc pages */ 2081 if (!dev_page_is_reusable(page)) 2082 return false; 2083 2084 #if (PAGE_SIZE < 8192) 2085 /* if we are only owner of page we can reuse it */ 2086 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 2087 return false; 2088 #else 2089 #define IGC_LAST_OFFSET \ 2090 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 2091 2092 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 2093 return false; 2094 #endif 2095 2096 /* If we have drained the page fragment pool we need to update 2097 * the pagecnt_bias and page count so that we fully restock the 2098 * number of references the driver holds. 2099 */ 2100 if (unlikely(pagecnt_bias == 1)) { 2101 page_ref_add(page, USHRT_MAX - 1); 2102 rx_buffer->pagecnt_bias = USHRT_MAX; 2103 } 2104 2105 return true; 2106 } 2107 2108 /** 2109 * igc_is_non_eop - process handling of non-EOP buffers 2110 * @rx_ring: Rx ring being processed 2111 * @rx_desc: Rx descriptor for current buffer 2112 * 2113 * This function updates next to clean. If the buffer is an EOP buffer 2114 * this function exits returning false, otherwise it will place the 2115 * sk_buff in the next buffer to be chained and return true indicating 2116 * that this is in fact a non-EOP buffer. 2117 */ 2118 static bool igc_is_non_eop(struct igc_ring *rx_ring, 2119 union igc_adv_rx_desc *rx_desc) 2120 { 2121 u32 ntc = rx_ring->next_to_clean + 1; 2122 2123 /* fetch, update, and store next to clean */ 2124 ntc = (ntc < rx_ring->count) ? ntc : 0; 2125 rx_ring->next_to_clean = ntc; 2126 2127 prefetch(IGC_RX_DESC(rx_ring, ntc)); 2128 2129 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 2130 return false; 2131 2132 return true; 2133 } 2134 2135 /** 2136 * igc_cleanup_headers - Correct corrupted or empty headers 2137 * @rx_ring: rx descriptor ring packet is being transacted on 2138 * @rx_desc: pointer to the EOP Rx descriptor 2139 * @skb: pointer to current skb being fixed 2140 * 2141 * Address the case where we are pulling data in on pages only 2142 * and as such no data is present in the skb header. 2143 * 2144 * In addition if skb is not at least 60 bytes we need to pad it so that 2145 * it is large enough to qualify as a valid Ethernet frame. 
2146 * 2147 * Returns true if an error was encountered and skb was freed. 2148 */ 2149 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 2150 union igc_adv_rx_desc *rx_desc, 2151 struct sk_buff *skb) 2152 { 2153 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 2154 struct net_device *netdev = rx_ring->netdev; 2155 2156 if (!(netdev->features & NETIF_F_RXALL)) { 2157 dev_kfree_skb_any(skb); 2158 return true; 2159 } 2160 } 2161 2162 /* if eth_skb_pad returns an error the skb was freed */ 2163 if (eth_skb_pad(skb)) 2164 return true; 2165 2166 return false; 2167 } 2168 2169 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 2170 struct igc_rx_buffer *rx_buffer, 2171 int rx_buffer_pgcnt) 2172 { 2173 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2174 /* hand second half of page back to the ring */ 2175 igc_reuse_rx_page(rx_ring, rx_buffer); 2176 } else { 2177 /* We are not reusing the buffer so unmap it and free 2178 * any references we are holding to it 2179 */ 2180 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 2181 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 2182 IGC_RX_DMA_ATTR); 2183 __page_frag_cache_drain(rx_buffer->page, 2184 rx_buffer->pagecnt_bias); 2185 } 2186 2187 /* clear contents of rx_buffer */ 2188 rx_buffer->page = NULL; 2189 } 2190 2191 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 2192 { 2193 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 2194 2195 if (ring_uses_build_skb(rx_ring)) 2196 return IGC_SKB_PAD; 2197 if (igc_xdp_is_enabled(adapter)) 2198 return XDP_PACKET_HEADROOM; 2199 2200 return 0; 2201 } 2202 2203 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 2204 struct igc_rx_buffer *bi) 2205 { 2206 struct page *page = bi->page; 2207 dma_addr_t dma; 2208 2209 /* since we are recycling buffers we should seldom need to alloc */ 2210 if (likely(page)) 2211 return true; 2212 2213 /* alloc new page for storage */ 2214 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 2215 if (unlikely(!page)) { 2216 rx_ring->rx_stats.alloc_failed++; 2217 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2218 return false; 2219 } 2220 2221 /* map page for use */ 2222 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 2223 igc_rx_pg_size(rx_ring), 2224 DMA_FROM_DEVICE, 2225 IGC_RX_DMA_ATTR); 2226 2227 /* if mapping failed free memory back to system since 2228 * there isn't much point in holding memory we can't use 2229 */ 2230 if (dma_mapping_error(rx_ring->dev, dma)) { 2231 __free_page(page); 2232 2233 rx_ring->rx_stats.alloc_failed++; 2234 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2235 return false; 2236 } 2237 2238 bi->dma = dma; 2239 bi->page = page; 2240 bi->page_offset = igc_rx_offset(rx_ring); 2241 page_ref_add(page, USHRT_MAX - 1); 2242 bi->pagecnt_bias = USHRT_MAX; 2243 2244 return true; 2245 } 2246 2247 /** 2248 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 2249 * @rx_ring: rx descriptor ring 2250 * @cleaned_count: number of buffers to clean 2251 */ 2252 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 2253 { 2254 union igc_adv_rx_desc *rx_desc; 2255 u16 i = rx_ring->next_to_use; 2256 struct igc_rx_buffer *bi; 2257 u16 bufsz; 2258 2259 /* nothing to do */ 2260 if (!cleaned_count) 2261 return; 2262 2263 rx_desc = IGC_RX_DESC(rx_ring, i); 2264 bi = &rx_ring->rx_buffer_info[i]; 2265 i -= rx_ring->count; 2266 2267 bufsz = igc_rx_bufsz(rx_ring); 2268 2269 do { 2270 if (!igc_alloc_mapped_page(rx_ring, bi)) 2271 break; 2272 2273 /* sync the 
buffer for use by the device */ 2274 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 2275 bi->page_offset, bufsz, 2276 DMA_FROM_DEVICE); 2277 2278 /* Refresh the desc even if buffer_addrs didn't change 2279 * because each write-back erases this info. 2280 */ 2281 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2282 2283 rx_desc++; 2284 bi++; 2285 i++; 2286 if (unlikely(!i)) { 2287 rx_desc = IGC_RX_DESC(rx_ring, 0); 2288 bi = rx_ring->rx_buffer_info; 2289 i -= rx_ring->count; 2290 } 2291 2292 /* clear the length for the next_to_use descriptor */ 2293 rx_desc->wb.upper.length = 0; 2294 2295 cleaned_count--; 2296 } while (cleaned_count); 2297 2298 i += rx_ring->count; 2299 2300 if (rx_ring->next_to_use != i) { 2301 /* record the next descriptor to use */ 2302 rx_ring->next_to_use = i; 2303 2304 /* update next to alloc since we have filled the ring */ 2305 rx_ring->next_to_alloc = i; 2306 2307 /* Force memory writes to complete before letting h/w 2308 * know there are new descriptors to fetch. (Only 2309 * applicable for weak-ordered memory model archs, 2310 * such as IA-64). 2311 */ 2312 wmb(); 2313 writel(i, rx_ring->tail); 2314 } 2315 } 2316 2317 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2318 { 2319 union igc_adv_rx_desc *desc; 2320 u16 i = ring->next_to_use; 2321 struct igc_rx_buffer *bi; 2322 dma_addr_t dma; 2323 bool ok = true; 2324 2325 if (!count) 2326 return ok; 2327 2328 XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); 2329 2330 desc = IGC_RX_DESC(ring, i); 2331 bi = &ring->rx_buffer_info[i]; 2332 i -= ring->count; 2333 2334 do { 2335 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2336 if (!bi->xdp) { 2337 ok = false; 2338 break; 2339 } 2340 2341 dma = xsk_buff_xdp_get_dma(bi->xdp); 2342 desc->read.pkt_addr = cpu_to_le64(dma); 2343 2344 desc++; 2345 bi++; 2346 i++; 2347 if (unlikely(!i)) { 2348 desc = IGC_RX_DESC(ring, 0); 2349 bi = ring->rx_buffer_info; 2350 i -= ring->count; 2351 } 2352 2353 /* Clear the length for the next_to_use descriptor. */ 2354 desc->wb.upper.length = 0; 2355 2356 count--; 2357 } while (count); 2358 2359 i += ring->count; 2360 2361 if (ring->next_to_use != i) { 2362 ring->next_to_use = i; 2363 2364 /* Force memory writes to complete before letting h/w 2365 * know there are new descriptors to fetch. (Only 2366 * applicable for weak-ordered memory model archs, 2367 * such as IA-64). 2368 */ 2369 wmb(); 2370 writel(i, ring->tail); 2371 } 2372 2373 return ok; 2374 } 2375 2376 /* This function requires __netif_tx_lock is held by the caller. */ 2377 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2378 struct xdp_frame *xdpf) 2379 { 2380 struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 2381 u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? 
sinfo->nr_frags : 0; 2382 u16 count, index = ring->next_to_use; 2383 struct igc_tx_buffer *head = &ring->tx_buffer_info[index]; 2384 struct igc_tx_buffer *buffer = head; 2385 union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index); 2386 u32 olinfo_status, len = xdpf->len, cmd_type; 2387 void *data = xdpf->data; 2388 u16 i; 2389 2390 count = TXD_USE_COUNT(len); 2391 for (i = 0; i < nr_frags; i++) 2392 count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); 2393 2394 if (igc_maybe_stop_tx(ring, count + 3)) { 2395 /* this is a hard error */ 2396 return -EBUSY; 2397 } 2398 2399 i = 0; 2400 head->bytecount = xdp_get_frame_len(xdpf); 2401 head->type = IGC_TX_BUFFER_TYPE_XDP; 2402 head->gso_segs = 1; 2403 head->xdpf = xdpf; 2404 2405 olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2406 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2407 2408 for (;;) { 2409 dma_addr_t dma; 2410 2411 dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE); 2412 if (dma_mapping_error(ring->dev, dma)) { 2413 netdev_err_once(ring->netdev, 2414 "Failed to map DMA for TX\n"); 2415 goto unmap; 2416 } 2417 2418 dma_unmap_len_set(buffer, len, len); 2419 dma_unmap_addr_set(buffer, dma, dma); 2420 2421 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2422 IGC_ADVTXD_DCMD_IFCS | len; 2423 2424 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2425 desc->read.buffer_addr = cpu_to_le64(dma); 2426 2427 buffer->protocol = 0; 2428 2429 if (++index == ring->count) 2430 index = 0; 2431 2432 if (i == nr_frags) 2433 break; 2434 2435 buffer = &ring->tx_buffer_info[index]; 2436 desc = IGC_TX_DESC(ring, index); 2437 desc->read.olinfo_status = 0; 2438 2439 data = skb_frag_address(&sinfo->frags[i]); 2440 len = skb_frag_size(&sinfo->frags[i]); 2441 i++; 2442 } 2443 desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD); 2444 2445 netdev_tx_sent_queue(txring_txq(ring), head->bytecount); 2446 /* set the timestamp */ 2447 head->time_stamp = jiffies; 2448 /* set next_to_watch value indicating a packet is present */ 2449 head->next_to_watch = desc; 2450 ring->next_to_use = index; 2451 2452 return 0; 2453 2454 unmap: 2455 for (;;) { 2456 buffer = &ring->tx_buffer_info[index]; 2457 if (dma_unmap_len(buffer, len)) 2458 dma_unmap_page(ring->dev, 2459 dma_unmap_addr(buffer, dma), 2460 dma_unmap_len(buffer, len), 2461 DMA_TO_DEVICE); 2462 dma_unmap_len_set(buffer, len, 0); 2463 if (buffer == head) 2464 break; 2465 2466 if (!index) 2467 index += ring->count; 2468 index--; 2469 } 2470 2471 return -ENOMEM; 2472 } 2473 2474 struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu) 2475 { 2476 int index = cpu; 2477 2478 if (unlikely(index < 0)) 2479 index = 0; 2480 2481 while (index >= adapter->num_tx_queues) 2482 index -= adapter->num_tx_queues; 2483 2484 return adapter->tx_ring[index]; 2485 } 2486 2487 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2488 { 2489 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2490 int cpu = smp_processor_id(); 2491 struct netdev_queue *nq; 2492 struct igc_ring *ring; 2493 int res; 2494 2495 if (unlikely(!xdpf)) 2496 return -EFAULT; 2497 2498 ring = igc_get_tx_ring(adapter, cpu); 2499 nq = txring_txq(ring); 2500 2501 __netif_tx_lock(nq, cpu); 2502 /* Avoid transmit queue timeout since we share it with the slow path */ 2503 txq_trans_cond_update(nq); 2504 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2505 __netif_tx_unlock(nq); 2506 return res; 2507 } 2508 2509 /* This function assumes rcu_read_lock() is held by the caller. 
*/ 2510 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2511 struct bpf_prog *prog, 2512 struct xdp_buff *xdp) 2513 { 2514 u32 act = bpf_prog_run_xdp(prog, xdp); 2515 2516 switch (act) { 2517 case XDP_PASS: 2518 return IGC_XDP_PASS; 2519 case XDP_TX: 2520 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2521 goto out_failure; 2522 return IGC_XDP_TX; 2523 case XDP_REDIRECT: 2524 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2525 goto out_failure; 2526 return IGC_XDP_REDIRECT; 2527 break; 2528 default: 2529 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); 2530 fallthrough; 2531 case XDP_ABORTED: 2532 out_failure: 2533 trace_xdp_exception(adapter->netdev, prog, act); 2534 fallthrough; 2535 case XDP_DROP: 2536 return IGC_XDP_CONSUMED; 2537 } 2538 } 2539 2540 static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp) 2541 { 2542 struct bpf_prog *prog; 2543 int res; 2544 2545 prog = READ_ONCE(adapter->xdp_prog); 2546 if (!prog) { 2547 res = IGC_XDP_PASS; 2548 goto out; 2549 } 2550 2551 res = __igc_xdp_run_prog(adapter, prog, xdp); 2552 2553 out: 2554 return res; 2555 } 2556 2557 /* This function assumes __netif_tx_lock is held by the caller. */ 2558 void igc_flush_tx_descriptors(struct igc_ring *ring) 2559 { 2560 /* Once tail pointer is updated, hardware can fetch the descriptors 2561 * any time so we issue a write membar here to ensure all memory 2562 * writes are complete before the tail pointer is updated. 2563 */ 2564 wmb(); 2565 writel(ring->next_to_use, ring->tail); 2566 } 2567 2568 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2569 { 2570 int cpu = smp_processor_id(); 2571 struct netdev_queue *nq; 2572 struct igc_ring *ring; 2573 2574 if (status & IGC_XDP_TX) { 2575 ring = igc_get_tx_ring(adapter, cpu); 2576 nq = txring_txq(ring); 2577 2578 __netif_tx_lock(nq, cpu); 2579 igc_flush_tx_descriptors(ring); 2580 __netif_tx_unlock(nq); 2581 } 2582 2583 if (status & IGC_XDP_REDIRECT) 2584 xdp_do_flush(); 2585 } 2586 2587 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2588 unsigned int packets, unsigned int bytes) 2589 { 2590 struct igc_ring *ring = q_vector->rx.ring; 2591 2592 u64_stats_update_begin(&ring->rx_syncp); 2593 ring->rx_stats.packets += packets; 2594 ring->rx_stats.bytes += bytes; 2595 u64_stats_update_end(&ring->rx_syncp); 2596 2597 q_vector->rx.total_packets += packets; 2598 q_vector->rx.total_bytes += bytes; 2599 } 2600 2601 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2602 { 2603 unsigned int total_bytes = 0, total_packets = 0; 2604 struct igc_adapter *adapter = q_vector->adapter; 2605 struct igc_ring *rx_ring = q_vector->rx.ring; 2606 struct sk_buff *skb = rx_ring->skb; 2607 u16 cleaned_count = igc_desc_unused(rx_ring); 2608 int xdp_status = 0, rx_buffer_pgcnt; 2609 int xdp_res = 0; 2610 2611 while (likely(total_packets < budget)) { 2612 struct igc_xdp_buff ctx = { .rx_ts = NULL }; 2613 struct igc_rx_buffer *rx_buffer; 2614 union igc_adv_rx_desc *rx_desc; 2615 unsigned int size, truesize; 2616 int pkt_offset = 0; 2617 void *pktbuf; 2618 2619 /* return some buffers to hardware, one at a time is too slow */ 2620 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2621 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2622 cleaned_count = 0; 2623 } 2624 2625 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2626 size = le16_to_cpu(rx_desc->wb.upper.length); 2627 if (!size) 2628 break; 2629 2630 /* This memory barrier is needed to keep us from reading 2631 * any other fields out of 
the rx_desc until we know the 2632 * descriptor has been written back 2633 */ 2634 dma_rmb(); 2635 2636 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2637 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2638 2639 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2640 2641 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2642 ctx.rx_ts = pktbuf; 2643 pkt_offset = IGC_TS_HDR_LEN; 2644 size -= IGC_TS_HDR_LEN; 2645 } 2646 2647 if (igc_fpe_is_pmac_enabled(adapter) && 2648 igc_fpe_handle_mpacket(adapter, rx_desc, size, pktbuf)) { 2649 /* Advance the ring next-to-clean */ 2650 igc_is_non_eop(rx_ring, rx_desc); 2651 cleaned_count++; 2652 continue; 2653 } 2654 2655 if (!skb) { 2656 xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); 2657 xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), 2658 igc_rx_offset(rx_ring) + pkt_offset, 2659 size, true); 2660 xdp_buff_clear_frags_flag(&ctx.xdp); 2661 ctx.rx_desc = rx_desc; 2662 2663 xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp); 2664 } 2665 2666 if (xdp_res) { 2667 switch (xdp_res) { 2668 case IGC_XDP_CONSUMED: 2669 rx_buffer->pagecnt_bias++; 2670 break; 2671 case IGC_XDP_TX: 2672 case IGC_XDP_REDIRECT: 2673 igc_rx_buffer_flip(rx_buffer, truesize); 2674 xdp_status |= xdp_res; 2675 break; 2676 } 2677 2678 total_packets++; 2679 total_bytes += size; 2680 } else if (skb) 2681 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2682 else if (ring_uses_build_skb(rx_ring)) 2683 skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); 2684 else 2685 skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); 2686 2687 /* exit if we failed to retrieve a buffer */ 2688 if (!xdp_res && !skb) { 2689 rx_ring->rx_stats.alloc_failed++; 2690 rx_buffer->pagecnt_bias++; 2691 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2692 break; 2693 } 2694 2695 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2696 cleaned_count++; 2697 2698 /* fetch next buffer in frame if non-eop */ 2699 if (igc_is_non_eop(rx_ring, rx_desc)) 2700 continue; 2701 2702 /* verify the packet layout is correct */ 2703 if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2704 skb = NULL; 2705 continue; 2706 } 2707 2708 /* probably a little skewed due to removing CRC */ 2709 total_bytes += skb->len; 2710 2711 /* populate checksum, VLAN, and protocol */ 2712 igc_process_skb_fields(rx_ring, rx_desc, skb); 2713 2714 napi_gro_receive(&q_vector->napi, skb); 2715 2716 /* reset skb pointer */ 2717 skb = NULL; 2718 2719 /* update budget accounting */ 2720 total_packets++; 2721 } 2722 2723 if (xdp_status) 2724 igc_finalize_xdp(adapter, xdp_status); 2725 2726 /* place incomplete frames back on ring for completion */ 2727 rx_ring->skb = skb; 2728 2729 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2730 2731 if (cleaned_count) 2732 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2733 2734 return total_packets; 2735 } 2736 2737 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2738 struct igc_xdp_buff *ctx) 2739 { 2740 struct xdp_buff *xdp = &ctx->xdp; 2741 unsigned int totalsize = xdp->data_end - xdp->data_meta; 2742 unsigned int metasize = xdp->data - xdp->data_meta; 2743 struct sk_buff *skb; 2744 2745 net_prefetch(xdp->data_meta); 2746 2747 skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); 2748 if (unlikely(!skb)) 2749 return NULL; 2750 2751 memcpy(__skb_put(skb, totalsize), xdp->data_meta, 2752 ALIGN(totalsize, sizeof(long))); 2753 2754 if (metasize) { 2755 skb_metadata_set(skb, metasize); 2756 __skb_pull(skb, metasize); 2757 
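		/* The memcpy above started at xdp->data_meta, so the XDP
		 * metadata (the region immediately before xdp->data) now sits
		 * at the head of the linear area: skb_metadata_set() records
		 * its length and __skb_pull() advances skb->data past it, so
		 * skb->len covers only the packet while the metadata stays
		 * reachable in the headroom. Rough example, assuming 8 bytes
		 * of metadata in front of a 60 byte frame: totalsize is 68,
		 * and after the pull skb->len is 60 with
		 * skb_metadata_len(skb) == 8.
		 */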
} 2758 2759 if (ctx->rx_ts) { 2760 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2761 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2762 } 2763 2764 return skb; 2765 } 2766 2767 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, 2768 union igc_adv_rx_desc *desc, 2769 struct igc_xdp_buff *ctx) 2770 { 2771 struct igc_ring *ring = q_vector->rx.ring; 2772 struct sk_buff *skb; 2773 2774 skb = igc_construct_skb_zc(ring, ctx); 2775 if (!skb) { 2776 ring->rx_stats.alloc_failed++; 2777 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); 2778 return; 2779 } 2780 2781 if (igc_cleanup_headers(ring, desc, skb)) 2782 return; 2783 2784 igc_process_skb_fields(ring, desc, skb); 2785 napi_gro_receive(&q_vector->napi, skb); 2786 } 2787 2788 static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) 2789 { 2790 /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The 2791 * igc_xdp_buff shares its layout with xdp_buff_xsk and private 2792 * igc_xdp_buff fields fall into xdp_buff_xsk->cb 2793 */ 2794 return (struct igc_xdp_buff *)xdp; 2795 } 2796 2797 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) 2798 { 2799 struct igc_adapter *adapter = q_vector->adapter; 2800 struct igc_ring *ring = q_vector->rx.ring; 2801 u16 cleaned_count = igc_desc_unused(ring); 2802 int total_bytes = 0, total_packets = 0; 2803 u16 ntc = ring->next_to_clean; 2804 struct bpf_prog *prog; 2805 bool failure = false; 2806 int xdp_status = 0; 2807 2808 rcu_read_lock(); 2809 2810 prog = READ_ONCE(adapter->xdp_prog); 2811 2812 while (likely(total_packets < budget)) { 2813 union igc_adv_rx_desc *desc; 2814 struct igc_rx_buffer *bi; 2815 struct igc_xdp_buff *ctx; 2816 unsigned int size; 2817 int res; 2818 2819 desc = IGC_RX_DESC(ring, ntc); 2820 size = le16_to_cpu(desc->wb.upper.length); 2821 if (!size) 2822 break; 2823 2824 /* This memory barrier is needed to keep us from reading 2825 * any other fields out of the rx_desc until we know the 2826 * descriptor has been written back 2827 */ 2828 dma_rmb(); 2829 2830 bi = &ring->rx_buffer_info[ntc]; 2831 2832 ctx = xsk_buff_to_igc_ctx(bi->xdp); 2833 ctx->rx_desc = desc; 2834 2835 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { 2836 ctx->rx_ts = bi->xdp->data; 2837 2838 bi->xdp->data += IGC_TS_HDR_LEN; 2839 2840 /* HW timestamp has been copied into local variable. Metadata 2841 * length when XDP program is called should be 0. 
2842 */ 2843 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2844 size -= IGC_TS_HDR_LEN; 2845 } else { 2846 ctx->rx_ts = NULL; 2847 } 2848 2849 bi->xdp->data_end = bi->xdp->data + size; 2850 xsk_buff_dma_sync_for_cpu(bi->xdp); 2851 2852 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2853 switch (res) { 2854 case IGC_XDP_PASS: 2855 igc_dispatch_skb_zc(q_vector, desc, ctx); 2856 fallthrough; 2857 case IGC_XDP_CONSUMED: 2858 xsk_buff_free(bi->xdp); 2859 break; 2860 case IGC_XDP_TX: 2861 case IGC_XDP_REDIRECT: 2862 xdp_status |= res; 2863 break; 2864 } 2865 2866 bi->xdp = NULL; 2867 total_bytes += size; 2868 total_packets++; 2869 cleaned_count++; 2870 ntc++; 2871 if (ntc == ring->count) 2872 ntc = 0; 2873 } 2874 2875 ring->next_to_clean = ntc; 2876 rcu_read_unlock(); 2877 2878 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2879 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2880 2881 if (xdp_status) 2882 igc_finalize_xdp(adapter, xdp_status); 2883 2884 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2885 2886 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2887 if (failure || ring->next_to_clean == ring->next_to_use) 2888 xsk_set_rx_need_wakeup(ring->xsk_pool); 2889 else 2890 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2891 return total_packets; 2892 } 2893 2894 return failure ? budget : total_packets; 2895 } 2896 2897 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2898 unsigned int packets, unsigned int bytes) 2899 { 2900 struct igc_ring *ring = q_vector->tx.ring; 2901 2902 u64_stats_update_begin(&ring->tx_syncp); 2903 ring->tx_stats.bytes += bytes; 2904 ring->tx_stats.packets += packets; 2905 u64_stats_update_end(&ring->tx_syncp); 2906 2907 q_vector->tx.total_bytes += bytes; 2908 q_vector->tx.total_packets += packets; 2909 } 2910 2911 static void igc_xsk_request_timestamp(void *_priv) 2912 { 2913 struct igc_metadata_request *meta_req = _priv; 2914 struct igc_ring *tx_ring = meta_req->tx_ring; 2915 struct igc_tx_timestamp_request *tstamp; 2916 u32 tx_flags = IGC_TX_FLAGS_TSTAMP; 2917 struct igc_adapter *adapter; 2918 unsigned long lock_flags; 2919 bool found = false; 2920 int i; 2921 2922 if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { 2923 adapter = netdev_priv(tx_ring->netdev); 2924 2925 spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); 2926 2927 /* Search for available tstamp regs */ 2928 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 2929 tstamp = &adapter->tx_tstamp[i]; 2930 2931 /* tstamp->skb and tstamp->xsk_tx_buffer are in union. 2932 * When tstamp->skb is equal to NULL, 2933 * tstamp->xsk_tx_buffer is equal to NULL as well. 2934 * This condition means that the particular tstamp reg 2935 * is not occupied by other packet. 2936 */ 2937 if (!tstamp->skb) { 2938 found = true; 2939 break; 2940 } 2941 } 2942 2943 /* Return if no available tstamp regs */ 2944 if (!found) { 2945 adapter->tx_hwtstamp_skipped++; 2946 spin_unlock_irqrestore(&adapter->ptp_tx_lock, 2947 lock_flags); 2948 return; 2949 } 2950 2951 tstamp->start = jiffies; 2952 tstamp->xsk_queue_index = tx_ring->queue_index; 2953 tstamp->xsk_tx_buffer = meta_req->tx_buffer; 2954 tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; 2955 2956 /* Hold the transmit completion until timestamp is ready */ 2957 meta_req->tx_buffer->xsk_pending_ts = true; 2958 2959 /* Keep the pointer to tx_timestamp, which is located in XDP 2960 * metadata area. It is the location to store the value of 2961 * tx hardware timestamp. 
2962 */ 2963 xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); 2964 2965 /* Set timestamp bit based on the _TSTAMP(_X) bit. */ 2966 tx_flags |= tstamp->flags; 2967 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2968 IGC_TX_FLAGS_TSTAMP, 2969 (IGC_ADVTXD_MAC_TSTAMP)); 2970 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2971 IGC_TX_FLAGS_TSTAMP_1, 2972 (IGC_ADVTXD_TSTAMP_REG_1)); 2973 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2974 IGC_TX_FLAGS_TSTAMP_2, 2975 (IGC_ADVTXD_TSTAMP_REG_2)); 2976 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2977 IGC_TX_FLAGS_TSTAMP_3, 2978 (IGC_ADVTXD_TSTAMP_REG_3)); 2979 2980 spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); 2981 } 2982 } 2983 2984 static u64 igc_xsk_fill_timestamp(void *_priv) 2985 { 2986 return *(u64 *)_priv; 2987 } 2988 2989 static void igc_xsk_request_launch_time(u64 launch_time, void *_priv) 2990 { 2991 struct igc_metadata_request *meta_req = _priv; 2992 struct igc_ring *tx_ring = meta_req->tx_ring; 2993 __le32 launch_time_offset; 2994 bool insert_empty = false; 2995 bool first_flag = false; 2996 u16 used_desc = 0; 2997 2998 if (!tx_ring->launchtime_enable) 2999 return; 3000 3001 launch_time_offset = igc_tx_launchtime(tx_ring, 3002 ns_to_ktime(launch_time), 3003 &first_flag, &insert_empty); 3004 if (insert_empty) { 3005 /* Disregard the launch time request if the required empty frame 3006 * fails to be inserted. 3007 */ 3008 if (igc_insert_empty_frame(tx_ring)) 3009 return; 3010 3011 meta_req->tx_buffer = 3012 &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 3013 /* Inserting an empty packet requires two descriptors: 3014 * one data descriptor and one context descriptor. 3015 */ 3016 used_desc += 2; 3017 } 3018 3019 /* Use one context descriptor to specify launch time and first flag. */ 3020 igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0); 3021 used_desc += 1; 3022 3023 /* Update the number of used descriptors in this request */ 3024 meta_req->used_desc += used_desc; 3025 } 3026 3027 const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { 3028 .tmo_request_timestamp = igc_xsk_request_timestamp, 3029 .tmo_fill_timestamp = igc_xsk_fill_timestamp, 3030 .tmo_request_launch_time = igc_xsk_request_launch_time, 3031 }; 3032 3033 static void igc_xdp_xmit_zc(struct igc_ring *ring) 3034 { 3035 struct xsk_buff_pool *pool = ring->xsk_pool; 3036 struct netdev_queue *nq = txring_txq(ring); 3037 union igc_adv_tx_desc *tx_desc = NULL; 3038 int cpu = smp_processor_id(); 3039 struct xdp_desc xdp_desc; 3040 u16 budget, ntu; 3041 3042 if (!netif_carrier_ok(ring->netdev)) 3043 return; 3044 3045 __netif_tx_lock(nq, cpu); 3046 3047 /* Avoid transmit queue timeout since we share it with the slow path */ 3048 txq_trans_cond_update(nq); 3049 3050 ntu = ring->next_to_use; 3051 budget = igc_desc_unused(ring); 3052 3053 /* Packets with launch time require one data descriptor and one context 3054 * descriptor. When the launch time falls into the next Qbv cycle, we 3055 * may need to insert an empty packet, which requires two more 3056 * descriptors. Therefore, to be safe, we always ensure we have at least 3057 * 4 descriptors available. 
3058 */ 3059 while (budget >= 4 && xsk_tx_peek_desc(pool, &xdp_desc)) { 3060 struct igc_metadata_request meta_req; 3061 struct xsk_tx_metadata *meta = NULL; 3062 struct igc_tx_buffer *bi; 3063 u32 olinfo_status; 3064 dma_addr_t dma; 3065 3066 meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | 3067 IGC_ADVTXD_DCMD_DEXT | 3068 IGC_ADVTXD_DCMD_IFCS | 3069 IGC_TXD_DCMD | xdp_desc.len; 3070 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 3071 3072 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 3073 meta = xsk_buff_get_metadata(pool, xdp_desc.addr); 3074 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 3075 bi = &ring->tx_buffer_info[ntu]; 3076 3077 meta_req.tx_ring = ring; 3078 meta_req.tx_buffer = bi; 3079 meta_req.meta = meta; 3080 meta_req.used_desc = 0; 3081 xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, 3082 &meta_req); 3083 3084 /* xsk_tx_metadata_request() may have updated next_to_use */ 3085 ntu = ring->next_to_use; 3086 3087 /* xsk_tx_metadata_request() may have updated Tx buffer info */ 3088 bi = meta_req.tx_buffer; 3089 3090 /* xsk_tx_metadata_request() may use a few descriptors */ 3091 budget -= meta_req.used_desc; 3092 3093 tx_desc = IGC_TX_DESC(ring, ntu); 3094 tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); 3095 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 3096 tx_desc->read.buffer_addr = cpu_to_le64(dma); 3097 3098 bi->type = IGC_TX_BUFFER_TYPE_XSK; 3099 bi->protocol = 0; 3100 bi->bytecount = xdp_desc.len; 3101 bi->gso_segs = 1; 3102 bi->time_stamp = jiffies; 3103 bi->next_to_watch = tx_desc; 3104 3105 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 3106 3107 ntu++; 3108 if (ntu == ring->count) 3109 ntu = 0; 3110 3111 ring->next_to_use = ntu; 3112 budget--; 3113 } 3114 3115 if (tx_desc) { 3116 igc_flush_tx_descriptors(ring); 3117 xsk_tx_release(pool); 3118 } 3119 3120 __netif_tx_unlock(nq); 3121 } 3122 3123 /** 3124 * igc_clean_tx_irq - Reclaim resources after transmit completes 3125 * @q_vector: pointer to q_vector containing needed info 3126 * @napi_budget: Used to determine if we are in netpoll 3127 * 3128 * returns true if ring is completely cleaned 3129 */ 3130 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 3131 { 3132 struct igc_adapter *adapter = q_vector->adapter; 3133 unsigned int total_bytes = 0, total_packets = 0; 3134 unsigned int budget = q_vector->tx.work_limit; 3135 struct igc_ring *tx_ring = q_vector->tx.ring; 3136 unsigned int i = tx_ring->next_to_clean; 3137 struct igc_tx_buffer *tx_buffer; 3138 union igc_adv_tx_desc *tx_desc; 3139 u32 xsk_frames = 0; 3140 3141 if (test_bit(__IGC_DOWN, &adapter->state)) 3142 return true; 3143 3144 tx_buffer = &tx_ring->tx_buffer_info[i]; 3145 tx_desc = IGC_TX_DESC(tx_ring, i); 3146 i -= tx_ring->count; 3147 3148 do { 3149 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 3150 3151 /* if next_to_watch is not set then there is no work pending */ 3152 if (!eop_desc) 3153 break; 3154 3155 /* prevent any other reads prior to eop_desc */ 3156 smp_rmb(); 3157 3158 /* if DD is not set pending work has not been completed */ 3159 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 3160 break; 3161 3162 if (igc_fpe_is_pmac_enabled(adapter) && 3163 igc_fpe_transmitted_smd_v(tx_desc)) 3164 ethtool_mmsv_event_handle(&adapter->fpe.mmsv, 3165 ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); 3166 3167 /* Hold the completions while there's a pending tx hardware 3168 * timestamp request from XDP Tx metadata. 
3169 */ 3170 if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && 3171 tx_buffer->xsk_pending_ts) 3172 break; 3173 3174 /* clear next_to_watch to prevent false hangs */ 3175 tx_buffer->next_to_watch = NULL; 3176 3177 /* update the statistics for this packet */ 3178 total_bytes += tx_buffer->bytecount; 3179 total_packets += tx_buffer->gso_segs; 3180 3181 switch (tx_buffer->type) { 3182 case IGC_TX_BUFFER_TYPE_XSK: 3183 xsk_frames++; 3184 break; 3185 case IGC_TX_BUFFER_TYPE_XDP: 3186 xdp_return_frame(tx_buffer->xdpf); 3187 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3188 break; 3189 case IGC_TX_BUFFER_TYPE_SKB: 3190 napi_consume_skb(tx_buffer->skb, napi_budget); 3191 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3192 break; 3193 default: 3194 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 3195 break; 3196 } 3197 3198 /* clear last DMA location and unmap remaining buffers */ 3199 while (tx_desc != eop_desc) { 3200 tx_buffer++; 3201 tx_desc++; 3202 i++; 3203 if (unlikely(!i)) { 3204 i -= tx_ring->count; 3205 tx_buffer = tx_ring->tx_buffer_info; 3206 tx_desc = IGC_TX_DESC(tx_ring, 0); 3207 } 3208 3209 /* unmap any remaining paged data */ 3210 if (dma_unmap_len(tx_buffer, len)) 3211 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3212 } 3213 3214 /* move us one more past the eop_desc for start of next pkt */ 3215 tx_buffer++; 3216 tx_desc++; 3217 i++; 3218 if (unlikely(!i)) { 3219 i -= tx_ring->count; 3220 tx_buffer = tx_ring->tx_buffer_info; 3221 tx_desc = IGC_TX_DESC(tx_ring, 0); 3222 } 3223 3224 /* issue prefetch for next Tx descriptor */ 3225 prefetch(tx_desc); 3226 3227 /* update budget accounting */ 3228 budget--; 3229 } while (likely(budget)); 3230 3231 netdev_tx_completed_queue(txring_txq(tx_ring), 3232 total_packets, total_bytes); 3233 3234 i += tx_ring->count; 3235 tx_ring->next_to_clean = i; 3236 3237 igc_update_tx_stats(q_vector, total_packets, total_bytes); 3238 3239 if (tx_ring->xsk_pool) { 3240 if (xsk_frames) 3241 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 3242 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 3243 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 3244 igc_xdp_xmit_zc(tx_ring); 3245 } 3246 3247 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 3248 struct igc_hw *hw = &adapter->hw; 3249 3250 /* Detect a transmit hang in hardware, this serializes the 3251 * check with the clearing of time_stamp and movement of i 3252 */ 3253 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3254 if (tx_buffer->next_to_watch && 3255 time_after(jiffies, tx_buffer->time_stamp + 3256 (adapter->tx_timeout_factor * HZ)) && 3257 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && 3258 (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && 3259 !tx_ring->oper_gate_closed) { 3260 /* detected Tx unit hang */ 3261 netdev_err(tx_ring->netdev, 3262 "Detected Tx Unit Hang\n" 3263 " Tx Queue <%d>\n" 3264 " TDH <%x>\n" 3265 " TDT <%x>\n" 3266 " next_to_use <%x>\n" 3267 " next_to_clean <%x>\n" 3268 "buffer_info[next_to_clean]\n" 3269 " time_stamp <%lx>\n" 3270 " next_to_watch <%p>\n" 3271 " jiffies <%lx>\n" 3272 " desc.status <%x>\n", 3273 tx_ring->queue_index, 3274 rd32(IGC_TDH(tx_ring->reg_idx)), 3275 readl(tx_ring->tail), 3276 tx_ring->next_to_use, 3277 tx_ring->next_to_clean, 3278 tx_buffer->time_stamp, 3279 tx_buffer->next_to_watch, 3280 jiffies, 3281 tx_buffer->next_to_watch->wb.status); 3282 netif_stop_subqueue(tx_ring->netdev, 3283 tx_ring->queue_index); 3284 3285 /* we are about to reset, no point in enabling stuff */ 3286 return true; 3287 } 3288 } 3289 3290 
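	/* The wake-up check below restarts the subqueue once completions
	 * have freed a comfortable amount of ring space (at least twice
	 * DESC_NEEDED descriptors, per TX_WAKE_THRESHOLD). The smp_mb()
	 * in that path publishes the updated next_to_clean before the
	 * stopped state is re-checked, which presumably pairs with the
	 * barrier in the queue-stop path (igc_maybe_stop_tx()) so a
	 * wake-up is not missed while the ring fills and drains
	 * concurrently.
	 */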
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 3291 if (unlikely(total_packets && 3292 netif_carrier_ok(tx_ring->netdev) && 3293 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 3294 /* Make sure that anybody stopping the queue after this 3295 * sees the new next_to_clean. 3296 */ 3297 smp_mb(); 3298 if (__netif_subqueue_stopped(tx_ring->netdev, 3299 tx_ring->queue_index) && 3300 !(test_bit(__IGC_DOWN, &adapter->state))) { 3301 netif_wake_subqueue(tx_ring->netdev, 3302 tx_ring->queue_index); 3303 3304 u64_stats_update_begin(&tx_ring->tx_syncp); 3305 tx_ring->tx_stats.restart_queue++; 3306 u64_stats_update_end(&tx_ring->tx_syncp); 3307 } 3308 } 3309 3310 return !!budget; 3311 } 3312 3313 static int igc_find_mac_filter(struct igc_adapter *adapter, 3314 enum igc_mac_filter_type type, const u8 *addr) 3315 { 3316 struct igc_hw *hw = &adapter->hw; 3317 int max_entries = hw->mac.rar_entry_count; 3318 u32 ral, rah; 3319 int i; 3320 3321 for (i = 0; i < max_entries; i++) { 3322 ral = rd32(IGC_RAL(i)); 3323 rah = rd32(IGC_RAH(i)); 3324 3325 if (!(rah & IGC_RAH_AV)) 3326 continue; 3327 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 3328 continue; 3329 if ((rah & IGC_RAH_RAH_MASK) != 3330 le16_to_cpup((__le16 *)(addr + 4))) 3331 continue; 3332 if (ral != le32_to_cpup((__le32 *)(addr))) 3333 continue; 3334 3335 return i; 3336 } 3337 3338 return -1; 3339 } 3340 3341 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 3342 { 3343 struct igc_hw *hw = &adapter->hw; 3344 int max_entries = hw->mac.rar_entry_count; 3345 u32 rah; 3346 int i; 3347 3348 for (i = 0; i < max_entries; i++) { 3349 rah = rd32(IGC_RAH(i)); 3350 3351 if (!(rah & IGC_RAH_AV)) 3352 return i; 3353 } 3354 3355 return -1; 3356 } 3357 3358 /** 3359 * igc_add_mac_filter() - Add MAC address filter 3360 * @adapter: Pointer to adapter where the filter should be added 3361 * @type: MAC address filter type (source or destination) 3362 * @addr: MAC address 3363 * @queue: If non-negative, queue assignment feature is enabled and frames 3364 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3365 * assignment is disabled. 3366 * 3367 * Return: 0 in case of success, negative errno code otherwise. 3368 */ 3369 static int igc_add_mac_filter(struct igc_adapter *adapter, 3370 enum igc_mac_filter_type type, const u8 *addr, 3371 int queue) 3372 { 3373 struct net_device *dev = adapter->netdev; 3374 int index; 3375 3376 index = igc_find_mac_filter(adapter, type, addr); 3377 if (index >= 0) 3378 goto update_filter; 3379 3380 index = igc_get_avail_mac_filter_slot(adapter); 3381 if (index < 0) 3382 return -ENOSPC; 3383 3384 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 3385 index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3386 addr, queue); 3387 3388 update_filter: 3389 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 3390 return 0; 3391 } 3392 3393 /** 3394 * igc_del_mac_filter() - Delete MAC address filter 3395 * @adapter: Pointer to adapter where the filter should be deleted from 3396 * @type: MAC address filter type (source or destination) 3397 * @addr: MAC address 3398 */ 3399 static void igc_del_mac_filter(struct igc_adapter *adapter, 3400 enum igc_mac_filter_type type, const u8 *addr) 3401 { 3402 struct net_device *dev = adapter->netdev; 3403 int index; 3404 3405 index = igc_find_mac_filter(adapter, type, addr); 3406 if (index < 0) 3407 return; 3408 3409 if (index == 0) { 3410 /* If this is the default filter, we don't actually delete it. 
3411 * We just reset to its default value i.e. disable queue 3412 * assignment. 3413 */ 3414 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 3415 3416 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 3417 } else { 3418 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 3419 index, 3420 type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3421 addr); 3422 3423 igc_clear_mac_filter_hw(adapter, index); 3424 } 3425 } 3426 3427 /** 3428 * igc_add_vlan_prio_filter() - Add VLAN priority filter 3429 * @adapter: Pointer to adapter where the filter should be added 3430 * @prio: VLAN priority value 3431 * @queue: Queue number which matching frames are assigned to 3432 * 3433 * Return: 0 in case of success, negative errno code otherwise. 3434 */ 3435 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 3436 int queue) 3437 { 3438 struct net_device *dev = adapter->netdev; 3439 struct igc_hw *hw = &adapter->hw; 3440 u32 vlanpqf; 3441 3442 vlanpqf = rd32(IGC_VLANPQF); 3443 3444 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 3445 netdev_dbg(dev, "VLAN priority filter already in use\n"); 3446 return -EEXIST; 3447 } 3448 3449 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 3450 vlanpqf |= IGC_VLANPQF_VALID(prio); 3451 3452 wr32(IGC_VLANPQF, vlanpqf); 3453 3454 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 3455 prio, queue); 3456 return 0; 3457 } 3458 3459 /** 3460 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 3461 * @adapter: Pointer to adapter where the filter should be deleted from 3462 * @prio: VLAN priority value 3463 */ 3464 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 3465 { 3466 struct igc_hw *hw = &adapter->hw; 3467 u32 vlanpqf; 3468 3469 vlanpqf = rd32(IGC_VLANPQF); 3470 3471 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 3472 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 3473 3474 wr32(IGC_VLANPQF, vlanpqf); 3475 3476 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 3477 prio); 3478 } 3479 3480 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 3481 { 3482 struct igc_hw *hw = &adapter->hw; 3483 int i; 3484 3485 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3486 u32 etqf = rd32(IGC_ETQF(i)); 3487 3488 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 3489 return i; 3490 } 3491 3492 return -1; 3493 } 3494 3495 /** 3496 * igc_add_etype_filter() - Add ethertype filter 3497 * @adapter: Pointer to adapter where the filter should be added 3498 * @etype: Ethertype value 3499 * @queue: If non-negative, queue assignment feature is enabled and frames 3500 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3501 * assignment is disabled. 3502 * 3503 * Return: 0 in case of success, negative errno code otherwise. 
3504 */ 3505 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3506 int queue) 3507 { 3508 struct igc_hw *hw = &adapter->hw; 3509 int index; 3510 u32 etqf; 3511 3512 index = igc_get_avail_etype_filter_slot(adapter); 3513 if (index < 0) 3514 return -ENOSPC; 3515 3516 etqf = rd32(IGC_ETQF(index)); 3517 3518 etqf &= ~IGC_ETQF_ETYPE_MASK; 3519 etqf |= etype; 3520 3521 if (queue >= 0) { 3522 etqf &= ~IGC_ETQF_QUEUE_MASK; 3523 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3524 etqf |= IGC_ETQF_QUEUE_ENABLE; 3525 } 3526 3527 etqf |= IGC_ETQF_FILTER_ENABLE; 3528 3529 wr32(IGC_ETQF(index), etqf); 3530 3531 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3532 etype, queue); 3533 return 0; 3534 } 3535 3536 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3537 { 3538 struct igc_hw *hw = &adapter->hw; 3539 int i; 3540 3541 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3542 u32 etqf = rd32(IGC_ETQF(i)); 3543 3544 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3545 return i; 3546 } 3547 3548 return -1; 3549 } 3550 3551 /** 3552 * igc_del_etype_filter() - Delete ethertype filter 3553 * @adapter: Pointer to adapter where the filter should be deleted from 3554 * @etype: Ethertype value 3555 */ 3556 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3557 { 3558 struct igc_hw *hw = &adapter->hw; 3559 int index; 3560 3561 index = igc_find_etype_filter(adapter, etype); 3562 if (index < 0) 3563 return; 3564 3565 wr32(IGC_ETQF(index), 0); 3566 3567 netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n", 3568 etype); 3569 } 3570 3571 static int igc_flex_filter_select(struct igc_adapter *adapter, 3572 struct igc_flex_filter *input, 3573 u32 *fhft) 3574 { 3575 struct igc_hw *hw = &adapter->hw; 3576 u8 fhft_index; 3577 u32 fhftsl; 3578 3579 if (input->index >= MAX_FLEX_FILTER) { 3580 netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n"); 3581 return -EINVAL; 3582 } 3583 3584 /* Indirect table select register */ 3585 fhftsl = rd32(IGC_FHFTSL); 3586 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3587 switch (input->index) { 3588 case 0 ... 7: 3589 fhftsl |= 0x00; 3590 break; 3591 case 8 ... 15: 3592 fhftsl |= 0x01; 3593 break; 3594 case 16 ... 23: 3595 fhftsl |= 0x02; 3596 break; 3597 case 24 ... 31: 3598 fhftsl |= 0x03; 3599 break; 3600 } 3601 wr32(IGC_FHFTSL, fhftsl); 3602 3603 /* Normalize index down to host table register */ 3604 fhft_index = input->index % 8; 3605 3606 *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : 3607 IGC_FHFT_EXT(fhft_index - 4); 3608 3609 return 0; 3610 } 3611 3612 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3613 struct igc_flex_filter *input) 3614 { 3615 struct igc_hw *hw = &adapter->hw; 3616 u8 *data = input->data; 3617 u8 *mask = input->mask; 3618 u32 queuing; 3619 u32 fhft; 3620 u32 wufc; 3621 int ret; 3622 int i; 3623 3624 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3625 * out early to avoid surprises later. 3626 */ 3627 if (input->length % 8 != 0) { 3628 netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n"); 3629 return -EINVAL; 3630 } 3631 3632 /* Select corresponding flex filter register and get base for host table. */ 3633 ret = igc_flex_filter_select(adapter, input, &fhft); 3634 if (ret) 3635 return ret; 3636 3637 /* When adding a filter globally disable flex filter feature. That is 3638 * recommended within the datasheet. 
3639 */ 3640 wufc = rd32(IGC_WUFC); 3641 wufc &= ~IGC_WUFC_FLEX_HQ; 3642 wr32(IGC_WUFC, wufc); 3643 3644 /* Configure filter */ 3645 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3646 queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); 3647 queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); 3648 3649 if (input->immediate_irq) 3650 queuing |= IGC_FHFT_IMM_INT; 3651 3652 if (input->drop) 3653 queuing |= IGC_FHFT_DROP; 3654 3655 wr32(fhft + 0xFC, queuing); 3656 3657 /* Write data (128 byte) and mask (128 bit) */ 3658 for (i = 0; i < 16; ++i) { 3659 const size_t data_idx = i * 8; 3660 const size_t row_idx = i * 16; 3661 u32 dw0 = 3662 (data[data_idx + 0] << 0) | 3663 (data[data_idx + 1] << 8) | 3664 (data[data_idx + 2] << 16) | 3665 (data[data_idx + 3] << 24); 3666 u32 dw1 = 3667 (data[data_idx + 4] << 0) | 3668 (data[data_idx + 5] << 8) | 3669 (data[data_idx + 6] << 16) | 3670 (data[data_idx + 7] << 24); 3671 u32 tmp; 3672 3673 /* Write row: dw0, dw1 and mask */ 3674 wr32(fhft + row_idx, dw0); 3675 wr32(fhft + row_idx + 4, dw1); 3676 3677 /* mask is only valid for MASK(7, 0) */ 3678 tmp = rd32(fhft + row_idx + 8); 3679 tmp &= ~GENMASK(7, 0); 3680 tmp |= mask[i]; 3681 wr32(fhft + row_idx + 8, tmp); 3682 } 3683 3684 /* Enable filter. */ 3685 wufc |= IGC_WUFC_FLEX_HQ; 3686 if (input->index > 8) { 3687 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */ 3688 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3689 3690 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3691 3692 wr32(IGC_WUFC_EXT, wufc_ext); 3693 } else { 3694 wufc |= (IGC_WUFC_FLX0 << input->index); 3695 } 3696 wr32(IGC_WUFC, wufc); 3697 3698 netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n", 3699 input->index); 3700 3701 return 0; 3702 } 3703 3704 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3705 const void *src, unsigned int offset, 3706 size_t len, const void *mask) 3707 { 3708 int i; 3709 3710 /* data */ 3711 memcpy(&flex->data[offset], src, len); 3712 3713 /* mask */ 3714 for (i = 0; i < len; ++i) { 3715 const unsigned int idx = i + offset; 3716 const u8 *ptr = mask; 3717 3718 if (mask) { 3719 if (ptr[i] & 0xff) 3720 flex->mask[idx / 8] |= BIT(idx % 8); 3721 3722 continue; 3723 } 3724 3725 flex->mask[idx / 8] |= BIT(idx % 8); 3726 } 3727 } 3728 3729 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3730 { 3731 struct igc_hw *hw = &adapter->hw; 3732 u32 wufc, wufc_ext; 3733 int i; 3734 3735 wufc = rd32(IGC_WUFC); 3736 wufc_ext = rd32(IGC_WUFC_EXT); 3737 3738 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3739 if (i < 8) { 3740 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3741 return i; 3742 } else { 3743 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3744 return i; 3745 } 3746 } 3747 3748 return -ENOSPC; 3749 } 3750 3751 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3752 { 3753 struct igc_hw *hw = &adapter->hw; 3754 u32 wufc, wufc_ext; 3755 3756 wufc = rd32(IGC_WUFC); 3757 wufc_ext = rd32(IGC_WUFC_EXT); 3758 3759 if (wufc & IGC_WUFC_FILTER_MASK) 3760 return true; 3761 3762 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3763 return true; 3764 3765 return false; 3766 } 3767 3768 static int igc_add_flex_filter(struct igc_adapter *adapter, 3769 struct igc_nfc_rule *rule) 3770 { 3771 struct igc_nfc_filter *filter = &rule->filter; 3772 unsigned int eth_offset, user_offset; 3773 struct igc_flex_filter flex = { }; 3774 int ret, index; 3775 bool vlan; 3776 3777 index = igc_find_avail_flex_filter_slot(adapter); 3778 if (index < 0) 3779 return -ENOSPC; 3780 3781 
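	/* Byte offsets used when populating the 32-byte filter image below
	 * (derived from the eth_offset/user_offset selection further down,
	 * shown here purely as an illustration):
	 *
	 *   untagged match: dst 0..5, src 6..11, ethertype 12..13,
	 *                   user data 14..21
	 *   VLAN match:     dst 0..5, src 6..11, vlan etype 12..13,
	 *                   vlan tci 14..15, ethertype 16..17,
	 *                   user data 18..25
	 */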
/* Construct the flex filter: 3782 * -> dest_mac [6] 3783 * -> src_mac [6] 3784 * -> tpid [2] 3785 * -> vlan tci [2] 3786 * -> ether type [2] 3787 * -> user data [8] 3788 * -> = 26 bytes => 32 length 3789 */ 3790 flex.index = index; 3791 flex.length = 32; 3792 flex.rx_queue = rule->action; 3793 3794 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3795 eth_offset = vlan ? 16 : 12; 3796 user_offset = vlan ? 18 : 14; 3797 3798 /* Add destination MAC */ 3799 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3800 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3801 ETH_ALEN, NULL); 3802 3803 /* Add source MAC */ 3804 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3805 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3806 ETH_ALEN, NULL); 3807 3808 /* Add VLAN etype */ 3809 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { 3810 __be16 vlan_etype = cpu_to_be16(filter->vlan_etype); 3811 3812 igc_flex_filter_add_field(&flex, &vlan_etype, 12, 3813 sizeof(vlan_etype), NULL); 3814 } 3815 3816 /* Add VLAN TCI */ 3817 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3818 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3819 sizeof(filter->vlan_tci), NULL); 3820 3821 /* Add Ether type */ 3822 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3823 __be16 etype = cpu_to_be16(filter->etype); 3824 3825 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3826 sizeof(etype), NULL); 3827 } 3828 3829 /* Add user data */ 3830 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3831 igc_flex_filter_add_field(&flex, &filter->user_data, 3832 user_offset, 3833 sizeof(filter->user_data), 3834 filter->user_mask); 3835 3836 /* Add it down to the hardware and enable it. */ 3837 ret = igc_write_flex_filter_ll(adapter, &flex); 3838 if (ret) 3839 return ret; 3840 3841 filter->flex_index = index; 3842 3843 return 0; 3844 } 3845 3846 static void igc_del_flex_filter(struct igc_adapter *adapter, 3847 u16 reg_index) 3848 { 3849 struct igc_hw *hw = &adapter->hw; 3850 u32 wufc; 3851 3852 /* Just disable the filter. The filter table itself is kept 3853 * intact. Another flex_filter_add() should override the "old" data 3854 * then. 
3855 */ 3856 if (reg_index > 8) { 3857 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3858 3859 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3860 wr32(IGC_WUFC_EXT, wufc_ext); 3861 } else { 3862 wufc = rd32(IGC_WUFC); 3863 3864 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3865 wr32(IGC_WUFC, wufc); 3866 } 3867 3868 if (igc_flex_filter_in_use(adapter)) 3869 return; 3870 3871 /* No filters are in use, we may disable flex filters */ 3872 wufc = rd32(IGC_WUFC); 3873 wufc &= ~IGC_WUFC_FLEX_HQ; 3874 wr32(IGC_WUFC, wufc); 3875 } 3876 3877 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3878 struct igc_nfc_rule *rule) 3879 { 3880 int err; 3881 3882 if (rule->flex) { 3883 return igc_add_flex_filter(adapter, rule); 3884 } 3885 3886 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3887 err = igc_add_etype_filter(adapter, rule->filter.etype, 3888 rule->action); 3889 if (err) 3890 return err; 3891 } 3892 3893 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3894 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3895 rule->filter.src_addr, rule->action); 3896 if (err) 3897 return err; 3898 } 3899 3900 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3901 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3902 rule->filter.dst_addr, rule->action); 3903 if (err) 3904 return err; 3905 } 3906 3907 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3908 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3909 3910 err = igc_add_vlan_prio_filter(adapter, prio, rule->action); 3911 if (err) 3912 return err; 3913 } 3914 3915 return 0; 3916 } 3917 3918 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3919 const struct igc_nfc_rule *rule) 3920 { 3921 if (rule->flex) { 3922 igc_del_flex_filter(adapter, rule->filter.flex_index); 3923 return; 3924 } 3925 3926 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3927 igc_del_etype_filter(adapter, rule->filter.etype); 3928 3929 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3930 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3931 3932 igc_del_vlan_prio_filter(adapter, prio); 3933 } 3934 3935 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3936 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3937 rule->filter.src_addr); 3938 3939 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3940 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3941 rule->filter.dst_addr); 3942 } 3943 3944 /** 3945 * igc_get_nfc_rule() - Get NFC rule 3946 * @adapter: Pointer to adapter 3947 * @location: Rule location 3948 * 3949 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3950 * 3951 * Return: Pointer to NFC rule at @location. If not found, NULL. 3952 */ 3953 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3954 u32 location) 3955 { 3956 struct igc_nfc_rule *rule; 3957 3958 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3959 if (rule->location == location) 3960 return rule; 3961 if (rule->location > location) 3962 break; 3963 } 3964 3965 return NULL; 3966 } 3967 3968 /** 3969 * igc_del_nfc_rule() - Delete NFC rule 3970 * @adapter: Pointer to adapter 3971 * @rule: Pointer to rule to be deleted 3972 * 3973 * Disable NFC rule in hardware and delete it from adapter. 3974 * 3975 * Context: Expects adapter->nfc_rule_lock to be held by caller. 
3976 */ 3977 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3978 { 3979 igc_disable_nfc_rule(adapter, rule); 3980 3981 list_del(&rule->list); 3982 adapter->nfc_rule_count--; 3983 3984 kfree(rule); 3985 } 3986 3987 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 3988 { 3989 struct igc_nfc_rule *rule, *tmp; 3990 3991 mutex_lock(&adapter->nfc_rule_lock); 3992 3993 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 3994 igc_del_nfc_rule(adapter, rule); 3995 3996 mutex_unlock(&adapter->nfc_rule_lock); 3997 } 3998 3999 /** 4000 * igc_add_nfc_rule() - Add NFC rule 4001 * @adapter: Pointer to adapter 4002 * @rule: Pointer to rule to be added 4003 * 4004 * Enable NFC rule in hardware and add it to adapter. 4005 * 4006 * Context: Expects adapter->nfc_rule_lock to be held by caller. 4007 * 4008 * Return: 0 on success, negative errno on failure. 4009 */ 4010 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 4011 { 4012 struct igc_nfc_rule *pred, *cur; 4013 int err; 4014 4015 err = igc_enable_nfc_rule(adapter, rule); 4016 if (err) 4017 return err; 4018 4019 pred = NULL; 4020 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 4021 if (cur->location >= rule->location) 4022 break; 4023 pred = cur; 4024 } 4025 4026 list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list); 4027 adapter->nfc_rule_count++; 4028 return 0; 4029 } 4030 4031 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 4032 { 4033 struct igc_nfc_rule *rule; 4034 4035 mutex_lock(&adapter->nfc_rule_lock); 4036 4037 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 4038 igc_enable_nfc_rule(adapter, rule); 4039 4040 mutex_unlock(&adapter->nfc_rule_lock); 4041 } 4042 4043 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 4044 { 4045 struct igc_adapter *adapter = netdev_priv(netdev); 4046 4047 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 4048 } 4049 4050 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 4051 { 4052 struct igc_adapter *adapter = netdev_priv(netdev); 4053 4054 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 4055 return 0; 4056 } 4057 4058 /** 4059 * igc_enable_empty_addr_recv - Enable Rx of packets with all-zeroes MAC address 4060 * @adapter: Pointer to the igc_adapter structure. 4061 * 4062 * Frame preemption verification requires that packets with the all-zeroes 4063 * MAC address are allowed to be received by the driver. This function adds the 4064 * all-zeroes destination address to the list of acceptable addresses. 4065 * 4066 * Return: 0 on success, negative value otherwise. 4067 */ 4068 int igc_enable_empty_addr_recv(struct igc_adapter *adapter) 4069 { 4070 u8 empty[ETH_ALEN] = {}; 4071 4072 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty, -1); 4073 } 4074 4075 void igc_disable_empty_addr_recv(struct igc_adapter *adapter) 4076 { 4077 u8 empty[ETH_ALEN] = {}; 4078 4079 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty); 4080 } 4081 4082 /** 4083 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 4084 * @netdev: network interface device structure 4085 * 4086 * The set_rx_mode entry point is called whenever the unicast or multicast 4087 * address lists or the network interface flags are updated. This routine is 4088 * responsible for configuring the hardware for proper unicast, multicast, 4089 * promiscuous mode, and all-multi behavior. 
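 * If the MTA cannot hold every multicast address or the RAR table cannot
 * hold every unicast address, the routine falls back to multicast (MPE)
 * or unicast (UPE) promiscuous mode respectively, so traffic is not
 * silently dropped.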
4090 */ 4091 static void igc_set_rx_mode(struct net_device *netdev) 4092 { 4093 struct igc_adapter *adapter = netdev_priv(netdev); 4094 struct igc_hw *hw = &adapter->hw; 4095 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 4096 int count; 4097 4098 /* Check for Promiscuous and All Multicast modes */ 4099 if (netdev->flags & IFF_PROMISC) { 4100 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 4101 } else { 4102 if (netdev->flags & IFF_ALLMULTI) { 4103 rctl |= IGC_RCTL_MPE; 4104 } else { 4105 /* Write addresses to the MTA, if the attempt fails 4106 * then we should just turn on promiscuous mode so 4107 * that we can at least receive multicast traffic 4108 */ 4109 count = igc_write_mc_addr_list(netdev); 4110 if (count < 0) 4111 rctl |= IGC_RCTL_MPE; 4112 } 4113 } 4114 4115 /* Write addresses to available RAR registers, if there is not 4116 * sufficient space to store all the addresses then enable 4117 * unicast promiscuous mode 4118 */ 4119 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 4120 rctl |= IGC_RCTL_UPE; 4121 4122 /* update state of unicast and multicast */ 4123 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 4124 wr32(IGC_RCTL, rctl); 4125 4126 #if (PAGE_SIZE < 8192) 4127 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 4128 rlpml = IGC_MAX_FRAME_BUILD_SKB; 4129 #endif 4130 wr32(IGC_RLPML, rlpml); 4131 } 4132 4133 /** 4134 * igc_configure - configure the hardware for RX and TX 4135 * @adapter: private board structure 4136 */ 4137 static void igc_configure(struct igc_adapter *adapter) 4138 { 4139 struct net_device *netdev = adapter->netdev; 4140 int i = 0; 4141 4142 igc_get_hw_control(adapter); 4143 igc_set_rx_mode(netdev); 4144 4145 igc_restore_vlan(adapter); 4146 4147 igc_setup_tctl(adapter); 4148 igc_setup_mrqc(adapter); 4149 igc_setup_rctl(adapter); 4150 4151 igc_set_default_mac_filter(adapter); 4152 igc_restore_nfc_rules(adapter); 4153 4154 igc_configure_tx(adapter); 4155 igc_configure_rx(adapter); 4156 4157 igc_rx_fifo_flush_base(&adapter->hw); 4158 4159 /* call igc_desc_unused which always leaves 4160 * at least 1 descriptor unused to make sure 4161 * next_to_use != next_to_clean 4162 */ 4163 for (i = 0; i < adapter->num_rx_queues; i++) { 4164 struct igc_ring *ring = adapter->rx_ring[i]; 4165 4166 if (ring->xsk_pool) 4167 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 4168 else 4169 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 4170 } 4171 } 4172 4173 /** 4174 * igc_write_ivar - configure ivar for given MSI-X vector 4175 * @hw: pointer to the HW structure 4176 * @msix_vector: vector number we are allocating to a given ring 4177 * @index: row index of IVAR register to write within IVAR table 4178 * @offset: column offset of in IVAR, should be multiple of 8 4179 * 4180 * The IVAR table consists of 2 columns, 4181 * each containing an cause allocation for an Rx and Tx ring, and a 4182 * variable number of rows depending on the number of queues supported. 
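 *
 * For example, igc_assign_vector() maps Rx queue N to row N >> 1 at
 * column offset (N & 0x1) << 4, while Tx queue N uses the same row at
 * offset ((N & 0x1) << 4) + 8.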
4183 */ 4184 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 4185 int index, int offset) 4186 { 4187 u32 ivar = array_rd32(IGC_IVAR0, index); 4188 4189 /* clear any bits that are currently set */ 4190 ivar &= ~((u32)0xFF << offset); 4191 4192 /* write vector and valid bit */ 4193 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 4194 4195 array_wr32(IGC_IVAR0, index, ivar); 4196 } 4197 4198 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 4199 { 4200 struct igc_adapter *adapter = q_vector->adapter; 4201 struct igc_hw *hw = &adapter->hw; 4202 int rx_queue = IGC_N0_QUEUE; 4203 int tx_queue = IGC_N0_QUEUE; 4204 4205 if (q_vector->rx.ring) 4206 rx_queue = q_vector->rx.ring->reg_idx; 4207 if (q_vector->tx.ring) 4208 tx_queue = q_vector->tx.ring->reg_idx; 4209 4210 switch (hw->mac.type) { 4211 case igc_i225: 4212 if (rx_queue > IGC_N0_QUEUE) 4213 igc_write_ivar(hw, msix_vector, 4214 rx_queue >> 1, 4215 (rx_queue & 0x1) << 4); 4216 if (tx_queue > IGC_N0_QUEUE) 4217 igc_write_ivar(hw, msix_vector, 4218 tx_queue >> 1, 4219 ((tx_queue & 0x1) << 4) + 8); 4220 q_vector->eims_value = BIT(msix_vector); 4221 break; 4222 default: 4223 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 4224 break; 4225 } 4226 4227 /* add q_vector eims value to global eims_enable_mask */ 4228 adapter->eims_enable_mask |= q_vector->eims_value; 4229 4230 /* configure q_vector to set itr on first interrupt */ 4231 q_vector->set_itr = 1; 4232 } 4233 4234 /** 4235 * igc_configure_msix - Configure MSI-X hardware 4236 * @adapter: Pointer to adapter structure 4237 * 4238 * igc_configure_msix sets up the hardware to properly 4239 * generate MSI-X interrupts. 4240 */ 4241 static void igc_configure_msix(struct igc_adapter *adapter) 4242 { 4243 struct igc_hw *hw = &adapter->hw; 4244 int i, vector = 0; 4245 u32 tmp; 4246 4247 adapter->eims_enable_mask = 0; 4248 4249 /* set vector for other causes, i.e. link changes */ 4250 switch (hw->mac.type) { 4251 case igc_i225: 4252 /* Turn on MSI-X capability first, or our settings 4253 * won't stick. And it will take days to debug. 
4254 */ 4255 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 4256 IGC_GPIE_PBA | IGC_GPIE_EIAME | 4257 IGC_GPIE_NSICR); 4258 4259 /* enable msix_other interrupt */ 4260 adapter->eims_other = BIT(vector); 4261 tmp = (vector++ | IGC_IVAR_VALID) << 8; 4262 4263 wr32(IGC_IVAR_MISC, tmp); 4264 break; 4265 default: 4266 /* do nothing, since nothing else supports MSI-X */ 4267 break; 4268 } /* switch (hw->mac.type) */ 4269 4270 adapter->eims_enable_mask |= adapter->eims_other; 4271 4272 for (i = 0; i < adapter->num_q_vectors; i++) 4273 igc_assign_vector(adapter->q_vector[i], vector++); 4274 4275 wrfl(); 4276 } 4277 4278 /** 4279 * igc_irq_enable - Enable default interrupt generation settings 4280 * @adapter: board private structure 4281 */ 4282 static void igc_irq_enable(struct igc_adapter *adapter) 4283 { 4284 struct igc_hw *hw = &adapter->hw; 4285 4286 if (adapter->msix_entries) { 4287 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 4288 u32 regval = rd32(IGC_EIAC); 4289 4290 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 4291 regval = rd32(IGC_EIAM); 4292 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 4293 wr32(IGC_EIMS, adapter->eims_enable_mask); 4294 wr32(IGC_IMS, ims); 4295 } else { 4296 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4297 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4298 } 4299 } 4300 4301 /** 4302 * igc_irq_disable - Mask off interrupt generation on the NIC 4303 * @adapter: board private structure 4304 */ 4305 static void igc_irq_disable(struct igc_adapter *adapter) 4306 { 4307 struct igc_hw *hw = &adapter->hw; 4308 4309 if (adapter->msix_entries) { 4310 u32 regval = rd32(IGC_EIAM); 4311 4312 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 4313 wr32(IGC_EIMC, adapter->eims_enable_mask); 4314 regval = rd32(IGC_EIAC); 4315 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 4316 } 4317 4318 wr32(IGC_IAM, 0); 4319 wr32(IGC_IMC, ~0); 4320 wrfl(); 4321 4322 if (adapter->msix_entries) { 4323 int vector = 0, i; 4324 4325 synchronize_irq(adapter->msix_entries[vector++].vector); 4326 4327 for (i = 0; i < adapter->num_q_vectors; i++) 4328 synchronize_irq(adapter->msix_entries[vector++].vector); 4329 } else { 4330 synchronize_irq(adapter->pdev->irq); 4331 } 4332 } 4333 4334 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 4335 const u32 max_rss_queues) 4336 { 4337 /* Determine if we need to pair queues. */ 4338 /* If rss_queues > half of max_rss_queues, pair the queues in 4339 * order to conserve interrupts due to limited supply. 4340 */ 4341 if (adapter->rss_queues > (max_rss_queues / 2)) 4342 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4343 else 4344 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 4345 } 4346 4347 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 4348 { 4349 return IGC_MAX_RX_QUEUES; 4350 } 4351 4352 static void igc_init_queue_configuration(struct igc_adapter *adapter) 4353 { 4354 u32 max_rss_queues; 4355 4356 max_rss_queues = igc_get_max_rss_queues(adapter); 4357 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 4358 4359 igc_set_flag_queue_pairs(adapter, max_rss_queues); 4360 } 4361 4362 /** 4363 * igc_reset_q_vector - Reset config for interrupt vector 4364 * @adapter: board private structure to initialize 4365 * @v_idx: Index of vector to be reset 4366 * 4367 * If NAPI is enabled it will delete any references to the 4368 * NAPI struct. This is preparation for igc_free_q_vector. 
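 * The q_vector memory itself is not released here; igc_free_q_vector()
 * frees it via kfree_rcu() so that concurrent igc_get_stats64() readers
 * can finish with the rings first.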
4369 */ 4370 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 4371 { 4372 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4373 4374 /* if we're coming from igc_set_interrupt_capability, the vectors are 4375 * not yet allocated 4376 */ 4377 if (!q_vector) 4378 return; 4379 4380 if (q_vector->tx.ring) 4381 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 4382 4383 if (q_vector->rx.ring) 4384 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 4385 4386 netif_napi_del(&q_vector->napi); 4387 } 4388 4389 /** 4390 * igc_free_q_vector - Free memory allocated for specific interrupt vector 4391 * @adapter: board private structure to initialize 4392 * @v_idx: Index of vector to be freed 4393 * 4394 * This function frees the memory allocated to the q_vector. 4395 */ 4396 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 4397 { 4398 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4399 4400 adapter->q_vector[v_idx] = NULL; 4401 4402 /* igc_get_stats64() might access the rings on this vector, 4403 * we must wait a grace period before freeing it. 4404 */ 4405 if (q_vector) 4406 kfree_rcu(q_vector, rcu); 4407 } 4408 4409 /** 4410 * igc_free_q_vectors - Free memory allocated for interrupt vectors 4411 * @adapter: board private structure to initialize 4412 * 4413 * This function frees the memory allocated to the q_vectors. In addition if 4414 * NAPI is enabled it will delete any references to the NAPI struct prior 4415 * to freeing the q_vector. 4416 */ 4417 static void igc_free_q_vectors(struct igc_adapter *adapter) 4418 { 4419 int v_idx = adapter->num_q_vectors; 4420 4421 adapter->num_tx_queues = 0; 4422 adapter->num_rx_queues = 0; 4423 adapter->num_q_vectors = 0; 4424 4425 while (v_idx--) { 4426 igc_reset_q_vector(adapter, v_idx); 4427 igc_free_q_vector(adapter, v_idx); 4428 } 4429 } 4430 4431 /** 4432 * igc_update_itr - update the dynamic ITR value based on statistics 4433 * @q_vector: pointer to q_vector 4434 * @ring_container: ring info to update the itr for 4435 * 4436 * Stores a new ITR value based on packets and byte 4437 * counts during the last interrupt. The advantage of per interrupt 4438 * computation is faster updates and more accurate ITR for the current 4439 * traffic pattern. Constants in this function were computed 4440 * based on theoretical maximum wire speed and thresholds were set based 4441 * on testing data as well as attempting to minimize response time 4442 * while increasing bulk throughput. 4443 * NOTE: These calculations are only valid when operating in a single- 4444 * queue environment. 
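 * For example, bursts where bytes/packets exceeds 8000 (TSO or jumbo
 * traffic) are classified as bulk_latency, while only a couple of small
 * packets per interrupt pushes the vector back toward lowest_latency.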
4445 */ 4446 static void igc_update_itr(struct igc_q_vector *q_vector, 4447 struct igc_ring_container *ring_container) 4448 { 4449 unsigned int packets = ring_container->total_packets; 4450 unsigned int bytes = ring_container->total_bytes; 4451 u8 itrval = ring_container->itr; 4452 4453 /* no packets, exit with status unchanged */ 4454 if (packets == 0) 4455 return; 4456 4457 switch (itrval) { 4458 case lowest_latency: 4459 /* handle TSO and jumbo frames */ 4460 if (bytes / packets > 8000) 4461 itrval = bulk_latency; 4462 else if ((packets < 5) && (bytes > 512)) 4463 itrval = low_latency; 4464 break; 4465 case low_latency: /* 50 usec aka 20000 ints/s */ 4466 if (bytes > 10000) { 4467 /* this if handles the TSO accounting */ 4468 if (bytes / packets > 8000) 4469 itrval = bulk_latency; 4470 else if ((packets < 10) || ((bytes / packets) > 1200)) 4471 itrval = bulk_latency; 4472 else if ((packets > 35)) 4473 itrval = lowest_latency; 4474 } else if (bytes / packets > 2000) { 4475 itrval = bulk_latency; 4476 } else if (packets <= 2 && bytes < 512) { 4477 itrval = lowest_latency; 4478 } 4479 break; 4480 case bulk_latency: /* 250 usec aka 4000 ints/s */ 4481 if (bytes > 25000) { 4482 if (packets > 35) 4483 itrval = low_latency; 4484 } else if (bytes < 1500) { 4485 itrval = low_latency; 4486 } 4487 break; 4488 } 4489 4490 /* clear work counters since we have the values we need */ 4491 ring_container->total_bytes = 0; 4492 ring_container->total_packets = 0; 4493 4494 /* write updated itr to ring container */ 4495 ring_container->itr = itrval; 4496 } 4497 4498 static void igc_set_itr(struct igc_q_vector *q_vector) 4499 { 4500 struct igc_adapter *adapter = q_vector->adapter; 4501 u32 new_itr = q_vector->itr_val; 4502 u8 current_itr = 0; 4503 4504 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 4505 switch (adapter->link_speed) { 4506 case SPEED_10: 4507 case SPEED_100: 4508 current_itr = 0; 4509 new_itr = IGC_4K_ITR; 4510 goto set_itr_now; 4511 default: 4512 break; 4513 } 4514 4515 igc_update_itr(q_vector, &q_vector->tx); 4516 igc_update_itr(q_vector, &q_vector->rx); 4517 4518 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4519 4520 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4521 if (current_itr == lowest_latency && 4522 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4523 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4524 current_itr = low_latency; 4525 4526 switch (current_itr) { 4527 /* counts and packets in update_itr are dependent on these numbers */ 4528 case lowest_latency: 4529 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4530 break; 4531 case low_latency: 4532 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4533 break; 4534 case bulk_latency: 4535 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4536 break; 4537 default: 4538 break; 4539 } 4540 4541 set_itr_now: 4542 if (new_itr != q_vector->itr_val) { 4543 /* this attempts to bias the interrupt rate towards Bulk 4544 * by adding intermediate steps when interrupt rate is 4545 * increasing 4546 */ 4547 new_itr = new_itr > q_vector->itr_val ? 4548 max((new_itr * q_vector->itr_val) / 4549 (new_itr + (q_vector->itr_val >> 2)), 4550 new_itr) : new_itr; 4551 /* Don't write the value here; it resets the adapter's 4552 * internal timer, and causes us to delay far longer than 4553 * we should between interrupts. Instead, we write the ITR 4554 * value at the beginning of the next interrupt so the timing 4555 * ends up being correct. 
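 * The deferred write is performed by igc_write_itr(), which the
 * interrupt handlers call before scheduling NAPI.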
4556 */ 4557 q_vector->itr_val = new_itr; 4558 q_vector->set_itr = 1; 4559 } 4560 } 4561 4562 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 4563 { 4564 int v_idx = adapter->num_q_vectors; 4565 4566 if (adapter->msix_entries) { 4567 pci_disable_msix(adapter->pdev); 4568 kfree(adapter->msix_entries); 4569 adapter->msix_entries = NULL; 4570 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 4571 pci_disable_msi(adapter->pdev); 4572 } 4573 4574 while (v_idx--) 4575 igc_reset_q_vector(adapter, v_idx); 4576 } 4577 4578 /** 4579 * igc_set_interrupt_capability - set MSI or MSI-X if supported 4580 * @adapter: Pointer to adapter structure 4581 * @msix: boolean value for MSI-X capability 4582 * 4583 * Attempt to configure interrupts using the best available 4584 * capabilities of the hardware and kernel. 4585 */ 4586 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 4587 bool msix) 4588 { 4589 int numvecs, i; 4590 int err; 4591 4592 if (!msix) 4593 goto msi_only; 4594 adapter->flags |= IGC_FLAG_HAS_MSIX; 4595 4596 /* Number of supported queues. */ 4597 adapter->num_rx_queues = adapter->rss_queues; 4598 4599 adapter->num_tx_queues = adapter->rss_queues; 4600 4601 /* start with one vector for every Rx queue */ 4602 numvecs = adapter->num_rx_queues; 4603 4604 /* if Tx handler is separate add 1 for every Tx queue */ 4605 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 4606 numvecs += adapter->num_tx_queues; 4607 4608 /* store the number of vectors reserved for queues */ 4609 adapter->num_q_vectors = numvecs; 4610 4611 /* add 1 vector for link status interrupts */ 4612 numvecs++; 4613 4614 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 4615 GFP_KERNEL); 4616 4617 if (!adapter->msix_entries) 4618 return; 4619 4620 /* populate entry values */ 4621 for (i = 0; i < numvecs; i++) 4622 adapter->msix_entries[i].entry = i; 4623 4624 err = pci_enable_msix_range(adapter->pdev, 4625 adapter->msix_entries, 4626 numvecs, 4627 numvecs); 4628 if (err > 0) 4629 return; 4630 4631 kfree(adapter->msix_entries); 4632 adapter->msix_entries = NULL; 4633 4634 igc_reset_interrupt_capability(adapter); 4635 4636 msi_only: 4637 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 4638 4639 adapter->rss_queues = 1; 4640 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4641 adapter->num_rx_queues = 1; 4642 adapter->num_tx_queues = 1; 4643 adapter->num_q_vectors = 1; 4644 if (!pci_enable_msi(adapter->pdev)) 4645 adapter->flags |= IGC_FLAG_HAS_MSI; 4646 } 4647 4648 /** 4649 * igc_update_ring_itr - update the dynamic ITR value based on packet size 4650 * @q_vector: pointer to q_vector 4651 * 4652 * Stores a new ITR value based on strictly on packet size. This 4653 * algorithm is less sophisticated than that used in igc_update_itr, 4654 * due to the difficulty of synchronizing statistics across multiple 4655 * receive rings. The divisors and thresholds used by this function 4656 * were determined based on theoretical maximum wire speed and testing 4657 * data, in order to minimize response time while increasing bulk 4658 * throughput. 4659 * NOTE: This function is called only when operating in a multiqueue 4660 * receive environment. 4661 */ 4662 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4663 { 4664 struct igc_adapter *adapter = q_vector->adapter; 4665 int new_val = q_vector->itr_val; 4666 int avg_wire_size = 0; 4667 unsigned int packets; 4668 4669 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4670 * ints/sec - ITR timer value of 120 ticks. 
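 * At gigabit and above, the value is instead derived from the average
 * wire size seen since the last interrupt: e.g. 300-byte frames become
 * avg_wire_size = 324 after the +24 overhead adjustment and land in the
 * mid-size bucket (new_val = 324 / 3), while 1500-byte frames yield
 * 1524 / 2.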
4671 */ 4672 switch (adapter->link_speed) { 4673 case SPEED_10: 4674 case SPEED_100: 4675 new_val = IGC_4K_ITR; 4676 goto set_itr_val; 4677 default: 4678 break; 4679 } 4680 4681 packets = q_vector->rx.total_packets; 4682 if (packets) 4683 avg_wire_size = q_vector->rx.total_bytes / packets; 4684 4685 packets = q_vector->tx.total_packets; 4686 if (packets) 4687 avg_wire_size = max_t(u32, avg_wire_size, 4688 q_vector->tx.total_bytes / packets); 4689 4690 /* if avg_wire_size isn't set no work was done */ 4691 if (!avg_wire_size) 4692 goto clear_counts; 4693 4694 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4695 avg_wire_size += 24; 4696 4697 /* Don't starve jumbo frames */ 4698 avg_wire_size = min(avg_wire_size, 3000); 4699 4700 /* Give a little boost to mid-size frames */ 4701 if (avg_wire_size > 300 && avg_wire_size < 1200) 4702 new_val = avg_wire_size / 3; 4703 else 4704 new_val = avg_wire_size / 2; 4705 4706 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4707 if (new_val < IGC_20K_ITR && 4708 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4709 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4710 new_val = IGC_20K_ITR; 4711 4712 set_itr_val: 4713 if (new_val != q_vector->itr_val) { 4714 q_vector->itr_val = new_val; 4715 q_vector->set_itr = 1; 4716 } 4717 clear_counts: 4718 q_vector->rx.total_bytes = 0; 4719 q_vector->rx.total_packets = 0; 4720 q_vector->tx.total_bytes = 0; 4721 q_vector->tx.total_packets = 0; 4722 } 4723 4724 static void igc_ring_irq_enable(struct igc_q_vector *q_vector) 4725 { 4726 struct igc_adapter *adapter = q_vector->adapter; 4727 struct igc_hw *hw = &adapter->hw; 4728 4729 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4730 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4731 if (adapter->num_q_vectors == 1) 4732 igc_set_itr(q_vector); 4733 else 4734 igc_update_ring_itr(q_vector); 4735 } 4736 4737 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4738 if (adapter->msix_entries) 4739 wr32(IGC_EIMS, q_vector->eims_value); 4740 else 4741 igc_irq_enable(adapter); 4742 } 4743 } 4744 4745 static void igc_add_ring(struct igc_ring *ring, 4746 struct igc_ring_container *head) 4747 { 4748 head->ring = ring; 4749 head->count++; 4750 } 4751 4752 /** 4753 * igc_cache_ring_register - Descriptor ring to register mapping 4754 * @adapter: board private structure to initialize 4755 * 4756 * Once we know the feature-set enabled for the device, we'll cache 4757 * the register offset the descriptor ring is assigned to. 4758 */ 4759 static void igc_cache_ring_register(struct igc_adapter *adapter) 4760 { 4761 int i = 0, j = 0; 4762 4763 switch (adapter->hw.mac.type) { 4764 case igc_i225: 4765 default: 4766 for (; i < adapter->num_rx_queues; i++) 4767 adapter->rx_ring[i]->reg_idx = i; 4768 for (; j < adapter->num_tx_queues; j++) 4769 adapter->tx_ring[j]->reg_idx = j; 4770 break; 4771 } 4772 } 4773 4774 /** 4775 * igc_poll - NAPI Rx polling callback 4776 * @napi: napi polling structure 4777 * @budget: count of how many packets we should handle 4778 */ 4779 static int igc_poll(struct napi_struct *napi, int budget) 4780 { 4781 struct igc_q_vector *q_vector = container_of(napi, 4782 struct igc_q_vector, 4783 napi); 4784 struct igc_ring *rx_ring = q_vector->rx.ring; 4785 bool clean_complete = true; 4786 int work_done = 0; 4787 4788 if (q_vector->tx.ring) 4789 clean_complete = igc_clean_tx_irq(q_vector, budget); 4790 4791 if (rx_ring) { 4792 int cleaned = rx_ring->xsk_pool ? 
4793 igc_clean_rx_irq_zc(q_vector, budget) : 4794 igc_clean_rx_irq(q_vector, budget); 4795 4796 work_done += cleaned; 4797 if (cleaned >= budget) 4798 clean_complete = false; 4799 } 4800 4801 /* If all work not completed, return budget and keep polling */ 4802 if (!clean_complete) 4803 return budget; 4804 4805 /* Exit the polling mode, but don't re-enable interrupts if stack might 4806 * poll us due to busy-polling 4807 */ 4808 if (likely(napi_complete_done(napi, work_done))) 4809 igc_ring_irq_enable(q_vector); 4810 4811 return min(work_done, budget - 1); 4812 } 4813 4814 /** 4815 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4816 * @adapter: board private structure to initialize 4817 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4818 * @v_idx: index of vector in adapter struct 4819 * @txr_count: total number of Tx rings to allocate 4820 * @txr_idx: index of first Tx ring to allocate 4821 * @rxr_count: total number of Rx rings to allocate 4822 * @rxr_idx: index of first Rx ring to allocate 4823 * 4824 * We allocate one q_vector. If allocation fails we return -ENOMEM. 4825 */ 4826 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4827 unsigned int v_count, unsigned int v_idx, 4828 unsigned int txr_count, unsigned int txr_idx, 4829 unsigned int rxr_count, unsigned int rxr_idx) 4830 { 4831 struct igc_q_vector *q_vector; 4832 struct igc_ring *ring; 4833 int ring_count; 4834 4835 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4836 if (txr_count > 1 || rxr_count > 1) 4837 return -ENOMEM; 4838 4839 ring_count = txr_count + rxr_count; 4840 4841 /* allocate q_vector and rings */ 4842 q_vector = adapter->q_vector[v_idx]; 4843 if (!q_vector) 4844 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4845 GFP_KERNEL); 4846 else 4847 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4848 if (!q_vector) 4849 return -ENOMEM; 4850 4851 /* initialize NAPI */ 4852 netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll); 4853 4854 /* tie q_vector and adapter together */ 4855 adapter->q_vector[v_idx] = q_vector; 4856 q_vector->adapter = adapter; 4857 4858 /* initialize work limits */ 4859 q_vector->tx.work_limit = adapter->tx_work_limit; 4860 4861 /* initialize ITR configuration */ 4862 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4863 q_vector->itr_val = IGC_START_ITR; 4864 4865 /* initialize pointer to rings */ 4866 ring = q_vector->ring; 4867 4868 /* initialize ITR */ 4869 if (rxr_count) { 4870 /* rx or rx/tx vector */ 4871 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4872 q_vector->itr_val = adapter->rx_itr_setting; 4873 } else { 4874 /* tx only vector */ 4875 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4876 q_vector->itr_val = adapter->tx_itr_setting; 4877 } 4878 4879 if (txr_count) { 4880 /* assign generic ring traits */ 4881 ring->dev = &adapter->pdev->dev; 4882 ring->netdev = adapter->netdev; 4883 4884 /* configure backlink on ring */ 4885 ring->q_vector = q_vector; 4886 4887 /* update q_vector Tx values */ 4888 igc_add_ring(ring, &q_vector->tx); 4889 4890 /* apply Tx specific ring traits */ 4891 ring->count = adapter->tx_ring_count; 4892 ring->queue_index = txr_idx; 4893 4894 /* assign ring to adapter */ 4895 adapter->tx_ring[txr_idx] = ring; 4896 4897 /* push pointer to next ring */ 4898 ring++; 4899 } 4900 4901 if (rxr_count) { 4902 /* assign generic ring traits */ 4903 ring->dev = &adapter->pdev->dev; 4904 ring->netdev = adapter->netdev; 4905 4906 /* configure 
backlink on ring */ 4907 ring->q_vector = q_vector; 4908 4909 /* update q_vector Rx values */ 4910 igc_add_ring(ring, &q_vector->rx); 4911 4912 /* apply Rx specific ring traits */ 4913 ring->count = adapter->rx_ring_count; 4914 ring->queue_index = rxr_idx; 4915 4916 /* assign ring to adapter */ 4917 adapter->rx_ring[rxr_idx] = ring; 4918 } 4919 4920 return 0; 4921 } 4922 4923 /** 4924 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4925 * @adapter: board private structure to initialize 4926 * 4927 * We allocate one q_vector per queue interrupt. If allocation fails we 4928 * return -ENOMEM. 4929 */ 4930 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4931 { 4932 int rxr_remaining = adapter->num_rx_queues; 4933 int txr_remaining = adapter->num_tx_queues; 4934 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4935 int q_vectors = adapter->num_q_vectors; 4936 int err; 4937 4938 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4939 for (; rxr_remaining; v_idx++) { 4940 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4941 0, 0, 1, rxr_idx); 4942 4943 if (err) 4944 goto err_out; 4945 4946 /* update counts and index */ 4947 rxr_remaining--; 4948 rxr_idx++; 4949 } 4950 } 4951 4952 for (; v_idx < q_vectors; v_idx++) { 4953 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4954 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4955 4956 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4957 tqpv, txr_idx, rqpv, rxr_idx); 4958 4959 if (err) 4960 goto err_out; 4961 4962 /* update counts and index */ 4963 rxr_remaining -= rqpv; 4964 txr_remaining -= tqpv; 4965 rxr_idx++; 4966 txr_idx++; 4967 } 4968 4969 return 0; 4970 4971 err_out: 4972 adapter->num_tx_queues = 0; 4973 adapter->num_rx_queues = 0; 4974 adapter->num_q_vectors = 0; 4975 4976 while (v_idx--) 4977 igc_free_q_vector(adapter, v_idx); 4978 4979 return -ENOMEM; 4980 } 4981 4982 /** 4983 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 4984 * @adapter: Pointer to adapter structure 4985 * @msix: boolean for MSI-X capability 4986 * 4987 * This function initializes the interrupts and allocates all of the queues. 4988 */ 4989 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 4990 { 4991 struct net_device *dev = adapter->netdev; 4992 int err = 0; 4993 4994 igc_set_interrupt_capability(adapter, msix); 4995 4996 err = igc_alloc_q_vectors(adapter); 4997 if (err) { 4998 netdev_err(dev, "Unable to allocate memory for vectors\n"); 4999 goto err_alloc_q_vectors; 5000 } 5001 5002 igc_cache_ring_register(adapter); 5003 5004 return 0; 5005 5006 err_alloc_q_vectors: 5007 igc_reset_interrupt_capability(adapter); 5008 return err; 5009 } 5010 5011 /** 5012 * igc_sw_init - Initialize general software structures (struct igc_adapter) 5013 * @adapter: board private structure to initialize 5014 * 5015 * igc_sw_init initializes the Adapter private data structure. 5016 * Fields are initialized based on PCI device information and 5017 * OS network device settings (MTU size). 
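 * For example, with the default 1500-byte MTU, max_frame_size becomes
 * 1500 + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN = 1522 bytes.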
5018 */ 5019 static int igc_sw_init(struct igc_adapter *adapter) 5020 { 5021 struct net_device *netdev = adapter->netdev; 5022 struct pci_dev *pdev = adapter->pdev; 5023 struct igc_hw *hw = &adapter->hw; 5024 5025 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 5026 5027 /* set default ring sizes */ 5028 adapter->tx_ring_count = IGC_DEFAULT_TXD; 5029 adapter->rx_ring_count = IGC_DEFAULT_RXD; 5030 5031 /* set default ITR values */ 5032 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 5033 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 5034 5035 /* set default work limits */ 5036 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 5037 5038 /* adjust max frame to be at least the size of a standard frame */ 5039 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 5040 VLAN_HLEN; 5041 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 5042 5043 mutex_init(&adapter->nfc_rule_lock); 5044 INIT_LIST_HEAD(&adapter->nfc_rule_list); 5045 adapter->nfc_rule_count = 0; 5046 5047 spin_lock_init(&adapter->stats64_lock); 5048 spin_lock_init(&adapter->qbv_tx_lock); 5049 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 5050 adapter->flags |= IGC_FLAG_HAS_MSIX; 5051 5052 igc_init_queue_configuration(adapter); 5053 5054 /* This call may decrease the number of queues */ 5055 if (igc_init_interrupt_scheme(adapter, true)) { 5056 netdev_err(netdev, "Unable to allocate memory for queues\n"); 5057 return -ENOMEM; 5058 } 5059 5060 /* Explicitly disable IRQ since the NIC can be in any state. */ 5061 igc_irq_disable(adapter); 5062 5063 set_bit(__IGC_DOWN, &adapter->state); 5064 5065 return 0; 5066 } 5067 5068 static void igc_set_queue_napi(struct igc_adapter *adapter, int vector, 5069 struct napi_struct *napi) 5070 { 5071 struct igc_q_vector *q_vector = adapter->q_vector[vector]; 5072 5073 if (q_vector->rx.ring) 5074 netif_queue_set_napi(adapter->netdev, 5075 q_vector->rx.ring->queue_index, 5076 NETDEV_QUEUE_TYPE_RX, napi); 5077 5078 if (q_vector->tx.ring) 5079 netif_queue_set_napi(adapter->netdev, 5080 q_vector->tx.ring->queue_index, 5081 NETDEV_QUEUE_TYPE_TX, napi); 5082 } 5083 5084 /** 5085 * igc_up - Open the interface and prepare it to handle traffic 5086 * @adapter: board private structure 5087 */ 5088 void igc_up(struct igc_adapter *adapter) 5089 { 5090 struct igc_hw *hw = &adapter->hw; 5091 struct napi_struct *napi; 5092 int i = 0; 5093 5094 /* hardware has been reset, we need to reload some things */ 5095 igc_configure(adapter); 5096 5097 clear_bit(__IGC_DOWN, &adapter->state); 5098 5099 for (i = 0; i < adapter->num_q_vectors; i++) { 5100 napi = &adapter->q_vector[i]->napi; 5101 napi_enable(napi); 5102 igc_set_queue_napi(adapter, i, napi); 5103 } 5104 5105 if (adapter->msix_entries) 5106 igc_configure_msix(adapter); 5107 else 5108 igc_assign_vector(adapter->q_vector[0], 0); 5109 5110 /* Clear any pending interrupts. */ 5111 rd32(IGC_ICR); 5112 igc_irq_enable(adapter); 5113 5114 netif_tx_start_all_queues(adapter->netdev); 5115 5116 /* start the watchdog. 
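 * The watchdog task reports link changes and re-arms watchdog_timer
 * (roughly every two seconds while the interface is up).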
*/ 5117 hw->mac.get_link_status = true; 5118 schedule_work(&adapter->watchdog_task); 5119 } 5120 5121 /** 5122 * igc_update_stats - Update the board statistics counters 5123 * @adapter: board private structure 5124 */ 5125 void igc_update_stats(struct igc_adapter *adapter) 5126 { 5127 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 5128 struct pci_dev *pdev = adapter->pdev; 5129 struct igc_hw *hw = &adapter->hw; 5130 u64 _bytes, _packets; 5131 u64 bytes, packets; 5132 unsigned int start; 5133 u32 mpc; 5134 int i; 5135 5136 /* Prevent stats update while adapter is being reset, or if the pci 5137 * connection is down. 5138 */ 5139 if (adapter->link_speed == 0) 5140 return; 5141 if (pci_channel_offline(pdev)) 5142 return; 5143 5144 packets = 0; 5145 bytes = 0; 5146 5147 rcu_read_lock(); 5148 for (i = 0; i < adapter->num_rx_queues; i++) { 5149 struct igc_ring *ring = adapter->rx_ring[i]; 5150 u32 rqdpc = rd32(IGC_RQDPC(i)); 5151 5152 if (hw->mac.type >= igc_i225) 5153 wr32(IGC_RQDPC(i), 0); 5154 5155 if (rqdpc) { 5156 ring->rx_stats.drops += rqdpc; 5157 net_stats->rx_fifo_errors += rqdpc; 5158 } 5159 5160 do { 5161 start = u64_stats_fetch_begin(&ring->rx_syncp); 5162 _bytes = ring->rx_stats.bytes; 5163 _packets = ring->rx_stats.packets; 5164 } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); 5165 bytes += _bytes; 5166 packets += _packets; 5167 } 5168 5169 net_stats->rx_bytes = bytes; 5170 net_stats->rx_packets = packets; 5171 5172 packets = 0; 5173 bytes = 0; 5174 for (i = 0; i < adapter->num_tx_queues; i++) { 5175 struct igc_ring *ring = adapter->tx_ring[i]; 5176 5177 do { 5178 start = u64_stats_fetch_begin(&ring->tx_syncp); 5179 _bytes = ring->tx_stats.bytes; 5180 _packets = ring->tx_stats.packets; 5181 } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); 5182 bytes += _bytes; 5183 packets += _packets; 5184 } 5185 net_stats->tx_bytes = bytes; 5186 net_stats->tx_packets = packets; 5187 rcu_read_unlock(); 5188 5189 /* read stats registers */ 5190 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 5191 adapter->stats.gprc += rd32(IGC_GPRC); 5192 adapter->stats.gorc += rd32(IGC_GORCL); 5193 rd32(IGC_GORCH); /* clear GORCL */ 5194 adapter->stats.bprc += rd32(IGC_BPRC); 5195 adapter->stats.mprc += rd32(IGC_MPRC); 5196 adapter->stats.roc += rd32(IGC_ROC); 5197 5198 adapter->stats.prc64 += rd32(IGC_PRC64); 5199 adapter->stats.prc127 += rd32(IGC_PRC127); 5200 adapter->stats.prc255 += rd32(IGC_PRC255); 5201 adapter->stats.prc511 += rd32(IGC_PRC511); 5202 adapter->stats.prc1023 += rd32(IGC_PRC1023); 5203 adapter->stats.prc1522 += rd32(IGC_PRC1522); 5204 adapter->stats.tlpic += rd32(IGC_TLPIC); 5205 adapter->stats.rlpic += rd32(IGC_RLPIC); 5206 adapter->stats.hgptc += rd32(IGC_HGPTC); 5207 5208 mpc = rd32(IGC_MPC); 5209 adapter->stats.mpc += mpc; 5210 net_stats->rx_fifo_errors += mpc; 5211 adapter->stats.scc += rd32(IGC_SCC); 5212 adapter->stats.ecol += rd32(IGC_ECOL); 5213 adapter->stats.mcc += rd32(IGC_MCC); 5214 adapter->stats.latecol += rd32(IGC_LATECOL); 5215 adapter->stats.dc += rd32(IGC_DC); 5216 adapter->stats.rlec += rd32(IGC_RLEC); 5217 adapter->stats.xonrxc += rd32(IGC_XONRXC); 5218 adapter->stats.xontxc += rd32(IGC_XONTXC); 5219 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 5220 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 5221 adapter->stats.fcruc += rd32(IGC_FCRUC); 5222 adapter->stats.gptc += rd32(IGC_GPTC); 5223 adapter->stats.gotc += rd32(IGC_GOTCL); 5224 rd32(IGC_GOTCH); /* clear GOTCL */ 5225 adapter->stats.rnbc += rd32(IGC_RNBC); 5226 adapter->stats.ruc += rd32(IGC_RUC); 
5227 adapter->stats.rfc += rd32(IGC_RFC); 5228 adapter->stats.rjc += rd32(IGC_RJC); 5229 adapter->stats.tor += rd32(IGC_TORH); 5230 adapter->stats.tot += rd32(IGC_TOTH); 5231 adapter->stats.tpr += rd32(IGC_TPR); 5232 5233 adapter->stats.ptc64 += rd32(IGC_PTC64); 5234 adapter->stats.ptc127 += rd32(IGC_PTC127); 5235 adapter->stats.ptc255 += rd32(IGC_PTC255); 5236 adapter->stats.ptc511 += rd32(IGC_PTC511); 5237 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 5238 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 5239 5240 adapter->stats.mptc += rd32(IGC_MPTC); 5241 adapter->stats.bptc += rd32(IGC_BPTC); 5242 5243 adapter->stats.tpt += rd32(IGC_TPT); 5244 adapter->stats.colc += rd32(IGC_COLC); 5245 adapter->stats.colc += rd32(IGC_RERC); 5246 5247 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 5248 5249 adapter->stats.tsctc += rd32(IGC_TSCTC); 5250 5251 adapter->stats.iac += rd32(IGC_IAC); 5252 5253 /* Fill out the OS statistics structure */ 5254 net_stats->multicast = adapter->stats.mprc; 5255 net_stats->collisions = adapter->stats.colc; 5256 5257 /* Rx Errors */ 5258 5259 /* RLEC on some newer hardware can be incorrect so build 5260 * our own version based on RUC and ROC 5261 */ 5262 net_stats->rx_errors = adapter->stats.rxerrc + 5263 adapter->stats.crcerrs + adapter->stats.algnerrc + 5264 adapter->stats.ruc + adapter->stats.roc + 5265 adapter->stats.cexterr; 5266 net_stats->rx_length_errors = adapter->stats.ruc + 5267 adapter->stats.roc; 5268 net_stats->rx_crc_errors = adapter->stats.crcerrs; 5269 net_stats->rx_frame_errors = adapter->stats.algnerrc; 5270 net_stats->rx_missed_errors = adapter->stats.mpc; 5271 5272 /* Tx Errors */ 5273 net_stats->tx_errors = adapter->stats.ecol + 5274 adapter->stats.latecol; 5275 net_stats->tx_aborted_errors = adapter->stats.ecol; 5276 net_stats->tx_window_errors = adapter->stats.latecol; 5277 net_stats->tx_carrier_errors = adapter->stats.tncrs; 5278 5279 /* Tx Dropped */ 5280 net_stats->tx_dropped = adapter->stats.txdrop; 5281 5282 /* Management Stats */ 5283 adapter->stats.mgptc += rd32(IGC_MGTPTC); 5284 adapter->stats.mgprc += rd32(IGC_MGTPRC); 5285 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 5286 } 5287 5288 /** 5289 * igc_down - Close the interface 5290 * @adapter: board private structure 5291 */ 5292 void igc_down(struct igc_adapter *adapter) 5293 { 5294 struct net_device *netdev = adapter->netdev; 5295 struct igc_hw *hw = &adapter->hw; 5296 u32 tctl, rctl; 5297 int i = 0; 5298 5299 set_bit(__IGC_DOWN, &adapter->state); 5300 5301 igc_ptp_suspend(adapter); 5302 5303 if (pci_device_is_present(adapter->pdev)) { 5304 /* disable receives in the hardware */ 5305 rctl = rd32(IGC_RCTL); 5306 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 5307 /* flush and sleep below */ 5308 } 5309 /* set trans_start so we don't get spurious watchdogs during reset */ 5310 netif_trans_update(netdev); 5311 5312 netif_carrier_off(netdev); 5313 netif_tx_stop_all_queues(netdev); 5314 5315 if (pci_device_is_present(adapter->pdev)) { 5316 /* disable transmits in the hardware */ 5317 tctl = rd32(IGC_TCTL); 5318 tctl &= ~IGC_TCTL_EN; 5319 wr32(IGC_TCTL, tctl); 5320 /* flush both disables and wait for them to finish */ 5321 wrfl(); 5322 usleep_range(10000, 20000); 5323 5324 igc_irq_disable(adapter); 5325 } 5326 5327 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5328 5329 for (i = 0; i < adapter->num_q_vectors; i++) { 5330 if (adapter->q_vector[i]) { 5331 napi_synchronize(&adapter->q_vector[i]->napi); 5332 igc_set_queue_napi(adapter, i, NULL); 5333 napi_disable(&adapter->q_vector[i]->napi); 5334 } 5335 } 
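	/* NAPI is now disabled on every vector, so the timers below can be
	 * stopped and the rings drained without racing the poll loops.
	 */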
5336 5337 timer_delete_sync(&adapter->watchdog_timer); 5338 timer_delete_sync(&adapter->phy_info_timer); 5339 5340 /* record the stats before reset*/ 5341 spin_lock(&adapter->stats64_lock); 5342 igc_update_stats(adapter); 5343 spin_unlock(&adapter->stats64_lock); 5344 5345 adapter->link_speed = 0; 5346 adapter->link_duplex = 0; 5347 5348 if (!pci_channel_offline(adapter->pdev)) 5349 igc_reset(adapter); 5350 5351 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 5352 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 5353 5354 igc_disable_all_tx_rings_hw(adapter); 5355 igc_clean_all_tx_rings(adapter); 5356 igc_clean_all_rx_rings(adapter); 5357 5358 if (adapter->fpe.mmsv.pmac_enabled) 5359 ethtool_mmsv_stop(&adapter->fpe.mmsv); 5360 } 5361 5362 void igc_reinit_locked(struct igc_adapter *adapter) 5363 { 5364 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5365 usleep_range(1000, 2000); 5366 igc_down(adapter); 5367 igc_up(adapter); 5368 clear_bit(__IGC_RESETTING, &adapter->state); 5369 } 5370 5371 static void igc_reset_task(struct work_struct *work) 5372 { 5373 struct igc_adapter *adapter; 5374 5375 adapter = container_of(work, struct igc_adapter, reset_task); 5376 5377 rtnl_lock(); 5378 /* If we're already down or resetting, just bail */ 5379 if (test_bit(__IGC_DOWN, &adapter->state) || 5380 test_bit(__IGC_RESETTING, &adapter->state)) { 5381 rtnl_unlock(); 5382 return; 5383 } 5384 5385 igc_rings_dump(adapter); 5386 igc_regs_dump(adapter); 5387 netdev_err(adapter->netdev, "Reset adapter\n"); 5388 igc_reinit_locked(adapter); 5389 rtnl_unlock(); 5390 } 5391 5392 /** 5393 * igc_change_mtu - Change the Maximum Transfer Unit 5394 * @netdev: network interface device structure 5395 * @new_mtu: new value for maximum frame size 5396 * 5397 * Returns 0 on success, negative on failure 5398 */ 5399 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 5400 { 5401 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 5402 struct igc_adapter *adapter = netdev_priv(netdev); 5403 5404 if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { 5405 netdev_dbg(netdev, "Jumbo frames not supported with XDP"); 5406 return -EINVAL; 5407 } 5408 5409 /* adjust max frame to be at least the size of a standard frame */ 5410 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 5411 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 5412 5413 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5414 usleep_range(1000, 2000); 5415 5416 /* igc_down has a dependency on max_frame_size */ 5417 adapter->max_frame_size = max_frame; 5418 5419 if (netif_running(netdev)) 5420 igc_down(adapter); 5421 5422 netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); 5423 WRITE_ONCE(netdev->mtu, new_mtu); 5424 5425 if (netif_running(netdev)) 5426 igc_up(adapter); 5427 else 5428 igc_reset(adapter); 5429 5430 clear_bit(__IGC_RESETTING, &adapter->state); 5431 5432 return 0; 5433 } 5434 5435 /** 5436 * igc_tx_timeout - Respond to a Tx Hang 5437 * @netdev: network interface device structure 5438 * @txqueue: queue number that timed out 5439 **/ 5440 static void igc_tx_timeout(struct net_device *netdev, 5441 unsigned int __always_unused txqueue) 5442 { 5443 struct igc_adapter *adapter = netdev_priv(netdev); 5444 struct igc_hw *hw = &adapter->hw; 5445 5446 /* Do the reset outside of interrupt context */ 5447 adapter->tx_timeout_count++; 5448 schedule_work(&adapter->reset_task); 5449 wr32(IGC_EICS, 5450 (adapter->eims_enable_mask & ~adapter->eims_other)); 5451 } 5452 5453 /** 5454 * igc_get_stats64 - 
Get System Network Statistics 5455 * @netdev: network interface device structure 5456 * @stats: rtnl_link_stats64 pointer 5457 * 5458 * Returns the address of the device statistics structure. 5459 * The statistics are updated here and also from the timer callback. 5460 */ 5461 static void igc_get_stats64(struct net_device *netdev, 5462 struct rtnl_link_stats64 *stats) 5463 { 5464 struct igc_adapter *adapter = netdev_priv(netdev); 5465 5466 spin_lock(&adapter->stats64_lock); 5467 if (!test_bit(__IGC_RESETTING, &adapter->state)) 5468 igc_update_stats(adapter); 5469 memcpy(stats, &adapter->stats64, sizeof(*stats)); 5470 spin_unlock(&adapter->stats64_lock); 5471 } 5472 5473 static netdev_features_t igc_fix_features(struct net_device *netdev, 5474 netdev_features_t features) 5475 { 5476 /* Since there is no support for separate Rx/Tx vlan accel 5477 * enable/disable make sure Tx flag is always in same state as Rx. 5478 */ 5479 if (features & NETIF_F_HW_VLAN_CTAG_RX) 5480 features |= NETIF_F_HW_VLAN_CTAG_TX; 5481 else 5482 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 5483 5484 return features; 5485 } 5486 5487 static int igc_set_features(struct net_device *netdev, 5488 netdev_features_t features) 5489 { 5490 netdev_features_t changed = netdev->features ^ features; 5491 struct igc_adapter *adapter = netdev_priv(netdev); 5492 5493 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 5494 igc_vlan_mode(netdev, features); 5495 5496 /* Add VLAN support */ 5497 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 5498 return 0; 5499 5500 if (!(features & NETIF_F_NTUPLE)) 5501 igc_flush_nfc_rules(adapter); 5502 5503 netdev->features = features; 5504 5505 if (netif_running(netdev)) 5506 igc_reinit_locked(adapter); 5507 else 5508 igc_reset(adapter); 5509 5510 return 1; 5511 } 5512 5513 static netdev_features_t 5514 igc_features_check(struct sk_buff *skb, struct net_device *dev, 5515 netdev_features_t features) 5516 { 5517 unsigned int network_hdr_len, mac_hdr_len; 5518 5519 /* Make certain the headers can be described by a context descriptor */ 5520 mac_hdr_len = skb_network_offset(skb); 5521 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 5522 return features & ~(NETIF_F_HW_CSUM | 5523 NETIF_F_SCTP_CRC | 5524 NETIF_F_HW_VLAN_CTAG_TX | 5525 NETIF_F_TSO | 5526 NETIF_F_TSO6); 5527 5528 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 5529 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 5530 return features & ~(NETIF_F_HW_CSUM | 5531 NETIF_F_SCTP_CRC | 5532 NETIF_F_TSO | 5533 NETIF_F_TSO6); 5534 5535 /* We can only support IPv4 TSO in tunnels if we can mangle the 5536 * inner IP ID field, so strip TSO if MANGLEID is not supported. 
5537 */ 5538 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 5539 features &= ~NETIF_F_TSO; 5540 5541 return features; 5542 } 5543 5544 static void igc_tsync_interrupt(struct igc_adapter *adapter) 5545 { 5546 struct igc_hw *hw = &adapter->hw; 5547 u32 tsauxc, sec, nsec, tsicr; 5548 struct ptp_clock_event event; 5549 struct timespec64 ts; 5550 5551 tsicr = rd32(IGC_TSICR); 5552 5553 if (tsicr & IGC_TSICR_SYS_WRAP) { 5554 event.type = PTP_CLOCK_PPS; 5555 if (adapter->ptp_caps.pps) 5556 ptp_clock_event(adapter->ptp_clock, &event); 5557 } 5558 5559 if (tsicr & IGC_TSICR_TXTS) { 5560 /* retrieve hardware timestamp */ 5561 igc_ptp_tx_tstamp_event(adapter); 5562 } 5563 5564 if (tsicr & IGC_TSICR_TT0) { 5565 spin_lock(&adapter->tmreg_lock); 5566 ts = timespec64_add(adapter->perout[0].start, 5567 adapter->perout[0].period); 5568 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5569 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5570 tsauxc = rd32(IGC_TSAUXC); 5571 tsauxc |= IGC_TSAUXC_EN_TT0; 5572 wr32(IGC_TSAUXC, tsauxc); 5573 adapter->perout[0].start = ts; 5574 spin_unlock(&adapter->tmreg_lock); 5575 } 5576 5577 if (tsicr & IGC_TSICR_TT1) { 5578 spin_lock(&adapter->tmreg_lock); 5579 ts = timespec64_add(adapter->perout[1].start, 5580 adapter->perout[1].period); 5581 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5582 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5583 tsauxc = rd32(IGC_TSAUXC); 5584 tsauxc |= IGC_TSAUXC_EN_TT1; 5585 wr32(IGC_TSAUXC, tsauxc); 5586 adapter->perout[1].start = ts; 5587 spin_unlock(&adapter->tmreg_lock); 5588 } 5589 5590 if (tsicr & IGC_TSICR_AUTT0) { 5591 nsec = rd32(IGC_AUXSTMPL0); 5592 sec = rd32(IGC_AUXSTMPH0); 5593 event.type = PTP_CLOCK_EXTTS; 5594 event.index = 0; 5595 event.timestamp = sec * NSEC_PER_SEC + nsec; 5596 ptp_clock_event(adapter->ptp_clock, &event); 5597 } 5598 5599 if (tsicr & IGC_TSICR_AUTT1) { 5600 nsec = rd32(IGC_AUXSTMPL1); 5601 sec = rd32(IGC_AUXSTMPH1); 5602 event.type = PTP_CLOCK_EXTTS; 5603 event.index = 1; 5604 event.timestamp = sec * NSEC_PER_SEC + nsec; 5605 ptp_clock_event(adapter->ptp_clock, &event); 5606 } 5607 } 5608 5609 /** 5610 * igc_msix_other - msix other interrupt handler 5611 * @irq: interrupt number 5612 * @data: pointer to a q_vector 5613 */ 5614 static irqreturn_t igc_msix_other(int irq, void *data) 5615 { 5616 struct igc_adapter *adapter = data; 5617 struct igc_hw *hw = &adapter->hw; 5618 u32 icr = rd32(IGC_ICR); 5619 5620 /* reading ICR causes bit 31 of EICR to be cleared */ 5621 if (icr & IGC_ICR_DRSTA) 5622 schedule_work(&adapter->reset_task); 5623 5624 if (icr & IGC_ICR_DOUTSYNC) { 5625 /* HW is reporting DMA is out of sync */ 5626 adapter->stats.doosync++; 5627 } 5628 5629 if (icr & IGC_ICR_LSC) { 5630 hw->mac.get_link_status = true; 5631 /* guard against interrupt when we're going down */ 5632 if (!test_bit(__IGC_DOWN, &adapter->state)) 5633 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5634 } 5635 5636 if (icr & IGC_ICR_TS) 5637 igc_tsync_interrupt(adapter); 5638 5639 wr32(IGC_EIMS, adapter->eims_other); 5640 5641 return IRQ_HANDLED; 5642 } 5643 5644 static void igc_write_itr(struct igc_q_vector *q_vector) 5645 { 5646 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5647 5648 if (!q_vector->set_itr) 5649 return; 5650 5651 if (!itr_val) 5652 itr_val = IGC_ITR_VAL_MASK; 5653 5654 itr_val |= IGC_EITR_CNT_IGNR; 5655 5656 writel(itr_val, q_vector->itr_register); 5657 q_vector->set_itr = 0; 5658 } 5659 5660 static irqreturn_t igc_msix_ring(int irq, void *data) 5661 { 5662 struct igc_q_vector 
*q_vector = data; 5663 5664 /* Write the ITR value calculated from the previous interrupt. */ 5665 igc_write_itr(q_vector); 5666 5667 napi_schedule(&q_vector->napi); 5668 5669 return IRQ_HANDLED; 5670 } 5671 5672 /** 5673 * igc_request_msix - Initialize MSI-X interrupts 5674 * @adapter: Pointer to adapter structure 5675 * 5676 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5677 * kernel. 5678 */ 5679 static int igc_request_msix(struct igc_adapter *adapter) 5680 { 5681 unsigned int num_q_vectors = adapter->num_q_vectors; 5682 int i = 0, err = 0, vector = 0, free_vector = 0; 5683 struct net_device *netdev = adapter->netdev; 5684 5685 err = request_irq(adapter->msix_entries[vector].vector, 5686 &igc_msix_other, 0, netdev->name, adapter); 5687 if (err) 5688 goto err_out; 5689 5690 if (num_q_vectors > MAX_Q_VECTORS) { 5691 num_q_vectors = MAX_Q_VECTORS; 5692 dev_warn(&adapter->pdev->dev, 5693 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5694 adapter->num_q_vectors, MAX_Q_VECTORS); 5695 } 5696 for (i = 0; i < num_q_vectors; i++) { 5697 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5698 5699 vector++; 5700 5701 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5702 5703 if (q_vector->rx.ring && q_vector->tx.ring) 5704 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5705 q_vector->rx.ring->queue_index); 5706 else if (q_vector->tx.ring) 5707 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5708 q_vector->tx.ring->queue_index); 5709 else if (q_vector->rx.ring) 5710 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5711 q_vector->rx.ring->queue_index); 5712 else 5713 sprintf(q_vector->name, "%s-unused", netdev->name); 5714 5715 err = request_irq(adapter->msix_entries[vector].vector, 5716 igc_msix_ring, 0, q_vector->name, 5717 q_vector); 5718 if (err) 5719 goto err_free; 5720 5721 netif_napi_set_irq(&q_vector->napi, 5722 adapter->msix_entries[vector].vector); 5723 } 5724 5725 igc_configure_msix(adapter); 5726 return 0; 5727 5728 err_free: 5729 /* free already assigned IRQs */ 5730 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5731 5732 vector--; 5733 for (i = 0; i < vector; i++) { 5734 free_irq(adapter->msix_entries[free_vector++].vector, 5735 adapter->q_vector[i]); 5736 } 5737 err_out: 5738 return err; 5739 } 5740 5741 /** 5742 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5743 * @adapter: Pointer to adapter structure 5744 * 5745 * This function resets the device so that it has 0 rx queues, tx queues, and 5746 * MSI-X interrupts allocated. 5747 */ 5748 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5749 { 5750 igc_free_q_vectors(adapter); 5751 igc_reset_interrupt_capability(adapter); 5752 } 5753 5754 /* Need to wait a few seconds after link up to get diagnostic information from 5755 * the phy 5756 */ 5757 static void igc_update_phy_info(struct timer_list *t) 5758 { 5759 struct igc_adapter *adapter = timer_container_of(adapter, t, 5760 phy_info_timer); 5761 5762 igc_get_phy_info(&adapter->hw); 5763 } 5764 5765 /** 5766 * igc_has_link - check shared code for link and determine up/down 5767 * @adapter: pointer to driver private info 5768 */ 5769 bool igc_has_link(struct igc_adapter *adapter) 5770 { 5771 struct igc_hw *hw = &adapter->hw; 5772 bool link_active = false; 5773 5774 /* get_link_status is set on LSC (link status) interrupt or 5775 * rx sequence error interrupt. 
get_link_status will stay 5776 * false until the igc_check_for_link establishes link 5777 * for copper adapters ONLY 5778 */ 5779 if (!hw->mac.get_link_status) 5780 return true; 5781 hw->mac.ops.check_for_link(hw); 5782 link_active = !hw->mac.get_link_status; 5783 5784 if (hw->mac.type == igc_i225) { 5785 if (!netif_carrier_ok(adapter->netdev)) { 5786 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5787 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5788 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5789 adapter->link_check_timeout = jiffies; 5790 } 5791 } 5792 5793 return link_active; 5794 } 5795 5796 /** 5797 * igc_watchdog - Timer Call-back 5798 * @t: timer for the watchdog 5799 */ 5800 static void igc_watchdog(struct timer_list *t) 5801 { 5802 struct igc_adapter *adapter = timer_container_of(adapter, t, 5803 watchdog_timer); 5804 /* Do the rest outside of interrupt context */ 5805 schedule_work(&adapter->watchdog_task); 5806 } 5807 5808 static void igc_watchdog_task(struct work_struct *work) 5809 { 5810 struct igc_adapter *adapter = container_of(work, 5811 struct igc_adapter, 5812 watchdog_task); 5813 struct net_device *netdev = adapter->netdev; 5814 struct igc_hw *hw = &adapter->hw; 5815 struct igc_phy_info *phy = &hw->phy; 5816 u16 phy_data, retry_count = 20; 5817 u32 link; 5818 int i; 5819 5820 link = igc_has_link(adapter); 5821 5822 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5823 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5824 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5825 else 5826 link = false; 5827 } 5828 5829 if (link) { 5830 /* Cancel scheduled suspend requests. */ 5831 pm_runtime_resume(netdev->dev.parent); 5832 5833 if (!netif_carrier_ok(netdev)) { 5834 u32 ctrl; 5835 5836 hw->mac.ops.get_speed_and_duplex(hw, 5837 &adapter->link_speed, 5838 &adapter->link_duplex); 5839 5840 ctrl = rd32(IGC_CTRL); 5841 /* Link status message must follow this format */ 5842 netdev_info(netdev, 5843 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5844 adapter->link_speed, 5845 adapter->link_duplex == FULL_DUPLEX ? 5846 "Full" : "Half", 5847 (ctrl & IGC_CTRL_TFCE) && 5848 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5849 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5850 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5851 5852 /* disable EEE if enabled */ 5853 if ((adapter->flags & IGC_FLAG_EEE) && 5854 adapter->link_duplex == HALF_DUPLEX) { 5855 netdev_info(netdev, 5856 "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n"); 5857 adapter->hw.dev_spec._base.eee_enable = false; 5858 adapter->flags &= ~IGC_FLAG_EEE; 5859 } 5860 5861 /* check if SmartSpeed worked */ 5862 igc_check_downshift(hw); 5863 if (phy->speed_downgraded) 5864 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5865 5866 /* adjust timeout factor according to speed/duplex */ 5867 adapter->tx_timeout_factor = 1; 5868 switch (adapter->link_speed) { 5869 case SPEED_10: 5870 adapter->tx_timeout_factor = 14; 5871 break; 5872 case SPEED_100: 5873 case SPEED_1000: 5874 case SPEED_2500: 5875 adapter->tx_timeout_factor = 1; 5876 break; 5877 } 5878 5879 /* Once the launch time has been set on the wire, there 5880 * is a delay before the link speed can be determined 5881 * based on link-up activity. Write into the register 5882 * as soon as we know the correct link speed. 
5883 */ 5884 igc_tsn_adjust_txtime_offset(adapter); 5885 5886 if (adapter->fpe.mmsv.pmac_enabled) 5887 ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, 5888 true); 5889 5890 if (adapter->link_speed != SPEED_1000) 5891 goto no_wait; 5892 5893 /* wait for Remote receiver status OK */ 5894 retry_read_status: 5895 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5896 &phy_data)) { 5897 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5898 retry_count) { 5899 msleep(100); 5900 retry_count--; 5901 goto retry_read_status; 5902 } else if (!retry_count) { 5903 netdev_err(netdev, "Remote receiver status not OK after 2 seconds\n"); 5904 } 5905 } else { 5906 netdev_err(netdev, "Failed to read 1000Base-T Status register\n"); 5907 } 5908 no_wait: 5909 netif_carrier_on(netdev); 5910 5911 /* link state has changed, schedule phy info update */ 5912 if (!test_bit(__IGC_DOWN, &adapter->state)) 5913 mod_timer(&adapter->phy_info_timer, 5914 round_jiffies(jiffies + 2 * HZ)); 5915 } 5916 } else { 5917 if (netif_carrier_ok(netdev)) { 5918 adapter->link_speed = 0; 5919 adapter->link_duplex = 0; 5920 5921 /* Link status message must follow this format */ 5922 netdev_info(netdev, "NIC Link is Down\n"); 5923 netif_carrier_off(netdev); 5924 5925 if (adapter->fpe.mmsv.pmac_enabled) 5926 ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, 5927 false); 5928 5929 /* link state has changed, schedule phy info update */ 5930 if (!test_bit(__IGC_DOWN, &adapter->state)) 5931 mod_timer(&adapter->phy_info_timer, 5932 round_jiffies(jiffies + 2 * HZ)); 5933 5934 pm_schedule_suspend(netdev->dev.parent, 5935 MSEC_PER_SEC * 5); 5936 } 5937 } 5938 5939 spin_lock(&adapter->stats64_lock); 5940 igc_update_stats(adapter); 5941 spin_unlock(&adapter->stats64_lock); 5942 5943 for (i = 0; i < adapter->num_tx_queues; i++) { 5944 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5945 5946 if (!netif_carrier_ok(netdev)) { 5947 /* We've lost link, so the controller stops DMA, 5948 * but we've got queued Tx work that's never going 5949 * to get done, so reset controller to flush Tx. 5950 * (Do the reset outside of interrupt context).
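 * The ring is treated as still holding work when
 * igc_desc_unused(tx_ring) + 1 < tx_ring->count, which is exactly the
 * check performed below.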
5951 */ 5952 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5953 adapter->tx_timeout_count++; 5954 schedule_work(&adapter->reset_task); 5955 /* return immediately since reset is imminent */ 5956 return; 5957 } 5958 } 5959 5960 /* Force detection of hung controller every watchdog period */ 5961 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5962 } 5963 5964 /* Cause software interrupt to ensure Rx ring is cleaned */ 5965 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5966 u32 eics = 0; 5967 5968 for (i = 0; i < adapter->num_q_vectors; i++) { 5969 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5970 struct igc_ring *rx_ring; 5971 5972 if (!q_vector->rx.ring) 5973 continue; 5974 5975 rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; 5976 5977 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 5978 eics |= q_vector->eims_value; 5979 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5980 } 5981 } 5982 if (eics) 5983 wr32(IGC_EICS, eics); 5984 } else { 5985 struct igc_ring *rx_ring = adapter->rx_ring[0]; 5986 5987 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 5988 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5989 wr32(IGC_ICS, IGC_ICS_RXDMT0); 5990 } 5991 } 5992 5993 igc_ptp_tx_hang(adapter); 5994 5995 /* Reset the timer */ 5996 if (!test_bit(__IGC_DOWN, &adapter->state)) { 5997 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 5998 mod_timer(&adapter->watchdog_timer, 5999 round_jiffies(jiffies + HZ)); 6000 else 6001 mod_timer(&adapter->watchdog_timer, 6002 round_jiffies(jiffies + 2 * HZ)); 6003 } 6004 } 6005 6006 /** 6007 * igc_intr_msi - Interrupt Handler 6008 * @irq: interrupt number 6009 * @data: pointer to a network interface device structure 6010 */ 6011 static irqreturn_t igc_intr_msi(int irq, void *data) 6012 { 6013 struct igc_adapter *adapter = data; 6014 struct igc_q_vector *q_vector = adapter->q_vector[0]; 6015 struct igc_hw *hw = &adapter->hw; 6016 /* read ICR disables interrupts using IAM */ 6017 u32 icr = rd32(IGC_ICR); 6018 6019 igc_write_itr(q_vector); 6020 6021 if (icr & IGC_ICR_DRSTA) 6022 schedule_work(&adapter->reset_task); 6023 6024 if (icr & IGC_ICR_DOUTSYNC) { 6025 /* HW is reporting DMA is out of sync */ 6026 adapter->stats.doosync++; 6027 } 6028 6029 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6030 hw->mac.get_link_status = true; 6031 if (!test_bit(__IGC_DOWN, &adapter->state)) 6032 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6033 } 6034 6035 if (icr & IGC_ICR_TS) 6036 igc_tsync_interrupt(adapter); 6037 6038 napi_schedule(&q_vector->napi); 6039 6040 return IRQ_HANDLED; 6041 } 6042 6043 /** 6044 * igc_intr - Legacy Interrupt Handler 6045 * @irq: interrupt number 6046 * @data: pointer to a network interface device structure 6047 */ 6048 static irqreturn_t igc_intr(int irq, void *data) 6049 { 6050 struct igc_adapter *adapter = data; 6051 struct igc_q_vector *q_vector = adapter->q_vector[0]; 6052 struct igc_hw *hw = &adapter->hw; 6053 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No 6054 * need for the IMC write 6055 */ 6056 u32 icr = rd32(IGC_ICR); 6057 6058 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 6059 * not set, then the adapter didn't send an interrupt 6060 */ 6061 if (!(icr & IGC_ICR_INT_ASSERTED)) 6062 return IRQ_NONE; 6063 6064 igc_write_itr(q_vector); 6065 6066 if (icr & IGC_ICR_DRSTA) 6067 schedule_work(&adapter->reset_task); 6068 6069 if (icr & IGC_ICR_DOUTSYNC) { 6070 /* HW is reporting DMA is out of sync */ 6071 adapter->stats.doosync++; 6072 } 6073 6074 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6075 hw->mac.get_link_status = true; 6076 /* guard against interrupt when we're going down */ 6077 if (!test_bit(__IGC_DOWN, &adapter->state)) 6078 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6079 } 6080 6081 if (icr & IGC_ICR_TS) 6082 igc_tsync_interrupt(adapter); 6083 6084 napi_schedule(&q_vector->napi); 6085 6086 return IRQ_HANDLED; 6087 } 6088 6089 static void igc_free_irq(struct igc_adapter *adapter) 6090 { 6091 if (adapter->msix_entries) { 6092 int vector = 0, i; 6093 6094 free_irq(adapter->msix_entries[vector++].vector, adapter); 6095 6096 for (i = 0; i < adapter->num_q_vectors; i++) 6097 free_irq(adapter->msix_entries[vector++].vector, 6098 adapter->q_vector[i]); 6099 } else { 6100 free_irq(adapter->pdev->irq, adapter); 6101 } 6102 } 6103 6104 /** 6105 * igc_request_irq - initialize interrupts 6106 * @adapter: Pointer to adapter structure 6107 * 6108 * Attempts to configure interrupts using the best available 6109 * capabilities of the hardware and kernel. 6110 */ 6111 static int igc_request_irq(struct igc_adapter *adapter) 6112 { 6113 struct net_device *netdev = adapter->netdev; 6114 struct pci_dev *pdev = adapter->pdev; 6115 int err = 0; 6116 6117 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 6118 err = igc_request_msix(adapter); 6119 if (!err) 6120 goto request_done; 6121 /* fall back to MSI */ 6122 igc_free_all_tx_resources(adapter); 6123 igc_free_all_rx_resources(adapter); 6124 6125 igc_clear_interrupt_scheme(adapter); 6126 err = igc_init_interrupt_scheme(adapter, false); 6127 if (err) 6128 goto request_done; 6129 igc_setup_all_tx_resources(adapter); 6130 igc_setup_all_rx_resources(adapter); 6131 igc_configure(adapter); 6132 } 6133 6134 igc_assign_vector(adapter->q_vector[0], 0); 6135 6136 if (adapter->flags & IGC_FLAG_HAS_MSI) { 6137 err = request_irq(pdev->irq, &igc_intr_msi, 0, 6138 netdev->name, adapter); 6139 if (!err) 6140 goto request_done; 6141 6142 /* fall back to legacy interrupts */ 6143 igc_reset_interrupt_capability(adapter); 6144 adapter->flags &= ~IGC_FLAG_HAS_MSI; 6145 } 6146 6147 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 6148 netdev->name, adapter); 6149 6150 if (err) 6151 netdev_err(netdev, "Error %d getting interrupt\n", err); 6152 6153 request_done: 6154 return err; 6155 } 6156 6157 /** 6158 * __igc_open - Called when a network interface is made active 6159 * @netdev: network interface device structure 6160 * @resuming: boolean indicating if the device is resuming 6161 * 6162 * Returns 0 on success, negative value on failure 6163 * 6164 * The open entry point is called when a network interface is made 6165 * active by the system (IFF_UP). At this point all resources needed 6166 * for transmit and receive operations are allocated, the interrupt 6167 * handler is registered with the OS, the watchdog timer is started, 6168 * and the stack is notified that the interface is ready. 
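 *
 * When @resuming is true, the runtime PM reference is intentionally not
 * taken here, because the caller is the PM resume path (__igc_resume()).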
6169 */ 6170 static int __igc_open(struct net_device *netdev, bool resuming) 6171 { 6172 struct igc_adapter *adapter = netdev_priv(netdev); 6173 struct pci_dev *pdev = adapter->pdev; 6174 struct igc_hw *hw = &adapter->hw; 6175 struct napi_struct *napi; 6176 int err = 0; 6177 int i = 0; 6178 6179 /* disallow open during test */ 6180 6181 if (test_bit(__IGC_TESTING, &adapter->state)) { 6182 WARN_ON(resuming); 6183 return -EBUSY; 6184 } 6185 6186 if (!resuming) 6187 pm_runtime_get_sync(&pdev->dev); 6188 6189 netif_carrier_off(netdev); 6190 6191 /* allocate transmit descriptors */ 6192 err = igc_setup_all_tx_resources(adapter); 6193 if (err) 6194 goto err_setup_tx; 6195 6196 /* allocate receive descriptors */ 6197 err = igc_setup_all_rx_resources(adapter); 6198 if (err) 6199 goto err_setup_rx; 6200 6201 igc_power_up_link(adapter); 6202 6203 igc_configure(adapter); 6204 6205 err = igc_request_irq(adapter); 6206 if (err) 6207 goto err_req_irq; 6208 6209 clear_bit(__IGC_DOWN, &adapter->state); 6210 6211 for (i = 0; i < adapter->num_q_vectors; i++) { 6212 napi = &adapter->q_vector[i]->napi; 6213 napi_enable(napi); 6214 igc_set_queue_napi(adapter, i, napi); 6215 } 6216 6217 /* Clear any pending interrupts. */ 6218 rd32(IGC_ICR); 6219 igc_irq_enable(adapter); 6220 6221 if (!resuming) 6222 pm_runtime_put(&pdev->dev); 6223 6224 netif_tx_start_all_queues(netdev); 6225 6226 /* start the watchdog. */ 6227 hw->mac.get_link_status = true; 6228 schedule_work(&adapter->watchdog_task); 6229 6230 return IGC_SUCCESS; 6231 6232 err_req_irq: 6233 igc_release_hw_control(adapter); 6234 igc_power_down_phy_copper_base(&adapter->hw); 6235 igc_free_all_rx_resources(adapter); 6236 err_setup_rx: 6237 igc_free_all_tx_resources(adapter); 6238 err_setup_tx: 6239 igc_reset(adapter); 6240 if (!resuming) 6241 pm_runtime_put(&pdev->dev); 6242 6243 return err; 6244 } 6245 6246 int igc_open(struct net_device *netdev) 6247 { 6248 struct igc_adapter *adapter = netdev_priv(netdev); 6249 int err; 6250 6251 /* Notify the stack of the actual queue counts. */ 6252 err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, 6253 adapter->num_rx_queues); 6254 if (err) { 6255 netdev_err(netdev, "error setting real queue count\n"); 6256 return err; 6257 } 6258 6259 return __igc_open(netdev, false); 6260 } 6261 6262 /** 6263 * __igc_close - Disables a network interface 6264 * @netdev: network interface device structure 6265 * @suspending: boolean indicating the device is suspending 6266 * 6267 * Returns 0, this is not allowed to fail 6268 * 6269 * The close entry point is called when an interface is de-activated 6270 * by the OS. The hardware is still under the driver's control, but 6271 * needs to be disabled. A global MAC reset is issued to stop the 6272 * hardware, and all transmit and receive resources are freed. 
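 *
 * When @suspending is true, the runtime PM reference count is left
 * untouched; __igc_shutdown() calls this path during suspend.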
6273 */ 6274 static int __igc_close(struct net_device *netdev, bool suspending) 6275 { 6276 struct igc_adapter *adapter = netdev_priv(netdev); 6277 struct pci_dev *pdev = adapter->pdev; 6278 6279 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 6280 6281 if (!suspending) 6282 pm_runtime_get_sync(&pdev->dev); 6283 6284 igc_down(adapter); 6285 6286 igc_release_hw_control(adapter); 6287 6288 igc_free_irq(adapter); 6289 6290 igc_free_all_tx_resources(adapter); 6291 igc_free_all_rx_resources(adapter); 6292 6293 if (!suspending) 6294 pm_runtime_put_sync(&pdev->dev); 6295 6296 return 0; 6297 } 6298 6299 int igc_close(struct net_device *netdev) 6300 { 6301 if (netif_device_present(netdev) || netdev->dismantle) 6302 return __igc_close(netdev, false); 6303 return 0; 6304 } 6305 6306 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, 6307 bool enable) 6308 { 6309 struct igc_ring *ring; 6310 6311 if (queue < 0 || queue >= adapter->num_tx_queues) 6312 return -EINVAL; 6313 6314 ring = adapter->tx_ring[queue]; 6315 ring->launchtime_enable = enable; 6316 6317 return 0; 6318 } 6319 6320 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) 6321 { 6322 struct timespec64 b; 6323 6324 b = ktime_to_timespec64(base_time); 6325 6326 return timespec64_compare(now, &b) > 0; 6327 } 6328 6329 static bool validate_schedule(struct igc_adapter *adapter, 6330 const struct tc_taprio_qopt_offload *qopt) 6331 { 6332 int queue_uses[IGC_MAX_TX_QUEUES] = { }; 6333 struct igc_hw *hw = &adapter->hw; 6334 struct timespec64 now; 6335 size_t n; 6336 6337 if (qopt->cycle_time_extension) 6338 return false; 6339 6340 igc_ptp_read(adapter, &now); 6341 6342 /* If we program the controller's BASET registers with a time 6343 * in the future, it will hold all the packets until that 6344 * time, causing a lot of TX Hangs, so to avoid that, we 6345 * reject schedules that would start in the future. 6346 * Note: Limitation above is no longer in i226. 6347 */ 6348 if (!is_base_time_past(qopt->base_time, &now) && 6349 igc_is_device_id_i225(hw)) 6350 return false; 6351 6352 for (n = 0; n < qopt->num_entries; n++) { 6353 const struct tc_taprio_sched_entry *e, *prev; 6354 int i; 6355 6356 prev = n ? &qopt->entries[n - 1] : NULL; 6357 e = &qopt->entries[n]; 6358 6359 /* i225 only supports "global" frame preemption 6360 * settings. 6361 */ 6362 if (e->command != TC_TAPRIO_CMD_SET_GATES) 6363 return false; 6364 6365 for (i = 0; i < adapter->num_tx_queues; i++) 6366 if (e->gate_mask & BIT(i)) { 6367 queue_uses[i]++; 6368 6369 /* There are limitations: A single queue cannot 6370 * be opened and closed multiple times per cycle 6371 * unless the gate stays open. Check for it. 
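 * In other words, a queue may appear in consecutive entries only if its
 * gate was already open in the previous entry (prev->gate_mask), which
 * is what the check below enforces.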
6372 */ 6373 if (queue_uses[i] > 1 && 6374 !(prev->gate_mask & BIT(i))) 6375 return false; 6376 } 6377 } 6378 6379 return true; 6380 } 6381 6382 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, 6383 struct tc_etf_qopt_offload *qopt) 6384 { 6385 struct igc_hw *hw = &adapter->hw; 6386 int err; 6387 6388 if (hw->mac.type != igc_i225) 6389 return -EOPNOTSUPP; 6390 6391 err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable); 6392 if (err) 6393 return err; 6394 6395 return igc_tsn_offload_apply(adapter); 6396 } 6397 6398 static int igc_qbv_clear_schedule(struct igc_adapter *adapter) 6399 { 6400 unsigned long flags; 6401 int i; 6402 6403 adapter->base_time = 0; 6404 adapter->cycle_time = NSEC_PER_SEC; 6405 adapter->taprio_offload_enable = false; 6406 adapter->qbv_config_change_errors = 0; 6407 adapter->qbv_count = 0; 6408 6409 for (i = 0; i < adapter->num_tx_queues; i++) { 6410 struct igc_ring *ring = adapter->tx_ring[i]; 6411 6412 ring->start_time = 0; 6413 ring->end_time = NSEC_PER_SEC; 6414 ring->max_sdu = 0; 6415 ring->preemptible = false; 6416 } 6417 6418 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6419 6420 adapter->qbv_transition = false; 6421 6422 for (i = 0; i < adapter->num_tx_queues; i++) { 6423 struct igc_ring *ring = adapter->tx_ring[i]; 6424 6425 ring->oper_gate_closed = false; 6426 ring->admin_gate_closed = false; 6427 } 6428 6429 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6430 6431 return 0; 6432 } 6433 6434 static int igc_tsn_clear_schedule(struct igc_adapter *adapter) 6435 { 6436 igc_qbv_clear_schedule(adapter); 6437 6438 return 0; 6439 } 6440 6441 static void igc_taprio_stats(struct net_device *dev, 6442 struct tc_taprio_qopt_stats *stats) 6443 { 6444 /* When Strict_End is enabled, the tx_overruns counter 6445 * will always be zero. 6446 */ 6447 stats->tx_overruns = 0; 6448 } 6449 6450 static void igc_taprio_queue_stats(struct net_device *dev, 6451 struct tc_taprio_qopt_queue_stats *queue_stats) 6452 { 6453 struct tc_taprio_qopt_stats *stats = &queue_stats->stats; 6454 6455 /* When Strict_End is enabled, the tx_overruns counter 6456 * will always be zero. 
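 * (The expectation is that, in Strict_End mode, a frame that cannot
 * finish before the gate closes is not started at all, so an overrun
 * never occurs.)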
6457 */ 6458 stats->tx_overruns = 0; 6459 } 6460 6461 static int igc_save_qbv_schedule(struct igc_adapter *adapter, 6462 struct tc_taprio_qopt_offload *qopt) 6463 { 6464 bool queue_configured[IGC_MAX_TX_QUEUES] = { }; 6465 struct igc_hw *hw = &adapter->hw; 6466 u32 start_time = 0, end_time = 0; 6467 struct timespec64 now; 6468 unsigned long flags; 6469 size_t n; 6470 int i; 6471 6472 if (qopt->base_time < 0) 6473 return -ERANGE; 6474 6475 if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) 6476 return -EALREADY; 6477 6478 if (!validate_schedule(adapter, qopt)) 6479 return -EINVAL; 6480 6481 if (qopt->mqprio.preemptible_tcs && 6482 !(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) { 6483 NL_SET_ERR_MSG_MOD(qopt->extack, 6484 "reverse-tsn-txq-prio private flag must be enabled before setting preemptible tc"); 6485 return -ENODEV; 6486 } 6487 6488 igc_ptp_read(adapter, &now); 6489 6490 if (igc_tsn_is_taprio_activated_by_user(adapter) && 6491 is_base_time_past(qopt->base_time, &now)) 6492 adapter->qbv_config_change_errors++; 6493 6494 adapter->cycle_time = qopt->cycle_time; 6495 adapter->base_time = qopt->base_time; 6496 adapter->taprio_offload_enable = true; 6497 6498 for (n = 0; n < qopt->num_entries; n++) { 6499 struct tc_taprio_sched_entry *e = &qopt->entries[n]; 6500 6501 end_time += e->interval; 6502 6503 /* If any of the conditions below are true, we need to manually 6504 * control the end time of the cycle. 6505 * 1. Qbv users can specify a cycle time that is not equal 6506 * to the total GCL intervals. Hence, recalculation is 6507 * necessary here to exclude the time interval that 6508 * exceeds the cycle time. 6509 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, 6510 * once the end of the list is reached, it will switch 6511 * to the END_OF_CYCLE state and leave the gates in the 6512 * same state until the next cycle is started. 6513 */ 6514 if (end_time > adapter->cycle_time || 6515 n + 1 == qopt->num_entries) 6516 end_time = adapter->cycle_time; 6517 6518 for (i = 0; i < adapter->num_tx_queues; i++) { 6519 struct igc_ring *ring = adapter->tx_ring[i]; 6520 6521 if (!(e->gate_mask & BIT(i))) 6522 continue; 6523 6524 /* Check whether a queue stays open for more than one 6525 * entry. If so, keep the start and advance the end 6526 * time. 6527 */ 6528 if (!queue_configured[i]) 6529 ring->start_time = start_time; 6530 ring->end_time = end_time; 6531 6532 if (ring->start_time >= adapter->cycle_time) 6533 queue_configured[i] = false; 6534 else 6535 queue_configured[i] = true; 6536 } 6537 6538 start_time += e->interval; 6539 } 6540 6541 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6542 6543 /* Check whether a queue gets configured. 6544 * If not, set the start and end time to be end time. 
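 * Using start_time == end_time gives the queue a zero-length window, so
 * it stays gated off; the admin/oper *_gate_closed flags below record
 * whether the pending (admin) or current (oper) schedule is affected.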
6545 */ 6546 for (i = 0; i < adapter->num_tx_queues; i++) { 6547 struct igc_ring *ring = adapter->tx_ring[i]; 6548 6549 if (!is_base_time_past(qopt->base_time, &now)) { 6550 ring->admin_gate_closed = false; 6551 } else { 6552 ring->oper_gate_closed = false; 6553 ring->admin_gate_closed = false; 6554 } 6555 6556 if (!queue_configured[i]) { 6557 if (!is_base_time_past(qopt->base_time, &now)) 6558 ring->admin_gate_closed = true; 6559 else 6560 ring->oper_gate_closed = true; 6561 6562 ring->start_time = end_time; 6563 ring->end_time = end_time; 6564 } 6565 } 6566 6567 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6568 6569 for (i = 0; i < adapter->num_tx_queues; i++) { 6570 struct igc_ring *ring = adapter->tx_ring[i]; 6571 struct net_device *dev = adapter->netdev; 6572 6573 if (qopt->max_sdu[i]) 6574 ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; 6575 else 6576 ring->max_sdu = 0; 6577 } 6578 6579 igc_fpe_save_preempt_queue(adapter, &qopt->mqprio); 6580 6581 return 0; 6582 } 6583 6584 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, 6585 struct tc_taprio_qopt_offload *qopt) 6586 { 6587 struct igc_hw *hw = &adapter->hw; 6588 int err; 6589 6590 if (hw->mac.type != igc_i225) 6591 return -EOPNOTSUPP; 6592 6593 switch (qopt->cmd) { 6594 case TAPRIO_CMD_REPLACE: 6595 err = igc_save_qbv_schedule(adapter, qopt); 6596 break; 6597 case TAPRIO_CMD_DESTROY: 6598 err = igc_tsn_clear_schedule(adapter); 6599 break; 6600 case TAPRIO_CMD_STATS: 6601 igc_taprio_stats(adapter->netdev, &qopt->stats); 6602 return 0; 6603 case TAPRIO_CMD_QUEUE_STATS: 6604 igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); 6605 return 0; 6606 default: 6607 return -EOPNOTSUPP; 6608 } 6609 6610 if (err) 6611 return err; 6612 6613 return igc_tsn_offload_apply(adapter); 6614 } 6615 6616 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue, 6617 bool enable, int idleslope, int sendslope, 6618 int hicredit, int locredit) 6619 { 6620 bool cbs_status[IGC_MAX_SR_QUEUES] = { false }; 6621 struct net_device *netdev = adapter->netdev; 6622 struct igc_ring *ring; 6623 int i; 6624 6625 /* i225 has two sets of credit-based shaper logic. 6626 * Supporting it only on the top two priority queues 6627 */ 6628 if (queue < 0 || queue > 1) 6629 return -EINVAL; 6630 6631 ring = adapter->tx_ring[queue]; 6632 6633 for (i = 0; i < IGC_MAX_SR_QUEUES; i++) 6634 if (adapter->tx_ring[i]) 6635 cbs_status[i] = adapter->tx_ring[i]->cbs_enable; 6636 6637 /* CBS should be enabled on the highest priority queue first in order 6638 * for the CBS algorithm to operate as intended. 
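 * In practice that means queue 0 is enabled before queue 1 and disabled
 * only after queue 1; the checks below reject any other ordering. An
 * illustrative tc-cbs offload command (values are placeholders and an
 * mqprio parent with handle 100: is assumed) would be:
 *   tc qdisc replace dev eth0 parent 100:1 cbs offload 1 idleslope 98000
 *       sendslope -902000 hicredit 153 locredit -1389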
6639 */ 6640 if (enable) { 6641 if (queue == 1 && !cbs_status[0]) { 6642 netdev_err(netdev, 6643 "Enabling CBS on queue1 before queue0\n"); 6644 return -EINVAL; 6645 } 6646 } else { 6647 if (queue == 0 && cbs_status[1]) { 6648 netdev_err(netdev, 6649 "Disabling CBS on queue0 before queue1\n"); 6650 return -EINVAL; 6651 } 6652 } 6653 6654 ring->cbs_enable = enable; 6655 ring->idleslope = idleslope; 6656 ring->sendslope = sendslope; 6657 ring->hicredit = hicredit; 6658 ring->locredit = locredit; 6659 6660 return 0; 6661 } 6662 6663 static int igc_tsn_enable_cbs(struct igc_adapter *adapter, 6664 struct tc_cbs_qopt_offload *qopt) 6665 { 6666 struct igc_hw *hw = &adapter->hw; 6667 int err; 6668 6669 if (hw->mac.type != igc_i225) 6670 return -EOPNOTSUPP; 6671 6672 if (qopt->queue < 0 || qopt->queue > 1) 6673 return -EINVAL; 6674 6675 err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, 6676 qopt->idleslope, qopt->sendslope, 6677 qopt->hicredit, qopt->locredit); 6678 if (err) 6679 return err; 6680 6681 return igc_tsn_offload_apply(adapter); 6682 } 6683 6684 static int igc_tc_query_caps(struct igc_adapter *adapter, 6685 struct tc_query_caps_base *base) 6686 { 6687 struct igc_hw *hw = &adapter->hw; 6688 6689 switch (base->type) { 6690 case TC_SETUP_QDISC_MQPRIO: { 6691 struct tc_mqprio_caps *caps = base->caps; 6692 6693 caps->validate_queue_counts = true; 6694 6695 return 0; 6696 } 6697 case TC_SETUP_QDISC_TAPRIO: { 6698 struct tc_taprio_caps *caps = base->caps; 6699 6700 if (!(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) 6701 caps->broken_mqprio = true; 6702 6703 if (hw->mac.type == igc_i225) { 6704 caps->supports_queue_max_sdu = true; 6705 caps->gate_mask_per_txq = true; 6706 } 6707 6708 return 0; 6709 } 6710 default: 6711 return -EOPNOTSUPP; 6712 } 6713 } 6714 6715 static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, 6716 u16 *offset) 6717 { 6718 int i; 6719 6720 adapter->strict_priority_enable = true; 6721 adapter->num_tc = num_tc; 6722 6723 for (i = 0; i < num_tc; i++) 6724 adapter->queue_per_tc[i] = offset[i]; 6725 } 6726 6727 static bool 6728 igc_tsn_is_tc_to_queue_priority_ordered(struct tc_mqprio_qopt_offload *mqprio) 6729 { 6730 int num_tc = mqprio->qopt.num_tc; 6731 int i; 6732 6733 for (i = 1; i < num_tc; i++) { 6734 if (mqprio->qopt.offset[i - 1] > mqprio->qopt.offset[i]) 6735 return false; 6736 } 6737 6738 return true; 6739 } 6740 6741 static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, 6742 struct tc_mqprio_qopt_offload *mqprio) 6743 { 6744 struct igc_hw *hw = &adapter->hw; 6745 int err, i; 6746 6747 if (hw->mac.type != igc_i225) 6748 return -EOPNOTSUPP; 6749 6750 if (!mqprio->qopt.num_tc) { 6751 adapter->strict_priority_enable = false; 6752 igc_fpe_clear_preempt_queue(adapter); 6753 netdev_reset_tc(adapter->netdev); 6754 goto apply; 6755 } 6756 6757 /* There are as many TCs as Tx queues. */ 6758 if (mqprio->qopt.num_tc != adapter->num_tx_queues) { 6759 NL_SET_ERR_MSG_FMT_MOD(mqprio->extack, 6760 "Only %d traffic classes supported", 6761 adapter->num_tx_queues); 6762 return -EOPNOTSUPP; 6763 } 6764 6765 /* Only one queue per TC is supported. 
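 * (In tc-mqprio terms this corresponds to a "queues 1@0 1@1 ..." layout,
 * i.e. a count of 1 at each offset; any other count is rejected by the
 * loop below.)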
*/ 6766 for (i = 0; i < mqprio->qopt.num_tc; i++) { 6767 if (mqprio->qopt.count[i] != 1) { 6768 NL_SET_ERR_MSG_MOD(mqprio->extack, 6769 "Only one queue per TC supported"); 6770 return -EOPNOTSUPP; 6771 } 6772 } 6773 6774 if (!igc_tsn_is_tc_to_queue_priority_ordered(mqprio)) { 6775 NL_SET_ERR_MSG_MOD(mqprio->extack, 6776 "tc to queue mapping must preserve increasing priority (higher tc -> higher queue)"); 6777 return -EOPNOTSUPP; 6778 } 6779 6780 igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, 6781 mqprio->qopt.offset); 6782 6783 err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); 6784 if (err) 6785 return err; 6786 6787 for (i = 0; i < adapter->num_tc; i++) { 6788 err = netdev_set_tc_queue(adapter->netdev, i, 1, 6789 adapter->queue_per_tc[i]); 6790 if (err) 6791 return err; 6792 } 6793 6794 /* In case the card is configured with less than four queues. */ 6795 for (; i < IGC_MAX_TX_QUEUES; i++) 6796 adapter->queue_per_tc[i] = i; 6797 6798 mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; 6799 igc_fpe_save_preempt_queue(adapter, mqprio); 6800 6801 apply: 6802 return igc_tsn_offload_apply(adapter); 6803 } 6804 6805 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 6806 void *type_data) 6807 { 6808 struct igc_adapter *adapter = netdev_priv(dev); 6809 6810 adapter->tc_setup_type = type; 6811 6812 switch (type) { 6813 case TC_QUERY_CAPS: 6814 return igc_tc_query_caps(adapter, type_data); 6815 case TC_SETUP_QDISC_TAPRIO: 6816 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 6817 6818 case TC_SETUP_QDISC_ETF: 6819 return igc_tsn_enable_launchtime(adapter, type_data); 6820 6821 case TC_SETUP_QDISC_CBS: 6822 return igc_tsn_enable_cbs(adapter, type_data); 6823 6824 case TC_SETUP_QDISC_MQPRIO: 6825 return igc_tsn_enable_mqprio(adapter, type_data); 6826 6827 default: 6828 return -EOPNOTSUPP; 6829 } 6830 } 6831 6832 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 6833 { 6834 struct igc_adapter *adapter = netdev_priv(dev); 6835 6836 switch (bpf->command) { 6837 case XDP_SETUP_PROG: 6838 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 6839 case XDP_SETUP_XSK_POOL: 6840 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 6841 bpf->xsk.queue_id); 6842 default: 6843 return -EOPNOTSUPP; 6844 } 6845 } 6846 6847 static int igc_xdp_xmit(struct net_device *dev, int num_frames, 6848 struct xdp_frame **frames, u32 flags) 6849 { 6850 struct igc_adapter *adapter = netdev_priv(dev); 6851 int cpu = smp_processor_id(); 6852 struct netdev_queue *nq; 6853 struct igc_ring *ring; 6854 int i, nxmit; 6855 6856 if (unlikely(!netif_carrier_ok(dev))) 6857 return -ENETDOWN; 6858 6859 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 6860 return -EINVAL; 6861 6862 ring = igc_get_tx_ring(adapter, cpu); 6863 nq = txring_txq(ring); 6864 6865 __netif_tx_lock(nq, cpu); 6866 6867 /* Avoid transmit queue timeout since we share it with the slow path */ 6868 txq_trans_cond_update(nq); 6869 6870 nxmit = 0; 6871 for (i = 0; i < num_frames; i++) { 6872 int err; 6873 struct xdp_frame *xdpf = frames[i]; 6874 6875 err = igc_xdp_init_tx_descriptor(ring, xdpf); 6876 if (err) 6877 break; 6878 nxmit++; 6879 } 6880 6881 if (flags & XDP_XMIT_FLUSH) 6882 igc_flush_tx_descriptors(ring); 6883 6884 __netif_tx_unlock(nq); 6885 6886 return nxmit; 6887 } 6888 6889 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6890 struct igc_q_vector *q_vector) 6891 { 6892 struct igc_hw *hw = &adapter->hw; 6893 u32 eics = 0; 6894 6895 eics |= q_vector->eims_value; 6896 wr32(IGC_EICS, eics); 
6897 } 6898 6899 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6900 { 6901 struct igc_adapter *adapter = netdev_priv(dev); 6902 struct igc_q_vector *q_vector; 6903 struct igc_ring *ring; 6904 6905 if (test_bit(__IGC_DOWN, &adapter->state)) 6906 return -ENETDOWN; 6907 6908 if (!igc_xdp_is_enabled(adapter)) 6909 return -ENXIO; 6910 6911 if (queue_id >= adapter->num_rx_queues) 6912 return -EINVAL; 6913 6914 ring = adapter->rx_ring[queue_id]; 6915 6916 if (!ring->xsk_pool) 6917 return -ENXIO; 6918 6919 q_vector = adapter->q_vector[queue_id]; 6920 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6921 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6922 6923 return 0; 6924 } 6925 6926 static ktime_t igc_get_tstamp(struct net_device *dev, 6927 const struct skb_shared_hwtstamps *hwtstamps, 6928 bool cycles) 6929 { 6930 struct igc_adapter *adapter = netdev_priv(dev); 6931 struct igc_inline_rx_tstamps *tstamp; 6932 ktime_t timestamp; 6933 6934 tstamp = hwtstamps->netdev_data; 6935 6936 if (cycles) 6937 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); 6938 else 6939 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6940 6941 return timestamp; 6942 } 6943 6944 static const struct net_device_ops igc_netdev_ops = { 6945 .ndo_open = igc_open, 6946 .ndo_stop = igc_close, 6947 .ndo_start_xmit = igc_xmit_frame, 6948 .ndo_set_rx_mode = igc_set_rx_mode, 6949 .ndo_set_mac_address = igc_set_mac, 6950 .ndo_change_mtu = igc_change_mtu, 6951 .ndo_tx_timeout = igc_tx_timeout, 6952 .ndo_get_stats64 = igc_get_stats64, 6953 .ndo_fix_features = igc_fix_features, 6954 .ndo_set_features = igc_set_features, 6955 .ndo_features_check = igc_features_check, 6956 .ndo_setup_tc = igc_setup_tc, 6957 .ndo_bpf = igc_bpf, 6958 .ndo_xdp_xmit = igc_xdp_xmit, 6959 .ndo_xsk_wakeup = igc_xsk_wakeup, 6960 .ndo_get_tstamp = igc_get_tstamp, 6961 .ndo_hwtstamp_get = igc_ptp_hwtstamp_get, 6962 .ndo_hwtstamp_set = igc_ptp_hwtstamp_set, 6963 }; 6964 6965 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6966 { 6967 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6968 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6969 u32 value = 0; 6970 6971 if (IGC_REMOVED(hw_addr)) 6972 return ~value; 6973 6974 value = readl(&hw_addr[reg]); 6975 6976 /* reads should not return all F's */ 6977 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 6978 struct net_device *netdev = igc->netdev; 6979 6980 hw->hw_addr = NULL; 6981 netif_device_detach(netdev); 6982 netdev_err(netdev, "PCIe link lost, device now detached\n"); 6983 WARN(pci_device_is_present(igc->pdev), 6984 "igc: Failed to read reg 0x%x!\n", reg); 6985 } 6986 6987 return value; 6988 } 6989 6990 /* Mapping HW RSS Type to enum xdp_rss_hash_type */ 6991 static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { 6992 [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, 6993 [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6994 [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, 6995 [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6996 [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6997 [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, 6998 [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6999 [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, 7000 [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, 7001 [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, 7002 [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ 7003 [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ 
7004 [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisions */ 7005 [13] = XDP_RSS_TYPE_NONE, 7006 [14] = XDP_RSS_TYPE_NONE, 7007 [15] = XDP_RSS_TYPE_NONE, 7008 }; 7009 7010 static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, 7011 enum xdp_rss_hash_type *rss_type) 7012 { 7013 const struct igc_xdp_buff *ctx = (void *)_ctx; 7014 7015 if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) 7016 return -ENODATA; 7017 7018 *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); 7019 *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; 7020 7021 return 0; 7022 } 7023 7024 static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) 7025 { 7026 const struct igc_xdp_buff *ctx = (void *)_ctx; 7027 struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); 7028 struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; 7029 7030 if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { 7031 *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 7032 7033 return 0; 7034 } 7035 7036 return -ENODATA; 7037 } 7038 7039 static const struct xdp_metadata_ops igc_xdp_metadata_ops = { 7040 .xmo_rx_hash = igc_xdp_rx_hash, 7041 .xmo_rx_timestamp = igc_xdp_rx_timestamp, 7042 }; 7043 7044 static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) 7045 { 7046 struct igc_adapter *adapter = container_of(timer, struct igc_adapter, 7047 hrtimer); 7048 unsigned long flags; 7049 unsigned int i; 7050 7051 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 7052 7053 adapter->qbv_transition = true; 7054 for (i = 0; i < adapter->num_tx_queues; i++) { 7055 struct igc_ring *tx_ring = adapter->tx_ring[i]; 7056 7057 if (tx_ring->admin_gate_closed) { 7058 tx_ring->admin_gate_closed = false; 7059 tx_ring->oper_gate_closed = true; 7060 } else { 7061 tx_ring->oper_gate_closed = false; 7062 } 7063 } 7064 adapter->qbv_transition = false; 7065 7066 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 7067 7068 return HRTIMER_NORESTART; 7069 } 7070 7071 /** 7072 * igc_probe - Device Initialization Routine 7073 * @pdev: PCI device information struct 7074 * @ent: entry in igc_pci_tbl 7075 * 7076 * Returns 0 on success, negative on failure 7077 * 7078 * igc_probe initializes an adapter identified by a pci_dev structure. 7079 * The OS initialization, configuring the adapter private structure, 7080 * and a hardware reset occur.
7081 */ 7082 static int igc_probe(struct pci_dev *pdev, 7083 const struct pci_device_id *ent) 7084 { 7085 struct igc_adapter *adapter; 7086 struct net_device *netdev; 7087 struct igc_hw *hw; 7088 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 7089 int err; 7090 7091 err = pci_enable_device_mem(pdev); 7092 if (err) 7093 return err; 7094 7095 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 7096 if (err) { 7097 dev_err(&pdev->dev, 7098 "No usable DMA configuration, aborting\n"); 7099 goto err_dma; 7100 } 7101 7102 err = pci_request_mem_regions(pdev, igc_driver_name); 7103 if (err) 7104 goto err_pci_reg; 7105 7106 err = pci_enable_ptm(pdev, NULL); 7107 if (err < 0) 7108 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); 7109 7110 pci_set_master(pdev); 7111 7112 err = -ENOMEM; 7113 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 7114 IGC_MAX_TX_QUEUES); 7115 7116 if (!netdev) 7117 goto err_alloc_etherdev; 7118 7119 SET_NETDEV_DEV(netdev, &pdev->dev); 7120 7121 pci_set_drvdata(pdev, netdev); 7122 adapter = netdev_priv(netdev); 7123 adapter->netdev = netdev; 7124 adapter->pdev = pdev; 7125 hw = &adapter->hw; 7126 hw->back = adapter; 7127 adapter->port_num = hw->bus.func; 7128 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 7129 7130 /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ 7131 if (igc_is_device_id_i226(hw)) 7132 pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); 7133 7134 err = pci_save_state(pdev); 7135 if (err) 7136 goto err_ioremap; 7137 7138 err = -EIO; 7139 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 7140 pci_resource_len(pdev, 0)); 7141 if (!adapter->io_addr) 7142 goto err_ioremap; 7143 7144 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 7145 hw->hw_addr = adapter->io_addr; 7146 7147 netdev->netdev_ops = &igc_netdev_ops; 7148 netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; 7149 netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; 7150 igc_ethtool_set_ops(netdev); 7151 netdev->watchdog_timeo = 5 * HZ; 7152 7153 netdev->mem_start = pci_resource_start(pdev, 0); 7154 netdev->mem_end = pci_resource_end(pdev, 0); 7155 7156 /* PCI config space info */ 7157 hw->vendor_id = pdev->vendor; 7158 hw->device_id = pdev->device; 7159 hw->revision_id = pdev->revision; 7160 hw->subsystem_vendor_id = pdev->subsystem_vendor; 7161 hw->subsystem_device_id = pdev->subsystem_device; 7162 7163 /* Copy the default MAC and PHY function pointers */ 7164 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 7165 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 7166 7167 /* Initialize skew-specific constants */ 7168 err = ei->get_invariants(hw); 7169 if (err) 7170 goto err_sw_init; 7171 7172 /* Add supported features to the features list*/ 7173 netdev->features |= NETIF_F_SG; 7174 netdev->features |= NETIF_F_TSO; 7175 netdev->features |= NETIF_F_TSO6; 7176 netdev->features |= NETIF_F_TSO_ECN; 7177 netdev->features |= NETIF_F_RXHASH; 7178 netdev->features |= NETIF_F_RXCSUM; 7179 netdev->features |= NETIF_F_HW_CSUM; 7180 netdev->features |= NETIF_F_SCTP_CRC; 7181 netdev->features |= NETIF_F_HW_TC; 7182 7183 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 7184 NETIF_F_GSO_GRE_CSUM | \ 7185 NETIF_F_GSO_IPXIP4 | \ 7186 NETIF_F_GSO_IPXIP6 | \ 7187 NETIF_F_GSO_UDP_TUNNEL | \ 7188 NETIF_F_GSO_UDP_TUNNEL_CSUM) 7189 7190 netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES; 7191 netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES; 7192 7193 /* setup the private 
structure */ 7194 err = igc_sw_init(adapter); 7195 if (err) 7196 goto err_sw_init; 7197 7198 /* copy netdev features into list of user selectable features */ 7199 netdev->hw_features |= NETIF_F_NTUPLE; 7200 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 7201 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 7202 netdev->hw_features |= netdev->features; 7203 7204 netdev->features |= NETIF_F_HIGHDMA; 7205 7206 netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; 7207 netdev->mpls_features |= NETIF_F_HW_CSUM; 7208 netdev->hw_enc_features |= netdev->vlan_features; 7209 7210 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 7211 NETDEV_XDP_ACT_XSK_ZEROCOPY; 7212 7213 /* enable HW vlan tag insertion/stripping by default */ 7214 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; 7215 7216 /* MTU range: 68 - 9216 */ 7217 netdev->min_mtu = ETH_MIN_MTU; 7218 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 7219 7220 /* before reading the NVM, reset the controller to put the device in a 7221 * known good starting state 7222 */ 7223 hw->mac.ops.reset_hw(hw); 7224 7225 if (igc_get_flash_presence_i225(hw)) { 7226 if (hw->nvm.ops.validate(hw) < 0) { 7227 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 7228 err = -EIO; 7229 goto err_eeprom; 7230 } 7231 } 7232 7233 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 7234 /* copy the MAC address out of the NVM */ 7235 if (hw->mac.ops.read_mac_addr(hw)) 7236 dev_err(&pdev->dev, "NVM Read Error\n"); 7237 } 7238 7239 eth_hw_addr_set(netdev, hw->mac.addr); 7240 7241 if (!is_valid_ether_addr(netdev->dev_addr)) { 7242 dev_err(&pdev->dev, "Invalid MAC Address\n"); 7243 err = -EIO; 7244 goto err_eeprom; 7245 } 7246 7247 /* configure RXPBSIZE and TXPBSIZE */ 7248 wr32(IGC_RXPBS, IGC_RXPBSIZE_EXP_BMC_DEFAULT); 7249 wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); 7250 7251 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 7252 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 7253 7254 INIT_WORK(&adapter->reset_task, igc_reset_task); 7255 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 7256 7257 hrtimer_setup(&adapter->hrtimer, &igc_qbv_scheduling_timer, CLOCK_MONOTONIC, 7258 HRTIMER_MODE_REL); 7259 7260 /* Initialize link properties that are user-changeable */ 7261 adapter->fc_autoneg = true; 7262 hw->phy.autoneg_advertised = 0xaf; 7263 7264 hw->fc.requested_mode = igc_fc_default; 7265 hw->fc.current_mode = igc_fc_default; 7266 7267 /* By default, support wake on port A */ 7268 adapter->flags |= IGC_FLAG_WOL_SUPPORTED; 7269 7270 /* initialize the wol settings based on the eeprom settings */ 7271 if (adapter->flags & IGC_FLAG_WOL_SUPPORTED) 7272 adapter->wol |= IGC_WUFC_MAG; 7273 7274 device_set_wakeup_enable(&adapter->pdev->dev, 7275 adapter->flags & IGC_FLAG_WOL_SUPPORTED); 7276 7277 igc_ptp_init(adapter); 7278 7279 igc_tsn_clear_schedule(adapter); 7280 7281 igc_fpe_init(adapter); 7282 7283 /* reset the hardware with the new settings */ 7284 igc_reset(adapter); 7285 7286 /* let the f/w know that the h/w is now under the control of the 7287 * driver. 
7288 */ 7289 igc_get_hw_control(adapter); 7290 7291 strscpy(netdev->name, "eth%d", sizeof(netdev->name)); 7292 err = register_netdev(netdev); 7293 if (err) 7294 goto err_register; 7295 7296 /* carrier off reporting is important to ethtool even BEFORE open */ 7297 netif_carrier_off(netdev); 7298 7299 /* Check if Media Autosense is enabled */ 7300 adapter->ei = *ei; 7301 7302 /* print pcie link status and MAC address */ 7303 pcie_print_link_status(pdev); 7304 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 7305 7306 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 7307 /* Disable EEE for internal PHY devices */ 7308 hw->dev_spec._base.eee_enable = false; 7309 adapter->flags &= ~IGC_FLAG_EEE; 7310 igc_set_eee_i225(hw, false, false, false); 7311 7312 pm_runtime_put_noidle(&pdev->dev); 7313 7314 if (IS_ENABLED(CONFIG_IGC_LEDS)) { 7315 err = igc_led_setup(adapter); 7316 if (err) 7317 goto err_register; 7318 } 7319 7320 return 0; 7321 7322 err_register: 7323 igc_release_hw_control(adapter); 7324 igc_ptp_stop(adapter); 7325 err_eeprom: 7326 if (!igc_check_reset_block(hw)) 7327 igc_reset_phy(hw); 7328 err_sw_init: 7329 igc_clear_interrupt_scheme(adapter); 7330 iounmap(adapter->io_addr); 7331 err_ioremap: 7332 free_netdev(netdev); 7333 err_alloc_etherdev: 7334 pci_release_mem_regions(pdev); 7335 err_pci_reg: 7336 err_dma: 7337 pci_disable_device(pdev); 7338 return err; 7339 } 7340 7341 /** 7342 * igc_remove - Device Removal Routine 7343 * @pdev: PCI device information struct 7344 * 7345 * igc_remove is called by the PCI subsystem to alert the driver 7346 * that it should release a PCI device. This could be caused by a 7347 * Hot-Plug event, or because the driver is going to be removed from 7348 * memory. 7349 */ 7350 static void igc_remove(struct pci_dev *pdev) 7351 { 7352 struct net_device *netdev = pci_get_drvdata(pdev); 7353 struct igc_adapter *adapter = netdev_priv(netdev); 7354 7355 pm_runtime_get_noresume(&pdev->dev); 7356 7357 igc_flush_nfc_rules(adapter); 7358 7359 igc_ptp_stop(adapter); 7360 7361 pci_disable_ptm(pdev); 7362 pci_clear_master(pdev); 7363 7364 set_bit(__IGC_DOWN, &adapter->state); 7365 7366 timer_delete_sync(&adapter->watchdog_timer); 7367 timer_delete_sync(&adapter->phy_info_timer); 7368 7369 cancel_work_sync(&adapter->reset_task); 7370 cancel_work_sync(&adapter->watchdog_task); 7371 hrtimer_cancel(&adapter->hrtimer); 7372 7373 if (IS_ENABLED(CONFIG_IGC_LEDS)) 7374 igc_led_free(adapter); 7375 7376 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7377 * would have already happened in close and is redundant. 7378 */ 7379 igc_release_hw_control(adapter); 7380 unregister_netdev(netdev); 7381 7382 igc_clear_interrupt_scheme(adapter); 7383 pci_iounmap(pdev, adapter->io_addr); 7384 pci_release_mem_regions(pdev); 7385 7386 free_netdev(netdev); 7387 7388 pci_disable_device(pdev); 7389 } 7390 7391 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 7392 bool runtime) 7393 { 7394 struct net_device *netdev = pci_get_drvdata(pdev); 7395 struct igc_adapter *adapter = netdev_priv(netdev); 7396 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 7397 struct igc_hw *hw = &adapter->hw; 7398 u32 ctrl, rctl, status; 7399 bool wake; 7400 7401 rtnl_lock(); 7402 netif_device_detach(netdev); 7403 7404 if (netif_running(netdev)) 7405 __igc_close(netdev, true); 7406 7407 igc_ptp_suspend(adapter); 7408 7409 igc_clear_interrupt_scheme(adapter); 7410 rtnl_unlock(); 7411 7412 status = rd32(IGC_STATUS); 7413 if (status & IGC_STATUS_LU) 7414 wufc &= ~IGC_WUFC_LNKC; 7415 7416 if (wufc) { 7417 igc_setup_rctl(adapter); 7418 igc_set_rx_mode(netdev); 7419 7420 /* turn on all-multi mode if wake on multicast is enabled */ 7421 if (wufc & IGC_WUFC_MC) { 7422 rctl = rd32(IGC_RCTL); 7423 rctl |= IGC_RCTL_MPE; 7424 wr32(IGC_RCTL, rctl); 7425 } 7426 7427 ctrl = rd32(IGC_CTRL); 7428 ctrl |= IGC_CTRL_ADVD3WUC; 7429 wr32(IGC_CTRL, ctrl); 7430 7431 /* Allow time for pending master requests to run */ 7432 igc_disable_pcie_master(hw); 7433 7434 wr32(IGC_WUC, IGC_WUC_PME_EN); 7435 wr32(IGC_WUFC, wufc); 7436 } else { 7437 wr32(IGC_WUC, 0); 7438 wr32(IGC_WUFC, 0); 7439 } 7440 7441 wake = wufc || adapter->en_mng_pt; 7442 if (!wake) 7443 igc_power_down_phy_copper_base(&adapter->hw); 7444 else 7445 igc_power_up_link(adapter); 7446 7447 if (enable_wake) 7448 *enable_wake = wake; 7449 7450 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7451 * would have already happened in close and is redundant. 7452 */ 7453 igc_release_hw_control(adapter); 7454 7455 pci_disable_device(pdev); 7456 7457 return 0; 7458 } 7459 7460 static int igc_runtime_suspend(struct device *dev) 7461 { 7462 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 7463 } 7464 7465 static void igc_deliver_wake_packet(struct net_device *netdev) 7466 { 7467 struct igc_adapter *adapter = netdev_priv(netdev); 7468 struct igc_hw *hw = &adapter->hw; 7469 struct sk_buff *skb; 7470 u32 wupl; 7471 7472 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 7473 7474 /* WUPM stores only the first 128 bytes of the wake packet. 7475 * Read the packet only if we have the whole thing. 7476 */ 7477 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 7478 return; 7479 7480 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 7481 if (!skb) 7482 return; 7483 7484 skb_put(skb, wupl); 7485 7486 /* Ensure reads are 32-bit aligned */ 7487 wupl = roundup(wupl, 4); 7488 7489 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 7490 7491 skb->protocol = eth_type_trans(skb, netdev); 7492 netif_rx(skb); 7493 } 7494 7495 static int __igc_resume(struct device *dev, bool rpm) 7496 { 7497 struct pci_dev *pdev = to_pci_dev(dev); 7498 struct net_device *netdev = pci_get_drvdata(pdev); 7499 struct igc_adapter *adapter = netdev_priv(netdev); 7500 struct igc_hw *hw = &adapter->hw; 7501 u32 err, val; 7502 7503 pci_set_power_state(pdev, PCI_D0); 7504 pci_restore_state(pdev); 7505 pci_save_state(pdev); 7506 7507 if (!pci_device_is_present(pdev)) 7508 return -ENODEV; 7509 err = pci_enable_device_mem(pdev); 7510 if (err) { 7511 netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 7512 return err; 7513 } 7514 pci_set_master(pdev); 7515 7516 pci_enable_wake(pdev, PCI_D3hot, 0); 7517 pci_enable_wake(pdev, PCI_D3cold, 0); 7518 7519 if (igc_is_device_id_i226(hw)) 7520 pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); 7521 7522 if (igc_init_interrupt_scheme(adapter, true)) { 7523 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7524 return -ENOMEM; 7525 } 7526 7527 igc_reset(adapter); 7528 7529 /* let the f/w know that the h/w is now under the control of the 7530 * driver. 
7531 */ 7532 igc_get_hw_control(adapter); 7533 7534 val = rd32(IGC_WUS); 7535 if (val & WAKE_PKT_WUS) 7536 igc_deliver_wake_packet(netdev); 7537 7538 wr32(IGC_WUS, ~0); 7539 7540 if (netif_running(netdev)) { 7541 if (!rpm) 7542 rtnl_lock(); 7543 err = __igc_open(netdev, true); 7544 if (!rpm) 7545 rtnl_unlock(); 7546 if (!err) 7547 netif_device_attach(netdev); 7548 } 7549 7550 return err; 7551 } 7552 7553 static int igc_resume(struct device *dev) 7554 { 7555 return __igc_resume(dev, false); 7556 } 7557 7558 static int igc_runtime_resume(struct device *dev) 7559 { 7560 return __igc_resume(dev, true); 7561 } 7562 7563 static int igc_suspend(struct device *dev) 7564 { 7565 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 7566 } 7567 7568 static int __maybe_unused igc_runtime_idle(struct device *dev) 7569 { 7570 struct net_device *netdev = dev_get_drvdata(dev); 7571 struct igc_adapter *adapter = netdev_priv(netdev); 7572 7573 if (!igc_has_link(adapter)) 7574 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 7575 7576 return -EBUSY; 7577 } 7578 7579 static void igc_shutdown(struct pci_dev *pdev) 7580 { 7581 bool wake; 7582 7583 __igc_shutdown(pdev, &wake, 0); 7584 7585 if (system_state == SYSTEM_POWER_OFF) { 7586 pci_wake_from_d3(pdev, wake); 7587 pci_set_power_state(pdev, PCI_D3hot); 7588 } 7589 } 7590 7591 /** 7592 * igc_io_error_detected - called when PCI error is detected 7593 * @pdev: Pointer to PCI device 7594 * @state: The current PCI connection state 7595 * 7596 * This function is called after a PCI bus error affecting 7597 * this device has been detected. 7598 **/ 7599 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, 7600 pci_channel_state_t state) 7601 { 7602 struct net_device *netdev = pci_get_drvdata(pdev); 7603 struct igc_adapter *adapter = netdev_priv(netdev); 7604 7605 rtnl_lock(); 7606 netif_device_detach(netdev); 7607 7608 if (state == pci_channel_io_perm_failure) { 7609 rtnl_unlock(); 7610 return PCI_ERS_RESULT_DISCONNECT; 7611 } 7612 7613 if (netif_running(netdev)) 7614 igc_down(adapter); 7615 pci_disable_device(pdev); 7616 rtnl_unlock(); 7617 7618 /* Request a slot reset. */ 7619 return PCI_ERS_RESULT_NEED_RESET; 7620 } 7621 7622 /** 7623 * igc_io_slot_reset - called after the PCI bus has been reset. 7624 * @pdev: Pointer to PCI device 7625 * 7626 * Restart the card from scratch, as if from a cold-boot. Implementation 7627 * resembles the first-half of the __igc_resume routine. 7628 **/ 7629 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) 7630 { 7631 struct net_device *netdev = pci_get_drvdata(pdev); 7632 struct igc_adapter *adapter = netdev_priv(netdev); 7633 struct igc_hw *hw = &adapter->hw; 7634 pci_ers_result_t result; 7635 7636 if (pci_enable_device_mem(pdev)) { 7637 netdev_err(netdev, "Could not re-enable PCI device after reset\n"); 7638 result = PCI_ERS_RESULT_DISCONNECT; 7639 } else { 7640 pci_set_master(pdev); 7641 pci_restore_state(pdev); 7642 pci_save_state(pdev); 7643 7644 pci_enable_wake(pdev, PCI_D3hot, 0); 7645 pci_enable_wake(pdev, PCI_D3cold, 0); 7646 7647 if (igc_is_device_id_i226(hw)) 7648 pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); 7649 7650 /* In case of PCI error, adapter loses its HW address 7651 * so we should re-assign it here. 7652 */ 7653 hw->hw_addr = adapter->io_addr; 7654 7655 igc_reset(adapter); 7656 wr32(IGC_WUS, ~0); 7657 result = PCI_ERS_RESULT_RECOVERED; 7658 } 7659 7660 return result; 7661 } 7662 7663 /** 7664 * igc_io_resume - called when traffic can start to flow again. 
7665 * @pdev: Pointer to PCI device 7666 * 7667 * This callback is called when the error recovery driver tells us that 7668 * it's OK to resume normal operation. Implementation resembles the 7669 * second-half of the __igc_resume routine. 7670 */ 7671 static void igc_io_resume(struct pci_dev *pdev) 7672 { 7673 struct net_device *netdev = pci_get_drvdata(pdev); 7674 struct igc_adapter *adapter = netdev_priv(netdev); 7675 7676 rtnl_lock(); 7677 if (netif_running(netdev)) { 7678 if (igc_open(netdev)) { 7679 rtnl_unlock(); 7680 netdev_err(netdev, "igc_open failed after reset\n"); 7681 return; 7682 } 7683 } 7684 7685 netif_device_attach(netdev); 7686 7687 /* let the f/w know that the h/w is now under the control of the 7688 * driver. 7689 */ 7690 igc_get_hw_control(adapter); 7691 rtnl_unlock(); 7692 } 7693 7694 static const struct pci_error_handlers igc_err_handler = { 7695 .error_detected = igc_io_error_detected, 7696 .slot_reset = igc_io_slot_reset, 7697 .resume = igc_io_resume, 7698 }; 7699 7700 static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, 7701 igc_runtime_suspend, igc_runtime_resume, 7702 igc_runtime_idle); 7703 7704 static struct pci_driver igc_driver = { 7705 .name = igc_driver_name, 7706 .id_table = igc_pci_tbl, 7707 .probe = igc_probe, 7708 .remove = igc_remove, 7709 .driver.pm = pm_ptr(&igc_pm_ops), 7710 .shutdown = igc_shutdown, 7711 .err_handler = &igc_err_handler, 7712 }; 7713 7714 /** 7715 * igc_reinit_queues - reinitialize the interrupt scheme and queues 7716 * @adapter: pointer to adapter structure 7717 */ 7718 int igc_reinit_queues(struct igc_adapter *adapter) 7719 { 7720 struct net_device *netdev = adapter->netdev; 7721 int err = 0; 7722 7723 if (netif_running(netdev)) 7724 igc_close(netdev); 7725 7726 igc_reset_interrupt_capability(adapter); 7727 7728 if (igc_init_interrupt_scheme(adapter, true)) { 7729 netdev_err(netdev, "Unable to allocate memory for queues\n"); 7730 return -ENOMEM; 7731 } 7732 7733 if (netif_running(netdev)) 7734 err = igc_open(netdev); 7735 7736 return err; 7737 } 7738 7739 /** 7740 * igc_get_hw_dev - return device 7741 * @hw: pointer to hardware structure 7742 * 7743 * Used by the hardware layer to print debugging information 7744 */ 7745 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 7746 { 7747 struct igc_adapter *adapter = hw->back; 7748 7749 return adapter->netdev; 7750 } 7751 7752 static void igc_disable_rx_ring_hw(struct igc_ring *ring) 7753 { 7754 struct igc_hw *hw = &ring->q_vector->adapter->hw; 7755 u8 idx = ring->reg_idx; 7756 u32 rxdctl; 7757 7758 rxdctl = rd32(IGC_RXDCTL(idx)); 7759 rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; 7760 rxdctl |= IGC_RXDCTL_SWFLUSH; 7761 wr32(IGC_RXDCTL(idx), rxdctl); 7762 } 7763 7764 void igc_disable_rx_ring(struct igc_ring *ring) 7765 { 7766 igc_disable_rx_ring_hw(ring); 7767 igc_clean_rx_ring(ring); 7768 } 7769 7770 void igc_enable_rx_ring(struct igc_ring *ring) 7771 { 7772 struct igc_adapter *adapter = ring->q_vector->adapter; 7773 7774 igc_configure_rx_ring(adapter, ring); 7775 7776 if (ring->xsk_pool) 7777 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 7778 else 7779 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 7780 } 7781 7782 void igc_disable_tx_ring(struct igc_ring *ring) 7783 { 7784 igc_disable_tx_ring_hw(ring); 7785 igc_clean_tx_ring(ring); 7786 } 7787 7788 void igc_enable_tx_ring(struct igc_ring *ring) 7789 { 7790 struct igc_adapter *adapter = ring->q_vector->adapter; 7791 7792 igc_configure_tx_ring(adapter, ring); 7793 } 7794 7795 /** 7796 * igc_init_module - Driver Registration Routine 7797 * 7798 *
igc_init_module is the first routine called when the driver is 7799 * loaded. All it does is register with the PCI subsystem. 7800 */ 7801 static int __init igc_init_module(void) 7802 { 7803 int ret; 7804 7805 pr_info("%s\n", igc_driver_string); 7806 pr_info("%s\n", igc_copyright); 7807 7808 ret = pci_register_driver(&igc_driver); 7809 return ret; 7810 } 7811 7812 module_init(igc_init_module); 7813 7814 /** 7815 * igc_exit_module - Driver Exit Cleanup Routine 7816 * 7817 * igc_exit_module is called just before the driver is removed 7818 * from memory. 7819 */ 7820 static void __exit igc_exit_module(void) 7821 { 7822 pci_unregister_driver(&igc_driver); 7823 } 7824 7825 module_exit(igc_exit_module); 7826 /* igc_main.c */ 7827