// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Intel Corporation */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/if_vlan.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ip.h>
#include <linux/pm_runtime.h>
#include <net/pkt_sched.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <linux/pci.h>
#include <linux/mdio.h>

#include <net/ipv6.h>

#include "igc.h"
#include "igc_hw.h"
#include "igc_tsn.h"
#include "igc_xdp.h"

#define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)

#define IGC_XDP_PASS		0
#define IGC_XDP_CONSUMED	BIT(0)
#define IGC_XDP_TX		BIT(1)
#define IGC_XDP_REDIRECT	BIT(2)

static int debug = -1;

MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_LICENSE("GPL v2");
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

char igc_driver_name[] = "igc";
static const char igc_driver_string[] = DRV_SUMMARY;
static const char igc_copyright[] =
	"Copyright(c) 2018 Intel Corporation.";

static const struct igc_info *igc_info_tbl[] = {
	[board_base] = &igc_base_info,
};

static const struct pci_device_id igc_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igc_pci_tbl);

enum latency_range {
	lowest_latency = 0,
	low_latency = 1,
	bulk_latency = 2,
	latency_invalid = 255
};

void igc_reset(struct igc_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	struct igc_hw *hw = &adapter->hw;
	struct igc_fc_info *fc = &hw->fc;
	u32 pba, hwm;

	/* Repartition PBA for greater than 9k MTU if required */
	pba = IGC_PBA_34K;

	/* flow control settings
	 * The high water mark must be low enough to fit one full frame
	 * after transmitting the pause frame.  As such we must have enough
	 * space to allow for us to complete our current transmit and then
	 * receive the frame that is in progress from the link partner.
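	 * As an illustrative example (values assumed for a default 1500-byte
	 * MTU: a 1522-byte max frame and a 9728-byte jumbo ceiling), the
	 * computation below on a 34 KB packet buffer gives
	 * 34816 - (1522 + 9728) = 23566 bytes, i.e. high_water = 23552 and
	 * low_water = 23536 after rounding to 16-byte granularity.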
94 * Set it to: 95 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 96 */ 97 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 98 99 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 100 fc->low_water = fc->high_water - 16; 101 fc->pause_time = 0xFFFF; 102 fc->send_xon = 1; 103 fc->current_mode = fc->requested_mode; 104 105 hw->mac.ops.reset_hw(hw); 106 107 if (hw->mac.ops.init_hw(hw)) 108 netdev_err(dev, "Error on hardware initialization\n"); 109 110 /* Re-establish EEE setting */ 111 igc_set_eee_i225(hw, true, true, true); 112 113 if (!netif_running(adapter->netdev)) 114 igc_power_down_phy_copper_base(&adapter->hw); 115 116 /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ 117 wr32(IGC_VET, ETH_P_8021Q); 118 119 /* Re-enable PTP, where applicable. */ 120 igc_ptp_reset(adapter); 121 122 /* Re-enable TSN offloading, where applicable. */ 123 igc_tsn_reset(adapter); 124 125 igc_get_phy_info(hw); 126 } 127 128 /** 129 * igc_power_up_link - Power up the phy link 130 * @adapter: address of board private structure 131 */ igc_power_up_link(struct igc_adapter * adapter)132 static void igc_power_up_link(struct igc_adapter *adapter) 133 { 134 igc_reset_phy(&adapter->hw); 135 136 igc_power_up_phy_copper(&adapter->hw); 137 138 igc_setup_link(&adapter->hw); 139 } 140 141 /** 142 * igc_release_hw_control - release control of the h/w to f/w 143 * @adapter: address of board private structure 144 * 145 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 146 * For ASF and Pass Through versions of f/w this means that the 147 * driver is no longer loaded. 148 */ igc_release_hw_control(struct igc_adapter * adapter)149 static void igc_release_hw_control(struct igc_adapter *adapter) 150 { 151 struct igc_hw *hw = &adapter->hw; 152 u32 ctrl_ext; 153 154 if (!pci_device_is_present(adapter->pdev)) 155 return; 156 157 /* Let firmware take over control of h/w */ 158 ctrl_ext = rd32(IGC_CTRL_EXT); 159 wr32(IGC_CTRL_EXT, 160 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 161 } 162 163 /** 164 * igc_get_hw_control - get control of the h/w from f/w 165 * @adapter: address of board private structure 166 * 167 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 168 * For ASF and Pass Through versions of f/w this means that 169 * the driver is loaded. 
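 * The driver normally takes ownership around probe/resume and hands it
 * back again on the remove, shutdown and suspend paths, so firmware only
 * manages the device while no driver is attached.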
170 */ igc_get_hw_control(struct igc_adapter * adapter)171 static void igc_get_hw_control(struct igc_adapter *adapter) 172 { 173 struct igc_hw *hw = &adapter->hw; 174 u32 ctrl_ext; 175 176 /* Let firmware know the driver has taken over */ 177 ctrl_ext = rd32(IGC_CTRL_EXT); 178 wr32(IGC_CTRL_EXT, 179 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 180 } 181 igc_unmap_tx_buffer(struct device * dev,struct igc_tx_buffer * buf)182 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 183 { 184 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 185 dma_unmap_len(buf, len), DMA_TO_DEVICE); 186 187 dma_unmap_len_set(buf, len, 0); 188 } 189 190 /** 191 * igc_clean_tx_ring - Free Tx Buffers 192 * @tx_ring: ring to be cleaned 193 */ igc_clean_tx_ring(struct igc_ring * tx_ring)194 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 195 { 196 u16 i = tx_ring->next_to_clean; 197 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 198 u32 xsk_frames = 0; 199 200 while (i != tx_ring->next_to_use) { 201 union igc_adv_tx_desc *eop_desc, *tx_desc; 202 203 switch (tx_buffer->type) { 204 case IGC_TX_BUFFER_TYPE_XSK: 205 xsk_frames++; 206 break; 207 case IGC_TX_BUFFER_TYPE_XDP: 208 xdp_return_frame(tx_buffer->xdpf); 209 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 210 break; 211 case IGC_TX_BUFFER_TYPE_SKB: 212 dev_kfree_skb_any(tx_buffer->skb); 213 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 214 break; 215 default: 216 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 217 break; 218 } 219 220 /* check for eop_desc to determine the end of the packet */ 221 eop_desc = tx_buffer->next_to_watch; 222 tx_desc = IGC_TX_DESC(tx_ring, i); 223 224 /* unmap remaining buffers */ 225 while (tx_desc != eop_desc) { 226 tx_buffer++; 227 tx_desc++; 228 i++; 229 if (unlikely(i == tx_ring->count)) { 230 i = 0; 231 tx_buffer = tx_ring->tx_buffer_info; 232 tx_desc = IGC_TX_DESC(tx_ring, 0); 233 } 234 235 /* unmap any remaining paged data */ 236 if (dma_unmap_len(tx_buffer, len)) 237 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 238 } 239 240 tx_buffer->next_to_watch = NULL; 241 242 /* move us one more past the eop_desc for start of next pkt */ 243 tx_buffer++; 244 i++; 245 if (unlikely(i == tx_ring->count)) { 246 i = 0; 247 tx_buffer = tx_ring->tx_buffer_info; 248 } 249 } 250 251 if (tx_ring->xsk_pool && xsk_frames) 252 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 253 254 /* reset BQL for queue */ 255 netdev_tx_reset_queue(txring_txq(tx_ring)); 256 257 /* Zero out the buffer ring */ 258 memset(tx_ring->tx_buffer_info, 0, 259 sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); 260 261 /* Zero out the descriptor ring */ 262 memset(tx_ring->desc, 0, tx_ring->size); 263 264 /* reset next_to_use and next_to_clean */ 265 tx_ring->next_to_use = 0; 266 tx_ring->next_to_clean = 0; 267 } 268 269 /** 270 * igc_free_tx_resources - Free Tx Resources per Queue 271 * @tx_ring: Tx descriptor ring for a specific queue 272 * 273 * Free all transmit software resources 274 */ igc_free_tx_resources(struct igc_ring * tx_ring)275 void igc_free_tx_resources(struct igc_ring *tx_ring) 276 { 277 igc_disable_tx_ring(tx_ring); 278 279 vfree(tx_ring->tx_buffer_info); 280 tx_ring->tx_buffer_info = NULL; 281 282 /* if not set, then don't free */ 283 if (!tx_ring->desc) 284 return; 285 286 dma_free_coherent(tx_ring->dev, tx_ring->size, 287 tx_ring->desc, tx_ring->dma); 288 289 tx_ring->desc = NULL; 290 } 291 292 /** 293 * igc_free_all_tx_resources - Free Tx Resources for All Queues 294 * @adapter: board private 
structure 295 * 296 * Free all transmit software resources 297 */ igc_free_all_tx_resources(struct igc_adapter * adapter)298 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 299 { 300 int i; 301 302 for (i = 0; i < adapter->num_tx_queues; i++) 303 igc_free_tx_resources(adapter->tx_ring[i]); 304 } 305 306 /** 307 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 308 * @adapter: board private structure 309 */ igc_clean_all_tx_rings(struct igc_adapter * adapter)310 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 311 { 312 int i; 313 314 for (i = 0; i < adapter->num_tx_queues; i++) 315 if (adapter->tx_ring[i]) 316 igc_clean_tx_ring(adapter->tx_ring[i]); 317 } 318 igc_disable_tx_ring_hw(struct igc_ring * ring)319 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 320 { 321 struct igc_hw *hw = &ring->q_vector->adapter->hw; 322 u8 idx = ring->reg_idx; 323 u32 txdctl; 324 325 txdctl = rd32(IGC_TXDCTL(idx)); 326 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 327 txdctl |= IGC_TXDCTL_SWFLUSH; 328 wr32(IGC_TXDCTL(idx), txdctl); 329 } 330 331 /** 332 * igc_disable_all_tx_rings_hw - Disable all transmit queue operation 333 * @adapter: board private structure 334 */ igc_disable_all_tx_rings_hw(struct igc_adapter * adapter)335 static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) 336 { 337 int i; 338 339 for (i = 0; i < adapter->num_tx_queues; i++) { 340 struct igc_ring *tx_ring = adapter->tx_ring[i]; 341 342 igc_disable_tx_ring_hw(tx_ring); 343 } 344 } 345 346 /** 347 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 348 * @tx_ring: tx descriptor ring (for a specific queue) to setup 349 * 350 * Return 0 on success, negative on failure 351 */ igc_setup_tx_resources(struct igc_ring * tx_ring)352 int igc_setup_tx_resources(struct igc_ring *tx_ring) 353 { 354 struct net_device *ndev = tx_ring->netdev; 355 struct device *dev = tx_ring->dev; 356 int size = 0; 357 358 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 359 tx_ring->tx_buffer_info = vzalloc(size); 360 if (!tx_ring->tx_buffer_info) 361 goto err; 362 363 /* round up to nearest 4K */ 364 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 365 tx_ring->size = ALIGN(tx_ring->size, 4096); 366 367 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 368 &tx_ring->dma, GFP_KERNEL); 369 370 if (!tx_ring->desc) 371 goto err; 372 373 tx_ring->next_to_use = 0; 374 tx_ring->next_to_clean = 0; 375 376 return 0; 377 378 err: 379 vfree(tx_ring->tx_buffer_info); 380 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 381 return -ENOMEM; 382 } 383 384 /** 385 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 386 * @adapter: board private structure 387 * 388 * Return 0 on success, negative on failure 389 */ igc_setup_all_tx_resources(struct igc_adapter * adapter)390 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 391 { 392 struct net_device *dev = adapter->netdev; 393 int i, err = 0; 394 395 for (i = 0; i < adapter->num_tx_queues; i++) { 396 err = igc_setup_tx_resources(adapter->tx_ring[i]); 397 if (err) { 398 netdev_err(dev, "Error on Tx queue %u setup\n", i); 399 for (i--; i >= 0; i--) 400 igc_free_tx_resources(adapter->tx_ring[i]); 401 break; 402 } 403 } 404 405 return err; 406 } 407 igc_clean_rx_ring_page_shared(struct igc_ring * rx_ring)408 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 409 { 410 u16 i = rx_ring->next_to_clean; 411 412 dev_kfree_skb(rx_ring->skb); 413 rx_ring->skb = NULL; 
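	/* Pages in the range [next_to_clean, next_to_alloc) are still owned
	 * by the driver; sync each one for the CPU, unmap it and drop our
	 * reference before the ring is torn down or reused.
	 */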
414 415 /* Free all the Rx ring sk_buffs */ 416 while (i != rx_ring->next_to_alloc) { 417 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 418 419 /* Invalidate cache lines that may have been written to by 420 * device so that we avoid corrupting memory. 421 */ 422 dma_sync_single_range_for_cpu(rx_ring->dev, 423 buffer_info->dma, 424 buffer_info->page_offset, 425 igc_rx_bufsz(rx_ring), 426 DMA_FROM_DEVICE); 427 428 /* free resources associated with mapping */ 429 dma_unmap_page_attrs(rx_ring->dev, 430 buffer_info->dma, 431 igc_rx_pg_size(rx_ring), 432 DMA_FROM_DEVICE, 433 IGC_RX_DMA_ATTR); 434 __page_frag_cache_drain(buffer_info->page, 435 buffer_info->pagecnt_bias); 436 437 i++; 438 if (i == rx_ring->count) 439 i = 0; 440 } 441 } 442 igc_clean_rx_ring_xsk_pool(struct igc_ring * ring)443 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 444 { 445 struct igc_rx_buffer *bi; 446 u16 i; 447 448 for (i = 0; i < ring->count; i++) { 449 bi = &ring->rx_buffer_info[i]; 450 if (!bi->xdp) 451 continue; 452 453 xsk_buff_free(bi->xdp); 454 bi->xdp = NULL; 455 } 456 } 457 458 /** 459 * igc_clean_rx_ring - Free Rx Buffers per Queue 460 * @ring: ring to free buffers from 461 */ igc_clean_rx_ring(struct igc_ring * ring)462 static void igc_clean_rx_ring(struct igc_ring *ring) 463 { 464 if (ring->xsk_pool) 465 igc_clean_rx_ring_xsk_pool(ring); 466 else 467 igc_clean_rx_ring_page_shared(ring); 468 469 clear_ring_uses_large_buffer(ring); 470 471 ring->next_to_alloc = 0; 472 ring->next_to_clean = 0; 473 ring->next_to_use = 0; 474 } 475 476 /** 477 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 478 * @adapter: board private structure 479 */ igc_clean_all_rx_rings(struct igc_adapter * adapter)480 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 481 { 482 int i; 483 484 for (i = 0; i < adapter->num_rx_queues; i++) 485 if (adapter->rx_ring[i]) 486 igc_clean_rx_ring(adapter->rx_ring[i]); 487 } 488 489 /** 490 * igc_free_rx_resources - Free Rx Resources 491 * @rx_ring: ring to clean the resources from 492 * 493 * Free all receive software resources 494 */ igc_free_rx_resources(struct igc_ring * rx_ring)495 void igc_free_rx_resources(struct igc_ring *rx_ring) 496 { 497 igc_clean_rx_ring(rx_ring); 498 499 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 500 501 vfree(rx_ring->rx_buffer_info); 502 rx_ring->rx_buffer_info = NULL; 503 504 /* if not set, then don't free */ 505 if (!rx_ring->desc) 506 return; 507 508 dma_free_coherent(rx_ring->dev, rx_ring->size, 509 rx_ring->desc, rx_ring->dma); 510 511 rx_ring->desc = NULL; 512 } 513 514 /** 515 * igc_free_all_rx_resources - Free Rx Resources for All Queues 516 * @adapter: board private structure 517 * 518 * Free all receive software resources 519 */ igc_free_all_rx_resources(struct igc_adapter * adapter)520 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 521 { 522 int i; 523 524 for (i = 0; i < adapter->num_rx_queues; i++) 525 igc_free_rx_resources(adapter->rx_ring[i]); 526 } 527 528 /** 529 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 530 * @rx_ring: rx descriptor ring (for a specific queue) to setup 531 * 532 * Returns 0 on success, negative on failure 533 */ igc_setup_rx_resources(struct igc_ring * rx_ring)534 int igc_setup_rx_resources(struct igc_ring *rx_ring) 535 { 536 struct net_device *ndev = rx_ring->netdev; 537 struct device *dev = rx_ring->dev; 538 u8 index = rx_ring->queue_index; 539 int size, desc_len, res; 540 541 /* XDP RX-queue info */ 542 if 
(xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 543 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 544 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 545 rx_ring->q_vector->napi.napi_id); 546 if (res < 0) { 547 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 548 index); 549 return res; 550 } 551 552 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 553 rx_ring->rx_buffer_info = vzalloc(size); 554 if (!rx_ring->rx_buffer_info) 555 goto err; 556 557 desc_len = sizeof(union igc_adv_rx_desc); 558 559 /* Round up to nearest 4K */ 560 rx_ring->size = rx_ring->count * desc_len; 561 rx_ring->size = ALIGN(rx_ring->size, 4096); 562 563 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 564 &rx_ring->dma, GFP_KERNEL); 565 566 if (!rx_ring->desc) 567 goto err; 568 569 rx_ring->next_to_alloc = 0; 570 rx_ring->next_to_clean = 0; 571 rx_ring->next_to_use = 0; 572 573 return 0; 574 575 err: 576 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 577 vfree(rx_ring->rx_buffer_info); 578 rx_ring->rx_buffer_info = NULL; 579 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 580 return -ENOMEM; 581 } 582 583 /** 584 * igc_setup_all_rx_resources - wrapper to allocate Rx resources 585 * (Descriptors) for all queues 586 * @adapter: board private structure 587 * 588 * Return 0 on success, negative on failure 589 */ igc_setup_all_rx_resources(struct igc_adapter * adapter)590 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 591 { 592 struct net_device *dev = adapter->netdev; 593 int i, err = 0; 594 595 for (i = 0; i < adapter->num_rx_queues; i++) { 596 err = igc_setup_rx_resources(adapter->rx_ring[i]); 597 if (err) { 598 netdev_err(dev, "Error on Rx queue %u setup\n", i); 599 for (i--; i >= 0; i--) 600 igc_free_rx_resources(adapter->rx_ring[i]); 601 break; 602 } 603 } 604 605 return err; 606 } 607 igc_get_xsk_pool(struct igc_adapter * adapter,struct igc_ring * ring)608 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 609 struct igc_ring *ring) 610 { 611 if (!igc_xdp_is_enabled(adapter) || 612 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 613 return NULL; 614 615 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 616 } 617 618 /** 619 * igc_configure_rx_ring - Configure a receive ring after Reset 620 * @adapter: board private structure 621 * @ring: receive ring to be configured 622 * 623 * Configure the Rx unit of the MAC after a reset. 
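 * This programs the descriptor base/length registers and head/tail
 * pointers, selects the buffer size via SRRCTL, registers the XDP memory
 * model for the ring and finally re-enables the queue through RXDCTL.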
624 */ igc_configure_rx_ring(struct igc_adapter * adapter,struct igc_ring * ring)625 static void igc_configure_rx_ring(struct igc_adapter *adapter, 626 struct igc_ring *ring) 627 { 628 struct igc_hw *hw = &adapter->hw; 629 union igc_adv_rx_desc *rx_desc; 630 int reg_idx = ring->reg_idx; 631 u32 srrctl = 0, rxdctl = 0; 632 u64 rdba = ring->dma; 633 u32 buf_size; 634 635 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 636 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 637 if (ring->xsk_pool) { 638 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 639 MEM_TYPE_XSK_BUFF_POOL, 640 NULL)); 641 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 642 } else { 643 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 644 MEM_TYPE_PAGE_SHARED, 645 NULL)); 646 } 647 648 if (igc_xdp_is_enabled(adapter)) 649 set_ring_uses_large_buffer(ring); 650 651 /* disable the queue */ 652 wr32(IGC_RXDCTL(reg_idx), 0); 653 654 /* Set DMA base address registers */ 655 wr32(IGC_RDBAL(reg_idx), 656 rdba & 0x00000000ffffffffULL); 657 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 658 wr32(IGC_RDLEN(reg_idx), 659 ring->count * sizeof(union igc_adv_rx_desc)); 660 661 /* initialize head and tail */ 662 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 663 wr32(IGC_RDH(reg_idx), 0); 664 writel(0, ring->tail); 665 666 /* reset next-to- use/clean to place SW in sync with hardware */ 667 ring->next_to_clean = 0; 668 ring->next_to_use = 0; 669 670 if (ring->xsk_pool) 671 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); 672 else if (ring_uses_large_buffer(ring)) 673 buf_size = IGC_RXBUFFER_3072; 674 else 675 buf_size = IGC_RXBUFFER_2048; 676 677 srrctl = rd32(IGC_SRRCTL(reg_idx)); 678 srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | 679 IGC_SRRCTL_DESCTYPE_MASK); 680 srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); 681 srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); 682 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 683 684 wr32(IGC_SRRCTL(reg_idx), srrctl); 685 686 rxdctl |= IGC_RX_PTHRESH; 687 rxdctl |= IGC_RX_HTHRESH << 8; 688 rxdctl |= IGC_RX_WTHRESH << 16; 689 690 /* initialize rx_buffer_info */ 691 memset(ring->rx_buffer_info, 0, 692 sizeof(struct igc_rx_buffer) * ring->count); 693 694 /* initialize Rx descriptor 0 */ 695 rx_desc = IGC_RX_DESC(ring, 0); 696 rx_desc->wb.upper.length = 0; 697 698 /* enable receive descriptor fetching */ 699 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 700 701 wr32(IGC_RXDCTL(reg_idx), rxdctl); 702 } 703 704 /** 705 * igc_configure_rx - Configure receive Unit after Reset 706 * @adapter: board private structure 707 * 708 * Configure the Rx unit of the MAC after a reset. 709 */ igc_configure_rx(struct igc_adapter * adapter)710 static void igc_configure_rx(struct igc_adapter *adapter) 711 { 712 int i; 713 714 /* Setup the HW Rx Head and Tail Descriptor Pointers and 715 * the Base and Length of the Rx Descriptor Ring 716 */ 717 for (i = 0; i < adapter->num_rx_queues; i++) 718 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 719 } 720 721 /** 722 * igc_configure_tx_ring - Configure transmit ring after Reset 723 * @adapter: board private structure 724 * @ring: tx ring to configure 725 * 726 * Configure a transmit ring after a reset. 
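 * This disables the queue, programs the descriptor base/length registers
 * and head/tail pointers, sets the prefetch/host/write-back thresholds in
 * TXDCTL and then re-enables the queue.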
727 */ igc_configure_tx_ring(struct igc_adapter * adapter,struct igc_ring * ring)728 static void igc_configure_tx_ring(struct igc_adapter *adapter, 729 struct igc_ring *ring) 730 { 731 struct igc_hw *hw = &adapter->hw; 732 int reg_idx = ring->reg_idx; 733 u64 tdba = ring->dma; 734 u32 txdctl = 0; 735 736 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 737 738 /* disable the queue */ 739 wr32(IGC_TXDCTL(reg_idx), 0); 740 wrfl(); 741 742 wr32(IGC_TDLEN(reg_idx), 743 ring->count * sizeof(union igc_adv_tx_desc)); 744 wr32(IGC_TDBAL(reg_idx), 745 tdba & 0x00000000ffffffffULL); 746 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 747 748 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 749 wr32(IGC_TDH(reg_idx), 0); 750 writel(0, ring->tail); 751 752 txdctl |= IGC_TX_PTHRESH; 753 txdctl |= IGC_TX_HTHRESH << 8; 754 txdctl |= IGC_TX_WTHRESH << 16; 755 756 txdctl |= IGC_TXDCTL_QUEUE_ENABLE; 757 wr32(IGC_TXDCTL(reg_idx), txdctl); 758 } 759 760 /** 761 * igc_configure_tx - Configure transmit Unit after Reset 762 * @adapter: board private structure 763 * 764 * Configure the Tx unit of the MAC after a reset. 765 */ igc_configure_tx(struct igc_adapter * adapter)766 static void igc_configure_tx(struct igc_adapter *adapter) 767 { 768 int i; 769 770 for (i = 0; i < adapter->num_tx_queues; i++) 771 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 772 } 773 774 /** 775 * igc_setup_mrqc - configure the multiple receive queue control registers 776 * @adapter: Board private structure 777 */ igc_setup_mrqc(struct igc_adapter * adapter)778 static void igc_setup_mrqc(struct igc_adapter *adapter) 779 { 780 struct igc_hw *hw = &adapter->hw; 781 u32 j, num_rx_queues; 782 u32 mrqc, rxcsum; 783 u32 rss_key[10]; 784 785 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 786 for (j = 0; j < 10; j++) 787 wr32(IGC_RSSRK(j), rss_key[j]); 788 789 num_rx_queues = adapter->rss_queues; 790 791 if (adapter->rss_indir_tbl_init != num_rx_queues) { 792 for (j = 0; j < IGC_RETA_SIZE; j++) 793 adapter->rss_indir_tbl[j] = 794 (j * num_rx_queues) / IGC_RETA_SIZE; 795 adapter->rss_indir_tbl_init = num_rx_queues; 796 } 797 igc_write_rss_indir_tbl(adapter); 798 799 /* Disable raw packet checksumming so that RSS hash is placed in 800 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum 801 * offloads as they are enabled by default 802 */ 803 rxcsum = rd32(IGC_RXCSUM); 804 rxcsum |= IGC_RXCSUM_PCSD; 805 806 /* Enable Receive Checksum Offload for SCTP */ 807 rxcsum |= IGC_RXCSUM_CRCOFL; 808 809 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 810 wr32(IGC_RXCSUM, rxcsum); 811 812 /* Generate RSS hash based on packet types, TCP/UDP 813 * port numbers and/or IPv4/v6 src and dst addresses 814 */ 815 mrqc = IGC_MRQC_RSS_FIELD_IPV4 | 816 IGC_MRQC_RSS_FIELD_IPV4_TCP | 817 IGC_MRQC_RSS_FIELD_IPV6 | 818 IGC_MRQC_RSS_FIELD_IPV6_TCP | 819 IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; 820 821 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) 822 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; 823 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) 824 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; 825 826 mrqc |= IGC_MRQC_ENABLE_RSS_MQ; 827 828 wr32(IGC_MRQC, mrqc); 829 } 830 831 /** 832 * igc_setup_rctl - configure the receive control registers 833 * @adapter: Board private structure 834 */ igc_setup_rctl(struct igc_adapter * adapter)835 static void igc_setup_rctl(struct igc_adapter *adapter) 836 { 837 struct igc_hw *hw = &adapter->hw; 838 u32 rctl; 839 840 rctl = rd32(IGC_RCTL); 841 842 rctl &= ~(3 << IGC_RCTL_MO_SHIFT); 843 rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC); 844 845 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF | 846 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); 847 848 /* enable stripping of CRC. Newer features require 849 * that the HW strips the CRC. 850 */ 851 rctl |= IGC_RCTL_SECRC; 852 853 /* disable store bad packets and clear size bits. */ 854 rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256); 855 856 /* enable LPE to allow for reception of jumbo frames */ 857 rctl |= IGC_RCTL_LPE; 858 859 /* disable queue 0 to prevent tail write w/o re-config */ 860 wr32(IGC_RXDCTL(0), 0); 861 862 /* This is useful for sniffing bad packets. */ 863 if (adapter->netdev->features & NETIF_F_RXALL) { 864 /* UPE and MPE will be handled by normal PROMISC logic 865 * in set_rx_mode 866 */ 867 rctl |= (IGC_RCTL_SBP | /* Receive bad packets */ 868 IGC_RCTL_BAM | /* RX All Bcast Pkts */ 869 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ 870 871 rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */ 872 IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */ 873 } 874 875 wr32(IGC_RCTL, rctl); 876 } 877 878 /** 879 * igc_setup_tctl - configure the transmit control registers 880 * @adapter: Board private structure 881 */ igc_setup_tctl(struct igc_adapter * adapter)882 static void igc_setup_tctl(struct igc_adapter *adapter) 883 { 884 struct igc_hw *hw = &adapter->hw; 885 u32 tctl; 886 887 /* disable queue 0 which icould be enabled by default */ 888 wr32(IGC_TXDCTL(0), 0); 889 890 /* Program the Transmit Control Register */ 891 tctl = rd32(IGC_TCTL); 892 tctl &= ~IGC_TCTL_CT; 893 tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC | 894 (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT); 895 896 /* Enable transmits */ 897 tctl |= IGC_TCTL_EN; 898 899 wr32(IGC_TCTL, tctl); 900 } 901 902 /** 903 * igc_set_mac_filter_hw() - Set MAC address filter in hardware 904 * @adapter: Pointer to adapter where the filter should be set 905 * @index: Filter index 906 * @type: MAC address filter type (source or destination) 907 * @addr: MAC address 908 * @queue: If non-negative, queue assignment feature is enabled and frames 909 * matching the filter are enqueued onto 'queue'. Otherwise, queue 910 * assignment is disabled. 
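 * As an illustrative example, a (hypothetical) address 00:11:22:33:44:55
 * is programmed as RAL = 0x33221100 with 0x5544 in the low 16 bits of RAH,
 * since both registers take the address in little-endian byte order.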
911 */ igc_set_mac_filter_hw(struct igc_adapter * adapter,int index,enum igc_mac_filter_type type,const u8 * addr,int queue)912 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 913 enum igc_mac_filter_type type, 914 const u8 *addr, int queue) 915 { 916 struct net_device *dev = adapter->netdev; 917 struct igc_hw *hw = &adapter->hw; 918 u32 ral, rah; 919 920 if (WARN_ON(index >= hw->mac.rar_entry_count)) 921 return; 922 923 ral = le32_to_cpup((__le32 *)(addr)); 924 rah = le16_to_cpup((__le16 *)(addr + 4)); 925 926 if (type == IGC_MAC_FILTER_TYPE_SRC) { 927 rah &= ~IGC_RAH_ASEL_MASK; 928 rah |= IGC_RAH_ASEL_SRC_ADDR; 929 } 930 931 if (queue >= 0) { 932 rah &= ~IGC_RAH_QSEL_MASK; 933 rah |= (queue << IGC_RAH_QSEL_SHIFT); 934 rah |= IGC_RAH_QSEL_ENABLE; 935 } 936 937 rah |= IGC_RAH_AV; 938 939 wr32(IGC_RAL(index), ral); 940 wr32(IGC_RAH(index), rah); 941 942 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 943 } 944 945 /** 946 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 947 * @adapter: Pointer to adapter where the filter should be cleared 948 * @index: Filter index 949 */ igc_clear_mac_filter_hw(struct igc_adapter * adapter,int index)950 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 951 { 952 struct net_device *dev = adapter->netdev; 953 struct igc_hw *hw = &adapter->hw; 954 955 if (WARN_ON(index >= hw->mac.rar_entry_count)) 956 return; 957 958 wr32(IGC_RAL(index), 0); 959 wr32(IGC_RAH(index), 0); 960 961 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 962 } 963 964 /* Set default MAC address for the PF in the first RAR entry */ igc_set_default_mac_filter(struct igc_adapter * adapter)965 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 966 { 967 struct net_device *dev = adapter->netdev; 968 u8 *addr = adapter->hw.mac.addr; 969 970 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 971 972 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 973 } 974 975 /** 976 * igc_set_mac - Change the Ethernet Address of the NIC 977 * @netdev: network interface device structure 978 * @p: pointer to an address structure 979 * 980 * Returns 0 on success, negative on failure 981 */ igc_set_mac(struct net_device * netdev,void * p)982 static int igc_set_mac(struct net_device *netdev, void *p) 983 { 984 struct igc_adapter *adapter = netdev_priv(netdev); 985 struct igc_hw *hw = &adapter->hw; 986 struct sockaddr *addr = p; 987 988 if (!is_valid_ether_addr(addr->sa_data)) 989 return -EADDRNOTAVAIL; 990 991 eth_hw_addr_set(netdev, addr->sa_data); 992 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 993 994 /* set the correct pool for the new PF MAC address in entry 0 */ 995 igc_set_default_mac_filter(adapter); 996 997 return 0; 998 } 999 1000 /** 1001 * igc_write_mc_addr_list - write multicast addresses to MTA 1002 * @netdev: network interface device structure 1003 * 1004 * Writes multicast address list to the MTA hash table. 
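 * The multicast addresses are first flattened into a packed array of
 * ETH_ALEN-byte entries, which is the layout the shared MAC helper expects.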
 * Returns: -ENOMEM on failure
 *          0 on no addresses written
 *          X on writing X addresses to MTA
 **/
static int igc_write_mc_addr_list(struct net_device *netdev)
{
	struct igc_adapter *adapter = netdev_priv(netdev);
	struct igc_hw *hw = &adapter->hw;
	struct netdev_hw_addr *ha;
	u8 *mta_list;
	int i;

	if (netdev_mc_empty(netdev)) {
		/* nothing to program, so clear mc list */
		igc_update_mc_addr_list(hw, NULL, 0);
		return 0;
	}

	mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
	if (!mta_list)
		return -ENOMEM;

	/* The shared function expects a packed array of only addresses. */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

	igc_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return netdev_mc_count(netdev);
}

static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
				bool *first_flag, bool *insert_empty)
{
	struct igc_adapter *adapter = netdev_priv(ring->netdev);
	ktime_t cycle_time = adapter->cycle_time;
	ktime_t base_time = adapter->base_time;
	ktime_t now = ktime_get_clocktai();
	ktime_t baset_est, end_of_cycle;
	s32 launchtime;
	s64 n;

	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);

	baset_est = ktime_add_ns(base_time, cycle_time * (n));
	end_of_cycle = ktime_add_ns(baset_est, cycle_time);

	if (ktime_compare(txtime, end_of_cycle) >= 0) {
		if (baset_est != ring->last_ff_cycle) {
			*first_flag = true;
			ring->last_ff_cycle = baset_est;

			if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
				*insert_empty = true;
		}
	}

	/* Introduce a window at the end of the cycle in which packets may
	 * not honor their launchtime.  A 5 usec window is chosen to cover
	 * the time the software needs to update the tail pointer and for
	 * the frame to be DMA'd into the packet buffer.
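	 *
	 * Illustrative walk-through with assumed values: base_time = 0,
	 * cycle_time = 1 ms and now = 10.3 ms give n = 10 and
	 * baset_est = 10 ms, so end_of_cycle = 11 ms.  A txtime of 10.7 ms
	 * then maps to a launchtime of 0.7 ms relative to the start of the
	 * estimated cycle.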
1068 */ 1069 if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC)) 1070 netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n", 1071 txtime); 1072 1073 ring->last_tx_cycle = end_of_cycle; 1074 1075 launchtime = ktime_sub_ns(txtime, baset_est); 1076 if (launchtime > 0) 1077 div_s64_rem(launchtime, cycle_time, &launchtime); 1078 else 1079 launchtime = 0; 1080 1081 return cpu_to_le32(launchtime); 1082 } 1083 igc_init_empty_frame(struct igc_ring * ring,struct igc_tx_buffer * buffer,struct sk_buff * skb)1084 static int igc_init_empty_frame(struct igc_ring *ring, 1085 struct igc_tx_buffer *buffer, 1086 struct sk_buff *skb) 1087 { 1088 unsigned int size; 1089 dma_addr_t dma; 1090 1091 size = skb_headlen(skb); 1092 1093 dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE); 1094 if (dma_mapping_error(ring->dev, dma)) { 1095 net_err_ratelimited("%s: DMA mapping error for empty frame\n", 1096 netdev_name(ring->netdev)); 1097 return -ENOMEM; 1098 } 1099 1100 buffer->type = IGC_TX_BUFFER_TYPE_SKB; 1101 buffer->skb = skb; 1102 buffer->protocol = 0; 1103 buffer->bytecount = skb->len; 1104 buffer->gso_segs = 1; 1105 buffer->time_stamp = jiffies; 1106 dma_unmap_len_set(buffer, len, skb->len); 1107 dma_unmap_addr_set(buffer, dma, dma); 1108 1109 return 0; 1110 } 1111 igc_init_tx_empty_descriptor(struct igc_ring * ring,struct sk_buff * skb,struct igc_tx_buffer * first)1112 static void igc_init_tx_empty_descriptor(struct igc_ring *ring, 1113 struct sk_buff *skb, 1114 struct igc_tx_buffer *first) 1115 { 1116 union igc_adv_tx_desc *desc; 1117 u32 cmd_type, olinfo_status; 1118 1119 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 1120 IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | 1121 first->bytecount; 1122 olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 1123 1124 desc = IGC_TX_DESC(ring, ring->next_to_use); 1125 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1126 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1127 desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma)); 1128 1129 netdev_tx_sent_queue(txring_txq(ring), skb->len); 1130 1131 first->next_to_watch = desc; 1132 1133 ring->next_to_use++; 1134 if (ring->next_to_use == ring->count) 1135 ring->next_to_use = 0; 1136 } 1137 1138 #define IGC_EMPTY_FRAME_SIZE 60 1139 igc_tx_ctxtdesc(struct igc_ring * tx_ring,__le32 launch_time,bool first_flag,u32 vlan_macip_lens,u32 type_tucmd,u32 mss_l4len_idx)1140 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, 1141 __le32 launch_time, bool first_flag, 1142 u32 vlan_macip_lens, u32 type_tucmd, 1143 u32 mss_l4len_idx) 1144 { 1145 struct igc_adv_tx_context_desc *context_desc; 1146 u16 i = tx_ring->next_to_use; 1147 1148 context_desc = IGC_TX_CTXTDESC(tx_ring, i); 1149 1150 i++; 1151 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1152 1153 /* set bits to identify this as an advanced context descriptor */ 1154 type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT; 1155 1156 /* For i225, context index must be unique per ring. 
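	 * The ring's reg_idx is folded into the IDX field of mss_l4len_idx
	 * below, so each Tx ring consistently uses its own context slot.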
	 */
	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	if (first_flag)
		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;

	context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
	context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->launch_time = launch_time;
}

static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
			__le32 launch_time, bool first_flag)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_failed:
		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
		    !tx_ring->launchtime_enable)
			return;
		goto no_csum;
	}

	switch (skb->csum_offset) {
	case offsetof(struct tcphdr, check):
		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
		fallthrough;
	case offsetof(struct udphdr, check):
		break;
	case offsetof(struct sctphdr, checksum):
		/* validate that this is actually an SCTP request */
		if (skb_csum_is_sctp(skb)) {
			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
			break;
		}
		fallthrough;
	default:
		skb_checksum_help(skb);
		goto csum_failed;
	}

	/* update TX checksum flag */
	first->tx_flags |= IGC_TX_FLAGS_CSUM;
	vlan_macip_lens = skb_checksum_start_offset(skb) -
			  skb_network_offset(skb);
no_csum:
	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;

	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
			vlan_macip_lens, type_tucmd, 0);
}

static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Memory barrier: make the stopped queue state visible before we
	 * re-read the free descriptor count, pairing with the Tx cleanup
	 * path that frees descriptors and may restart the queue.
	 */
	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available.
	 */
	if (igc_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
{
	if (igc_desc_unused(tx_ring) >= size)
		return 0;
	return __igc_maybe_stop_tx(tx_ring, size);
}

#define IGC_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ?
\ 1248 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1249 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1250 igc_tx_cmd_type(struct sk_buff * skb,u32 tx_flags)1251 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1252 { 1253 /* set type for advanced descriptor with frame checksum insertion */ 1254 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1255 IGC_ADVTXD_DCMD_DEXT | 1256 IGC_ADVTXD_DCMD_IFCS; 1257 1258 /* set HW vlan bit if vlan is present */ 1259 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1260 IGC_ADVTXD_DCMD_VLE); 1261 1262 /* set segmentation bits for TSO */ 1263 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1264 (IGC_ADVTXD_DCMD_TSE)); 1265 1266 /* set timestamp bit if present, will select the register set 1267 * based on the _TSTAMP(_X) bit. 1268 */ 1269 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1270 (IGC_ADVTXD_MAC_TSTAMP)); 1271 1272 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, 1273 (IGC_ADVTXD_TSTAMP_REG_1)); 1274 1275 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, 1276 (IGC_ADVTXD_TSTAMP_REG_2)); 1277 1278 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, 1279 (IGC_ADVTXD_TSTAMP_REG_3)); 1280 1281 /* insert frame checksum */ 1282 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1283 1284 return cmd_type; 1285 } 1286 igc_tx_olinfo_status(struct igc_ring * tx_ring,union igc_adv_tx_desc * tx_desc,u32 tx_flags,unsigned int paylen)1287 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1288 union igc_adv_tx_desc *tx_desc, 1289 u32 tx_flags, unsigned int paylen) 1290 { 1291 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1292 1293 /* insert L4 checksum */ 1294 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, 1295 (IGC_TXD_POPTS_TXSM << 8)); 1296 1297 /* insert IPv4 checksum */ 1298 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, 1299 (IGC_TXD_POPTS_IXSM << 8)); 1300 1301 /* Use the second timer (free running, in general) for the timestamp */ 1302 olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, 1303 IGC_TXD_PTP2_TIMER_1); 1304 1305 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1306 } 1307 igc_tx_map(struct igc_ring * tx_ring,struct igc_tx_buffer * first,const u8 hdr_len)1308 static int igc_tx_map(struct igc_ring *tx_ring, 1309 struct igc_tx_buffer *first, 1310 const u8 hdr_len) 1311 { 1312 struct sk_buff *skb = first->skb; 1313 struct igc_tx_buffer *tx_buffer; 1314 union igc_adv_tx_desc *tx_desc; 1315 u32 tx_flags = first->tx_flags; 1316 skb_frag_t *frag; 1317 u16 i = tx_ring->next_to_use; 1318 unsigned int data_len, size; 1319 dma_addr_t dma; 1320 u32 cmd_type; 1321 1322 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1323 tx_desc = IGC_TX_DESC(tx_ring, i); 1324 1325 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1326 1327 size = skb_headlen(skb); 1328 data_len = skb->data_len; 1329 1330 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1331 1332 tx_buffer = first; 1333 1334 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1335 if (dma_mapping_error(tx_ring->dev, dma)) 1336 goto dma_error; 1337 1338 /* record length, and DMA address */ 1339 dma_unmap_len_set(tx_buffer, len, size); 1340 dma_unmap_addr_set(tx_buffer, dma, dma); 1341 1342 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1343 1344 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1345 tx_desc->read.cmd_type_len = 1346 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1347 1348 i++; 1349 tx_desc++; 1350 if (i == 
tx_ring->count) { 1351 tx_desc = IGC_TX_DESC(tx_ring, 0); 1352 i = 0; 1353 } 1354 tx_desc->read.olinfo_status = 0; 1355 1356 dma += IGC_MAX_DATA_PER_TXD; 1357 size -= IGC_MAX_DATA_PER_TXD; 1358 1359 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1360 } 1361 1362 if (likely(!data_len)) 1363 break; 1364 1365 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1366 1367 i++; 1368 tx_desc++; 1369 if (i == tx_ring->count) { 1370 tx_desc = IGC_TX_DESC(tx_ring, 0); 1371 i = 0; 1372 } 1373 tx_desc->read.olinfo_status = 0; 1374 1375 size = skb_frag_size(frag); 1376 data_len -= size; 1377 1378 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 1379 size, DMA_TO_DEVICE); 1380 1381 tx_buffer = &tx_ring->tx_buffer_info[i]; 1382 } 1383 1384 /* write last descriptor with RS and EOP bits */ 1385 cmd_type |= size | IGC_TXD_DCMD; 1386 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1387 1388 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1389 1390 /* set the timestamp */ 1391 first->time_stamp = jiffies; 1392 1393 skb_tx_timestamp(skb); 1394 1395 /* Force memory writes to complete before letting h/w know there 1396 * are new descriptors to fetch. (Only applicable for weak-ordered 1397 * memory model archs, such as IA-64). 1398 * 1399 * We also need this memory barrier to make certain all of the 1400 * status bits have been updated before next_to_watch is written. 1401 */ 1402 wmb(); 1403 1404 /* set next_to_watch value indicating a packet is present */ 1405 first->next_to_watch = tx_desc; 1406 1407 i++; 1408 if (i == tx_ring->count) 1409 i = 0; 1410 1411 tx_ring->next_to_use = i; 1412 1413 /* Make sure there is space in the ring for the next send. */ 1414 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1415 1416 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1417 writel(i, tx_ring->tail); 1418 } 1419 1420 return 0; 1421 dma_error: 1422 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1423 tx_buffer = &tx_ring->tx_buffer_info[i]; 1424 1425 /* clear dma mappings for failed tx_buffer_info map */ 1426 while (tx_buffer != first) { 1427 if (dma_unmap_len(tx_buffer, len)) 1428 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1429 1430 if (i-- == 0) 1431 i += tx_ring->count; 1432 tx_buffer = &tx_ring->tx_buffer_info[i]; 1433 } 1434 1435 if (dma_unmap_len(tx_buffer, len)) 1436 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1437 1438 dev_kfree_skb_any(tx_buffer->skb); 1439 tx_buffer->skb = NULL; 1440 1441 tx_ring->next_to_use = i; 1442 1443 return -1; 1444 } 1445 igc_tso(struct igc_ring * tx_ring,struct igc_tx_buffer * first,__le32 launch_time,bool first_flag,u8 * hdr_len)1446 static int igc_tso(struct igc_ring *tx_ring, 1447 struct igc_tx_buffer *first, 1448 __le32 launch_time, bool first_flag, 1449 u8 *hdr_len) 1450 { 1451 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1452 struct sk_buff *skb = first->skb; 1453 union { 1454 struct iphdr *v4; 1455 struct ipv6hdr *v6; 1456 unsigned char *hdr; 1457 } ip; 1458 union { 1459 struct tcphdr *tcp; 1460 struct udphdr *udp; 1461 unsigned char *hdr; 1462 } l4; 1463 u32 paylen, l4_offset; 1464 int err; 1465 1466 if (skb->ip_summed != CHECKSUM_PARTIAL) 1467 return 0; 1468 1469 if (!skb_is_gso(skb)) 1470 return 0; 1471 1472 err = skb_cow_head(skb, 0); 1473 if (err < 0) 1474 return err; 1475 1476 ip.hdr = skb_network_header(skb); 1477 l4.hdr = skb_checksum_start(skb); 1478 1479 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1480 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1481 1482 /* initialize outer IP header fields */ 1483 if (ip.v4->version == 4) { 1484 
unsigned char *csum_start = skb_checksum_start(skb); 1485 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1486 1487 /* IP header will have to cancel out any data that 1488 * is not a part of the outer IP header 1489 */ 1490 ip.v4->check = csum_fold(csum_partial(trans_start, 1491 csum_start - trans_start, 1492 0)); 1493 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 1494 1495 ip.v4->tot_len = 0; 1496 first->tx_flags |= IGC_TX_FLAGS_TSO | 1497 IGC_TX_FLAGS_CSUM | 1498 IGC_TX_FLAGS_IPV4; 1499 } else { 1500 ip.v6->payload_len = 0; 1501 first->tx_flags |= IGC_TX_FLAGS_TSO | 1502 IGC_TX_FLAGS_CSUM; 1503 } 1504 1505 /* determine offset of inner transport header */ 1506 l4_offset = l4.hdr - skb->data; 1507 1508 /* remove payload length from inner checksum */ 1509 paylen = skb->len - l4_offset; 1510 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1511 /* compute length of segmentation header */ 1512 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1513 csum_replace_by_diff(&l4.tcp->check, 1514 (__force __wsum)htonl(paylen)); 1515 } else { 1516 /* compute length of segmentation header */ 1517 *hdr_len = sizeof(*l4.udp) + l4_offset; 1518 csum_replace_by_diff(&l4.udp->check, 1519 (__force __wsum)htonl(paylen)); 1520 } 1521 1522 /* update gso size and bytecount with header size */ 1523 first->gso_segs = skb_shinfo(skb)->gso_segs; 1524 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1525 1526 /* MSS L4LEN IDX */ 1527 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1528 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1529 1530 /* VLAN MACLEN IPLEN */ 1531 vlan_macip_lens = l4.hdr - ip.hdr; 1532 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1533 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1534 1535 igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, 1536 vlan_macip_lens, type_tucmd, mss_l4len_idx); 1537 1538 return 1; 1539 } 1540 igc_request_tx_tstamp(struct igc_adapter * adapter,struct sk_buff * skb,u32 * flags)1541 static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) 1542 { 1543 int i; 1544 1545 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 1546 struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; 1547 1548 if (tstamp->skb) 1549 continue; 1550 1551 tstamp->skb = skb_get(skb); 1552 tstamp->start = jiffies; 1553 *flags = tstamp->flags; 1554 1555 return true; 1556 } 1557 1558 return false; 1559 } 1560 igc_insert_empty_frame(struct igc_ring * tx_ring)1561 static int igc_insert_empty_frame(struct igc_ring *tx_ring) 1562 { 1563 struct igc_tx_buffer *empty_info; 1564 struct sk_buff *empty_skb; 1565 void *data; 1566 int ret; 1567 1568 empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1569 empty_skb = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); 1570 if (unlikely(!empty_skb)) { 1571 net_err_ratelimited("%s: skb alloc error for empty frame\n", 1572 netdev_name(tx_ring->netdev)); 1573 return -ENOMEM; 1574 } 1575 1576 data = skb_put(empty_skb, IGC_EMPTY_FRAME_SIZE); 1577 memset(data, 0, IGC_EMPTY_FRAME_SIZE); 1578 1579 /* Prepare DMA mapping and Tx buffer information */ 1580 ret = igc_init_empty_frame(tx_ring, empty_info, empty_skb); 1581 if (unlikely(ret)) { 1582 dev_kfree_skb_any(empty_skb); 1583 return ret; 1584 } 1585 1586 /* Prepare advanced context descriptor for empty packet */ 1587 igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); 1588 1589 /* Prepare advanced data descriptor for empty packet */ 1590 igc_init_tx_empty_descriptor(tx_ring, empty_skb, empty_info); 1591 1592 
return 0; 1593 } 1594 igc_xmit_frame_ring(struct sk_buff * skb,struct igc_ring * tx_ring)1595 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1596 struct igc_ring *tx_ring) 1597 { 1598 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1599 bool first_flag = false, insert_empty = false; 1600 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1601 __be16 protocol = vlan_get_protocol(skb); 1602 struct igc_tx_buffer *first; 1603 __le32 launch_time = 0; 1604 u32 tx_flags = 0; 1605 unsigned short f; 1606 ktime_t txtime; 1607 u8 hdr_len = 0; 1608 int tso = 0; 1609 1610 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1611 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1612 * + 2 desc gap to keep tail from touching head, 1613 * + 1 desc for context descriptor, 1614 * + 2 desc for inserting an empty packet for launch time, 1615 * otherwise try next time 1616 */ 1617 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1618 count += TXD_USE_COUNT(skb_frag_size( 1619 &skb_shinfo(skb)->frags[f])); 1620 1621 if (igc_maybe_stop_tx(tx_ring, count + 5)) { 1622 /* this is a hard error */ 1623 return NETDEV_TX_BUSY; 1624 } 1625 1626 if (!tx_ring->launchtime_enable) 1627 goto done; 1628 1629 txtime = skb->tstamp; 1630 skb->tstamp = ktime_set(0, 0); 1631 launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); 1632 1633 if (insert_empty) { 1634 /* Reset the launch time if the required empty frame fails to 1635 * be inserted. However, this packet is not dropped, so it 1636 * "dirties" the current Qbv cycle. This ensures that the 1637 * upcoming packet, which is scheduled in the next Qbv cycle, 1638 * does not require an empty frame. This way, the launch time 1639 * continues to function correctly despite the current failure 1640 * to insert the empty frame. 
1641 */ 1642 if (igc_insert_empty_frame(tx_ring)) 1643 launch_time = 0; 1644 } 1645 1646 done: 1647 /* record the location of the first descriptor for this packet */ 1648 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1649 first->type = IGC_TX_BUFFER_TYPE_SKB; 1650 first->skb = skb; 1651 first->bytecount = skb->len; 1652 first->gso_segs = 1; 1653 1654 if (adapter->qbv_transition || tx_ring->oper_gate_closed) 1655 goto out_drop; 1656 1657 if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { 1658 adapter->stats.txdrop++; 1659 goto out_drop; 1660 } 1661 1662 if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && 1663 skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1664 unsigned long flags; 1665 u32 tstamp_flags; 1666 1667 spin_lock_irqsave(&adapter->ptp_tx_lock, flags); 1668 if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { 1669 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1670 tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; 1671 if (skb->sk && 1672 READ_ONCE(skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC) 1673 tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; 1674 } else { 1675 adapter->tx_hwtstamp_skipped++; 1676 } 1677 1678 spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); 1679 } 1680 1681 if (skb_vlan_tag_present(skb)) { 1682 tx_flags |= IGC_TX_FLAGS_VLAN; 1683 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1684 } 1685 1686 /* record initial flags and protocol */ 1687 first->tx_flags = tx_flags; 1688 first->protocol = protocol; 1689 1690 tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); 1691 if (tso < 0) 1692 goto out_drop; 1693 else if (!tso) 1694 igc_tx_csum(tx_ring, first, launch_time, first_flag); 1695 1696 igc_tx_map(tx_ring, first, hdr_len); 1697 1698 return NETDEV_TX_OK; 1699 1700 out_drop: 1701 dev_kfree_skb_any(first->skb); 1702 first->skb = NULL; 1703 1704 return NETDEV_TX_OK; 1705 } 1706 igc_tx_queue_mapping(struct igc_adapter * adapter,struct sk_buff * skb)1707 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1708 struct sk_buff *skb) 1709 { 1710 unsigned int r_idx = skb->queue_mapping; 1711 1712 if (r_idx >= adapter->num_tx_queues) 1713 r_idx = r_idx % adapter->num_tx_queues; 1714 1715 return adapter->tx_ring[r_idx]; 1716 } 1717 igc_xmit_frame(struct sk_buff * skb,struct net_device * netdev)1718 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 1719 struct net_device *netdev) 1720 { 1721 struct igc_adapter *adapter = netdev_priv(netdev); 1722 1723 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb 1724 * in order to meet this minimum size requirement. 
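	 * (With TCTL.PSP set, the MAC itself pads short frames up to the
	 * minimum Ethernet size, but it still needs at least 17 bytes of
	 * data to do so, hence the software pad below.)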
1725 */ 1726 if (skb->len < 17) { 1727 if (skb_padto(skb, 17)) 1728 return NETDEV_TX_OK; 1729 skb->len = 17; 1730 } 1731 1732 return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); 1733 } 1734 igc_rx_checksum(struct igc_ring * ring,union igc_adv_rx_desc * rx_desc,struct sk_buff * skb)1735 static void igc_rx_checksum(struct igc_ring *ring, 1736 union igc_adv_rx_desc *rx_desc, 1737 struct sk_buff *skb) 1738 { 1739 skb_checksum_none_assert(skb); 1740 1741 /* Ignore Checksum bit is set */ 1742 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM)) 1743 return; 1744 1745 /* Rx checksum disabled via ethtool */ 1746 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 1747 return; 1748 1749 /* TCP/UDP checksum error bit is set */ 1750 if (igc_test_staterr(rx_desc, 1751 IGC_RXDEXT_STATERR_L4E | 1752 IGC_RXDEXT_STATERR_IPE)) { 1753 /* work around errata with sctp packets where the TCPE aka 1754 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 1755 * packets (aka let the stack check the crc32c) 1756 */ 1757 if (!(skb->len == 60 && 1758 test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 1759 u64_stats_update_begin(&ring->rx_syncp); 1760 ring->rx_stats.csum_err++; 1761 u64_stats_update_end(&ring->rx_syncp); 1762 } 1763 /* let the stack verify checksum errors */ 1764 return; 1765 } 1766 /* It must be a TCP or UDP packet with a valid checksum */ 1767 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS | 1768 IGC_RXD_STAT_UDPCS)) 1769 skb->ip_summed = CHECKSUM_UNNECESSARY; 1770 1771 netdev_dbg(ring->netdev, "cksum success: bits %08X\n", 1772 le32_to_cpu(rx_desc->wb.upper.status_error)); 1773 } 1774 1775 /* Mapping HW RSS Type to enum pkt_hash_types */ 1776 static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = { 1777 [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2, 1778 [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4, 1779 [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3, 1780 [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4, 1781 [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3, 1782 [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3, 1783 [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4, 1784 [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4, 1785 [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4, 1786 [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4, 1787 [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ 1788 [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */ 1789 [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */ 1790 [13] = PKT_HASH_TYPE_NONE, 1791 [14] = PKT_HASH_TYPE_NONE, 1792 [15] = PKT_HASH_TYPE_NONE, 1793 }; 1794 igc_rx_hash(struct igc_ring * ring,union igc_adv_rx_desc * rx_desc,struct sk_buff * skb)1795 static inline void igc_rx_hash(struct igc_ring *ring, 1796 union igc_adv_rx_desc *rx_desc, 1797 struct sk_buff *skb) 1798 { 1799 if (ring->netdev->features & NETIF_F_RXHASH) { 1800 u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); 1801 u32 rss_type = igc_rss_type(rx_desc); 1802 1803 skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]); 1804 } 1805 } 1806 igc_rx_vlan(struct igc_ring * rx_ring,union igc_adv_rx_desc * rx_desc,struct sk_buff * skb)1807 static void igc_rx_vlan(struct igc_ring *rx_ring, 1808 union igc_adv_rx_desc *rx_desc, 1809 struct sk_buff *skb) 1810 { 1811 struct net_device *dev = rx_ring->netdev; 1812 u16 vid; 1813 1814 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && 1815 igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) { 1816 if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) && 1817 
test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) 1818 vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); 1819 else 1820 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 1821 1822 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 1823 } 1824 } 1825 1826 /** 1827 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1828 * @rx_ring: rx descriptor ring packet is being transacted on 1829 * @rx_desc: pointer to the EOP Rx descriptor 1830 * @skb: pointer to current skb being populated 1831 * 1832 * This function checks the ring, descriptor, and packet information in order 1833 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1834 * skb. 1835 */ igc_process_skb_fields(struct igc_ring * rx_ring,union igc_adv_rx_desc * rx_desc,struct sk_buff * skb)1836 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1837 union igc_adv_rx_desc *rx_desc, 1838 struct sk_buff *skb) 1839 { 1840 igc_rx_hash(rx_ring, rx_desc, skb); 1841 1842 igc_rx_checksum(rx_ring, rx_desc, skb); 1843 1844 igc_rx_vlan(rx_ring, rx_desc, skb); 1845 1846 skb_record_rx_queue(skb, rx_ring->queue_index); 1847 1848 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1849 } 1850 igc_vlan_mode(struct net_device * netdev,netdev_features_t features)1851 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1852 { 1853 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1854 struct igc_adapter *adapter = netdev_priv(netdev); 1855 struct igc_hw *hw = &adapter->hw; 1856 u32 ctrl; 1857 1858 ctrl = rd32(IGC_CTRL); 1859 1860 if (enable) { 1861 /* enable VLAN tag insert/strip */ 1862 ctrl |= IGC_CTRL_VME; 1863 } else { 1864 /* disable VLAN tag insert/strip */ 1865 ctrl &= ~IGC_CTRL_VME; 1866 } 1867 wr32(IGC_CTRL, ctrl); 1868 } 1869 igc_restore_vlan(struct igc_adapter * adapter)1870 static void igc_restore_vlan(struct igc_adapter *adapter) 1871 { 1872 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1873 } 1874 igc_get_rx_buffer(struct igc_ring * rx_ring,const unsigned int size,int * rx_buffer_pgcnt)1875 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, 1876 const unsigned int size, 1877 int *rx_buffer_pgcnt) 1878 { 1879 struct igc_rx_buffer *rx_buffer; 1880 1881 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1882 *rx_buffer_pgcnt = 1883 #if (PAGE_SIZE < 8192) 1884 page_count(rx_buffer->page); 1885 #else 1886 0; 1887 #endif 1888 prefetchw(rx_buffer->page); 1889 1890 /* we are reusing so sync this buffer for CPU use */ 1891 dma_sync_single_range_for_cpu(rx_ring->dev, 1892 rx_buffer->dma, 1893 rx_buffer->page_offset, 1894 size, 1895 DMA_FROM_DEVICE); 1896 1897 rx_buffer->pagecnt_bias--; 1898 1899 return rx_buffer; 1900 } 1901 igc_rx_buffer_flip(struct igc_rx_buffer * buffer,unsigned int truesize)1902 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1903 unsigned int truesize) 1904 { 1905 #if (PAGE_SIZE < 8192) 1906 buffer->page_offset ^= truesize; 1907 #else 1908 buffer->page_offset += truesize; 1909 #endif 1910 } 1911 igc_get_rx_frame_truesize(struct igc_ring * ring,unsigned int size)1912 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1913 unsigned int size) 1914 { 1915 unsigned int truesize; 1916 1917 #if (PAGE_SIZE < 8192) 1918 truesize = igc_rx_pg_size(ring) / 2; 1919 #else 1920 truesize = ring_uses_build_skb(ring) ? 
1921 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1922 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1923 SKB_DATA_ALIGN(size); 1924 #endif 1925 return truesize; 1926 } 1927 1928 /** 1929 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1930 * @rx_ring: rx descriptor ring to transact packets on 1931 * @rx_buffer: buffer containing page to add 1932 * @skb: sk_buff to place the data into 1933 * @size: size of buffer to be added 1934 * 1935 * This function will add the data contained in rx_buffer->page to the skb. 1936 */ igc_add_rx_frag(struct igc_ring * rx_ring,struct igc_rx_buffer * rx_buffer,struct sk_buff * skb,unsigned int size)1937 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1938 struct igc_rx_buffer *rx_buffer, 1939 struct sk_buff *skb, 1940 unsigned int size) 1941 { 1942 unsigned int truesize; 1943 1944 #if (PAGE_SIZE < 8192) 1945 truesize = igc_rx_pg_size(rx_ring) / 2; 1946 #else 1947 truesize = ring_uses_build_skb(rx_ring) ? 1948 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1949 SKB_DATA_ALIGN(size); 1950 #endif 1951 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1952 rx_buffer->page_offset, size, truesize); 1953 1954 igc_rx_buffer_flip(rx_buffer, truesize); 1955 } 1956 igc_build_skb(struct igc_ring * rx_ring,struct igc_rx_buffer * rx_buffer,struct xdp_buff * xdp)1957 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1958 struct igc_rx_buffer *rx_buffer, 1959 struct xdp_buff *xdp) 1960 { 1961 unsigned int size = xdp->data_end - xdp->data; 1962 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1963 unsigned int metasize = xdp->data - xdp->data_meta; 1964 struct sk_buff *skb; 1965 1966 /* prefetch first cache line of first page */ 1967 net_prefetch(xdp->data_meta); 1968 1969 /* build an skb around the page buffer */ 1970 skb = napi_build_skb(xdp->data_hard_start, truesize); 1971 if (unlikely(!skb)) 1972 return NULL; 1973 1974 /* update pointers within the skb to store the data */ 1975 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1976 __skb_put(skb, size); 1977 if (metasize) 1978 skb_metadata_set(skb, metasize); 1979 1980 igc_rx_buffer_flip(rx_buffer, truesize); 1981 return skb; 1982 } 1983 igc_construct_skb(struct igc_ring * rx_ring,struct igc_rx_buffer * rx_buffer,struct igc_xdp_buff * ctx)1984 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1985 struct igc_rx_buffer *rx_buffer, 1986 struct igc_xdp_buff *ctx) 1987 { 1988 struct xdp_buff *xdp = &ctx->xdp; 1989 unsigned int metasize = xdp->data - xdp->data_meta; 1990 unsigned int size = xdp->data_end - xdp->data; 1991 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1992 void *va = xdp->data; 1993 unsigned int headlen; 1994 struct sk_buff *skb; 1995 1996 /* prefetch first cache line of first page */ 1997 net_prefetch(xdp->data_meta); 1998 1999 /* allocate a skb to store the frags */ 2000 skb = napi_alloc_skb(&rx_ring->q_vector->napi, 2001 IGC_RX_HDR_LEN + metasize); 2002 if (unlikely(!skb)) 2003 return NULL; 2004 2005 if (ctx->rx_ts) { 2006 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2007 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2008 } 2009 2010 /* Determine available headroom for copy */ 2011 headlen = size; 2012 if (headlen > IGC_RX_HDR_LEN) 2013 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 2014 2015 /* align pull length to size of long to optimize memcpy performance */ 2016 memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 2017 ALIGN(headlen + metasize, sizeof(long))); 2018 2019 if (metasize) { 2020 
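/* The metadata was copied into the linear area just ahead of the packet
 * data by the memcpy above; record its length and pull skb->data past it
 * so it points at the Ethernet header again.
 */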
skb_metadata_set(skb, metasize); 2021 __skb_pull(skb, metasize); 2022 } 2023 2024 /* update all of the pointers */ 2025 size -= headlen; 2026 if (size) { 2027 skb_add_rx_frag(skb, 0, rx_buffer->page, 2028 (va + headlen) - page_address(rx_buffer->page), 2029 size, truesize); 2030 igc_rx_buffer_flip(rx_buffer, truesize); 2031 } else { 2032 rx_buffer->pagecnt_bias++; 2033 } 2034 2035 return skb; 2036 } 2037 2038 /** 2039 * igc_reuse_rx_page - page flip buffer and store it back on the ring 2040 * @rx_ring: rx descriptor ring to store buffers on 2041 * @old_buff: donor buffer to have page reused 2042 * 2043 * Synchronizes page for reuse by the adapter 2044 */ igc_reuse_rx_page(struct igc_ring * rx_ring,struct igc_rx_buffer * old_buff)2045 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 2046 struct igc_rx_buffer *old_buff) 2047 { 2048 u16 nta = rx_ring->next_to_alloc; 2049 struct igc_rx_buffer *new_buff; 2050 2051 new_buff = &rx_ring->rx_buffer_info[nta]; 2052 2053 /* update, and store next to alloc */ 2054 nta++; 2055 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 2056 2057 /* Transfer page from old buffer to new buffer. 2058 * Move each member individually to avoid possible store 2059 * forwarding stalls. 2060 */ 2061 new_buff->dma = old_buff->dma; 2062 new_buff->page = old_buff->page; 2063 new_buff->page_offset = old_buff->page_offset; 2064 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 2065 } 2066 igc_can_reuse_rx_page(struct igc_rx_buffer * rx_buffer,int rx_buffer_pgcnt)2067 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 2068 int rx_buffer_pgcnt) 2069 { 2070 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 2071 struct page *page = rx_buffer->page; 2072 2073 /* avoid re-using remote and pfmemalloc pages */ 2074 if (!dev_page_is_reusable(page)) 2075 return false; 2076 2077 #if (PAGE_SIZE < 8192) 2078 /* if we are only owner of page we can reuse it */ 2079 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 2080 return false; 2081 #else 2082 #define IGC_LAST_OFFSET \ 2083 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 2084 2085 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 2086 return false; 2087 #endif 2088 2089 /* If we have drained the page fragment pool we need to update 2090 * the pagecnt_bias and page count so that we fully restock the 2091 * number of references the driver holds. 2092 */ 2093 if (unlikely(pagecnt_bias == 1)) { 2094 page_ref_add(page, USHRT_MAX - 1); 2095 rx_buffer->pagecnt_bias = USHRT_MAX; 2096 } 2097 2098 return true; 2099 } 2100 2101 /** 2102 * igc_is_non_eop - process handling of non-EOP buffers 2103 * @rx_ring: Rx ring being processed 2104 * @rx_desc: Rx descriptor for current buffer 2105 * 2106 * This function updates next to clean. If the buffer is an EOP buffer 2107 * this function exits returning false, otherwise it will place the 2108 * sk_buff in the next buffer to be chained and return true indicating 2109 * that this is in fact a non-EOP buffer. 2110 */ igc_is_non_eop(struct igc_ring * rx_ring,union igc_adv_rx_desc * rx_desc)2111 static bool igc_is_non_eop(struct igc_ring *rx_ring, 2112 union igc_adv_rx_desc *rx_desc) 2113 { 2114 u32 ntc = rx_ring->next_to_clean + 1; 2115 2116 /* fetch, update, and store next to clean */ 2117 ntc = (ntc < rx_ring->count) ? 
ntc : 0; 2118 rx_ring->next_to_clean = ntc; 2119 2120 prefetch(IGC_RX_DESC(rx_ring, ntc)); 2121 2122 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 2123 return false; 2124 2125 return true; 2126 } 2127 2128 /** 2129 * igc_cleanup_headers - Correct corrupted or empty headers 2130 * @rx_ring: rx descriptor ring packet is being transacted on 2131 * @rx_desc: pointer to the EOP Rx descriptor 2132 * @skb: pointer to current skb being fixed 2133 * 2134 * Address the case where we are pulling data in on pages only 2135 * and as such no data is present in the skb header. 2136 * 2137 * In addition if skb is not at least 60 bytes we need to pad it so that 2138 * it is large enough to qualify as a valid Ethernet frame. 2139 * 2140 * Returns true if an error was encountered and skb was freed. 2141 */ igc_cleanup_headers(struct igc_ring * rx_ring,union igc_adv_rx_desc * rx_desc,struct sk_buff * skb)2142 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 2143 union igc_adv_rx_desc *rx_desc, 2144 struct sk_buff *skb) 2145 { 2146 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 2147 struct net_device *netdev = rx_ring->netdev; 2148 2149 if (!(netdev->features & NETIF_F_RXALL)) { 2150 dev_kfree_skb_any(skb); 2151 return true; 2152 } 2153 } 2154 2155 /* if eth_skb_pad returns an error the skb was freed */ 2156 if (eth_skb_pad(skb)) 2157 return true; 2158 2159 return false; 2160 } 2161 igc_put_rx_buffer(struct igc_ring * rx_ring,struct igc_rx_buffer * rx_buffer,int rx_buffer_pgcnt)2162 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 2163 struct igc_rx_buffer *rx_buffer, 2164 int rx_buffer_pgcnt) 2165 { 2166 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 2167 /* hand second half of page back to the ring */ 2168 igc_reuse_rx_page(rx_ring, rx_buffer); 2169 } else { 2170 /* We are not reusing the buffer so unmap it and free 2171 * any references we are holding to it 2172 */ 2173 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 2174 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 2175 IGC_RX_DMA_ATTR); 2176 __page_frag_cache_drain(rx_buffer->page, 2177 rx_buffer->pagecnt_bias); 2178 } 2179 2180 /* clear contents of rx_buffer */ 2181 rx_buffer->page = NULL; 2182 } 2183 igc_rx_offset(struct igc_ring * rx_ring)2184 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 2185 { 2186 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 2187 2188 if (ring_uses_build_skb(rx_ring)) 2189 return IGC_SKB_PAD; 2190 if (igc_xdp_is_enabled(adapter)) 2191 return XDP_PACKET_HEADROOM; 2192 2193 return 0; 2194 } 2195 igc_alloc_mapped_page(struct igc_ring * rx_ring,struct igc_rx_buffer * bi)2196 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 2197 struct igc_rx_buffer *bi) 2198 { 2199 struct page *page = bi->page; 2200 dma_addr_t dma; 2201 2202 /* since we are recycling buffers we should seldom need to alloc */ 2203 if (likely(page)) 2204 return true; 2205 2206 /* alloc new page for storage */ 2207 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 2208 if (unlikely(!page)) { 2209 rx_ring->rx_stats.alloc_failed++; 2210 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2211 return false; 2212 } 2213 2214 /* map page for use */ 2215 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 2216 igc_rx_pg_size(rx_ring), 2217 DMA_FROM_DEVICE, 2218 IGC_RX_DMA_ATTR); 2219 2220 /* if mapping failed free memory back to system since 2221 * there isn't much point in holding memory we can't use 2222 */ 2223 if (dma_mapping_error(rx_ring->dev, dma)) { 2224 
__free_page(page); 2225 2226 rx_ring->rx_stats.alloc_failed++; 2227 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2228 return false; 2229 } 2230 2231 bi->dma = dma; 2232 bi->page = page; 2233 bi->page_offset = igc_rx_offset(rx_ring); 2234 page_ref_add(page, USHRT_MAX - 1); 2235 bi->pagecnt_bias = USHRT_MAX; 2236 2237 return true; 2238 } 2239 2240 /** 2241 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 2242 * @rx_ring: rx descriptor ring 2243 * @cleaned_count: number of buffers to clean 2244 */ igc_alloc_rx_buffers(struct igc_ring * rx_ring,u16 cleaned_count)2245 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 2246 { 2247 union igc_adv_rx_desc *rx_desc; 2248 u16 i = rx_ring->next_to_use; 2249 struct igc_rx_buffer *bi; 2250 u16 bufsz; 2251 2252 /* nothing to do */ 2253 if (!cleaned_count) 2254 return; 2255 2256 rx_desc = IGC_RX_DESC(rx_ring, i); 2257 bi = &rx_ring->rx_buffer_info[i]; 2258 i -= rx_ring->count; 2259 2260 bufsz = igc_rx_bufsz(rx_ring); 2261 2262 do { 2263 if (!igc_alloc_mapped_page(rx_ring, bi)) 2264 break; 2265 2266 /* sync the buffer for use by the device */ 2267 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 2268 bi->page_offset, bufsz, 2269 DMA_FROM_DEVICE); 2270 2271 /* Refresh the desc even if buffer_addrs didn't change 2272 * because each write-back erases this info. 2273 */ 2274 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2275 2276 rx_desc++; 2277 bi++; 2278 i++; 2279 if (unlikely(!i)) { 2280 rx_desc = IGC_RX_DESC(rx_ring, 0); 2281 bi = rx_ring->rx_buffer_info; 2282 i -= rx_ring->count; 2283 } 2284 2285 /* clear the length for the next_to_use descriptor */ 2286 rx_desc->wb.upper.length = 0; 2287 2288 cleaned_count--; 2289 } while (cleaned_count); 2290 2291 i += rx_ring->count; 2292 2293 if (rx_ring->next_to_use != i) { 2294 /* record the next descriptor to use */ 2295 rx_ring->next_to_use = i; 2296 2297 /* update next to alloc since we have filled the ring */ 2298 rx_ring->next_to_alloc = i; 2299 2300 /* Force memory writes to complete before letting h/w 2301 * know there are new descriptors to fetch. (Only 2302 * applicable for weak-ordered memory model archs, 2303 * such as IA-64). 2304 */ 2305 wmb(); 2306 writel(i, rx_ring->tail); 2307 } 2308 } 2309 igc_alloc_rx_buffers_zc(struct igc_ring * ring,u16 count)2310 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2311 { 2312 union igc_adv_rx_desc *desc; 2313 u16 i = ring->next_to_use; 2314 struct igc_rx_buffer *bi; 2315 dma_addr_t dma; 2316 bool ok = true; 2317 2318 if (!count) 2319 return ok; 2320 2321 XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); 2322 2323 desc = IGC_RX_DESC(ring, i); 2324 bi = &ring->rx_buffer_info[i]; 2325 i -= ring->count; 2326 2327 do { 2328 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2329 if (!bi->xdp) { 2330 ok = false; 2331 break; 2332 } 2333 2334 dma = xsk_buff_xdp_get_dma(bi->xdp); 2335 desc->read.pkt_addr = cpu_to_le64(dma); 2336 2337 desc++; 2338 bi++; 2339 i++; 2340 if (unlikely(!i)) { 2341 desc = IGC_RX_DESC(ring, 0); 2342 bi = ring->rx_buffer_info; 2343 i -= ring->count; 2344 } 2345 2346 /* Clear the length for the next_to_use descriptor. */ 2347 desc->wb.upper.length = 0; 2348 2349 count--; 2350 } while (count); 2351 2352 i += ring->count; 2353 2354 if (ring->next_to_use != i) { 2355 ring->next_to_use = i; 2356 2357 /* Force memory writes to complete before letting h/w 2358 * know there are new descriptors to fetch. 
(Only 2359 * applicable for weak-ordered memory model archs, 2360 * such as IA-64). 2361 */ 2362 wmb(); 2363 writel(i, ring->tail); 2364 } 2365 2366 return ok; 2367 } 2368 2369 /* This function requires __netif_tx_lock is held by the caller. */ igc_xdp_init_tx_descriptor(struct igc_ring * ring,struct xdp_frame * xdpf)2370 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2371 struct xdp_frame *xdpf) 2372 { 2373 struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 2374 u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0; 2375 u16 count, index = ring->next_to_use; 2376 struct igc_tx_buffer *head = &ring->tx_buffer_info[index]; 2377 struct igc_tx_buffer *buffer = head; 2378 union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index); 2379 u32 olinfo_status, len = xdpf->len, cmd_type; 2380 void *data = xdpf->data; 2381 u16 i; 2382 2383 count = TXD_USE_COUNT(len); 2384 for (i = 0; i < nr_frags; i++) 2385 count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); 2386 2387 if (igc_maybe_stop_tx(ring, count + 3)) { 2388 /* this is a hard error */ 2389 return -EBUSY; 2390 } 2391 2392 i = 0; 2393 head->bytecount = xdp_get_frame_len(xdpf); 2394 head->type = IGC_TX_BUFFER_TYPE_XDP; 2395 head->gso_segs = 1; 2396 head->xdpf = xdpf; 2397 2398 olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2399 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2400 2401 for (;;) { 2402 dma_addr_t dma; 2403 2404 dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE); 2405 if (dma_mapping_error(ring->dev, dma)) { 2406 netdev_err_once(ring->netdev, 2407 "Failed to map DMA for TX\n"); 2408 goto unmap; 2409 } 2410 2411 dma_unmap_len_set(buffer, len, len); 2412 dma_unmap_addr_set(buffer, dma, dma); 2413 2414 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2415 IGC_ADVTXD_DCMD_IFCS | len; 2416 2417 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2418 desc->read.buffer_addr = cpu_to_le64(dma); 2419 2420 buffer->protocol = 0; 2421 2422 if (++index == ring->count) 2423 index = 0; 2424 2425 if (i == nr_frags) 2426 break; 2427 2428 buffer = &ring->tx_buffer_info[index]; 2429 desc = IGC_TX_DESC(ring, index); 2430 desc->read.olinfo_status = 0; 2431 2432 data = skb_frag_address(&sinfo->frags[i]); 2433 len = skb_frag_size(&sinfo->frags[i]); 2434 i++; 2435 } 2436 desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD); 2437 2438 netdev_tx_sent_queue(txring_txq(ring), head->bytecount); 2439 /* set the timestamp */ 2440 head->time_stamp = jiffies; 2441 /* set next_to_watch value indicating a packet is present */ 2442 head->next_to_watch = desc; 2443 ring->next_to_use = index; 2444 2445 return 0; 2446 2447 unmap: 2448 for (;;) { 2449 buffer = &ring->tx_buffer_info[index]; 2450 if (dma_unmap_len(buffer, len)) 2451 dma_unmap_page(ring->dev, 2452 dma_unmap_addr(buffer, dma), 2453 dma_unmap_len(buffer, len), 2454 DMA_TO_DEVICE); 2455 dma_unmap_len_set(buffer, len, 0); 2456 if (buffer == head) 2457 break; 2458 2459 if (!index) 2460 index += ring->count; 2461 index--; 2462 } 2463 2464 return -ENOMEM; 2465 } 2466 igc_xdp_get_tx_ring(struct igc_adapter * adapter,int cpu)2467 static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, 2468 int cpu) 2469 { 2470 int index = cpu; 2471 2472 if (unlikely(index < 0)) 2473 index = 0; 2474 2475 while (index >= adapter->num_tx_queues) 2476 index -= adapter->num_tx_queues; 2477 2478 return adapter->tx_ring[index]; 2479 } 2480 igc_xdp_xmit_back(struct igc_adapter * adapter,struct xdp_buff * xdp)2481 static int 
igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2482 { 2483 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2484 int cpu = smp_processor_id(); 2485 struct netdev_queue *nq; 2486 struct igc_ring *ring; 2487 int res; 2488 2489 if (unlikely(!xdpf)) 2490 return -EFAULT; 2491 2492 ring = igc_xdp_get_tx_ring(adapter, cpu); 2493 nq = txring_txq(ring); 2494 2495 __netif_tx_lock(nq, cpu); 2496 /* Avoid transmit queue timeout since we share it with the slow path */ 2497 txq_trans_cond_update(nq); 2498 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2499 __netif_tx_unlock(nq); 2500 return res; 2501 } 2502 2503 /* This function assumes rcu_read_lock() is held by the caller. */ __igc_xdp_run_prog(struct igc_adapter * adapter,struct bpf_prog * prog,struct xdp_buff * xdp)2504 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2505 struct bpf_prog *prog, 2506 struct xdp_buff *xdp) 2507 { 2508 u32 act = bpf_prog_run_xdp(prog, xdp); 2509 2510 switch (act) { 2511 case XDP_PASS: 2512 return IGC_XDP_PASS; 2513 case XDP_TX: 2514 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2515 goto out_failure; 2516 return IGC_XDP_TX; 2517 case XDP_REDIRECT: 2518 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2519 goto out_failure; 2520 return IGC_XDP_REDIRECT; 2521 break; 2522 default: 2523 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); 2524 fallthrough; 2525 case XDP_ABORTED: 2526 out_failure: 2527 trace_xdp_exception(adapter->netdev, prog, act); 2528 fallthrough; 2529 case XDP_DROP: 2530 return IGC_XDP_CONSUMED; 2531 } 2532 } 2533 igc_xdp_run_prog(struct igc_adapter * adapter,struct xdp_buff * xdp)2534 static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp) 2535 { 2536 struct bpf_prog *prog; 2537 int res; 2538 2539 prog = READ_ONCE(adapter->xdp_prog); 2540 if (!prog) { 2541 res = IGC_XDP_PASS; 2542 goto out; 2543 } 2544 2545 res = __igc_xdp_run_prog(adapter, prog, xdp); 2546 2547 out: 2548 return res; 2549 } 2550 2551 /* This function assumes __netif_tx_lock is held by the caller. */ igc_flush_tx_descriptors(struct igc_ring * ring)2552 static void igc_flush_tx_descriptors(struct igc_ring *ring) 2553 { 2554 /* Once tail pointer is updated, hardware can fetch the descriptors 2555 * any time so we issue a write membar here to ensure all memory 2556 * writes are complete before the tail pointer is updated. 
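 * The barrier orders those descriptor writes against the writel() to the
 * tail register below.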
2557 */ 2558 wmb(); 2559 writel(ring->next_to_use, ring->tail); 2560 } 2561 igc_finalize_xdp(struct igc_adapter * adapter,int status)2562 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2563 { 2564 int cpu = smp_processor_id(); 2565 struct netdev_queue *nq; 2566 struct igc_ring *ring; 2567 2568 if (status & IGC_XDP_TX) { 2569 ring = igc_xdp_get_tx_ring(adapter, cpu); 2570 nq = txring_txq(ring); 2571 2572 __netif_tx_lock(nq, cpu); 2573 igc_flush_tx_descriptors(ring); 2574 __netif_tx_unlock(nq); 2575 } 2576 2577 if (status & IGC_XDP_REDIRECT) 2578 xdp_do_flush(); 2579 } 2580 igc_update_rx_stats(struct igc_q_vector * q_vector,unsigned int packets,unsigned int bytes)2581 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2582 unsigned int packets, unsigned int bytes) 2583 { 2584 struct igc_ring *ring = q_vector->rx.ring; 2585 2586 u64_stats_update_begin(&ring->rx_syncp); 2587 ring->rx_stats.packets += packets; 2588 ring->rx_stats.bytes += bytes; 2589 u64_stats_update_end(&ring->rx_syncp); 2590 2591 q_vector->rx.total_packets += packets; 2592 q_vector->rx.total_bytes += bytes; 2593 } 2594 igc_clean_rx_irq(struct igc_q_vector * q_vector,const int budget)2595 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2596 { 2597 unsigned int total_bytes = 0, total_packets = 0; 2598 struct igc_adapter *adapter = q_vector->adapter; 2599 struct igc_ring *rx_ring = q_vector->rx.ring; 2600 struct sk_buff *skb = rx_ring->skb; 2601 u16 cleaned_count = igc_desc_unused(rx_ring); 2602 int xdp_status = 0, rx_buffer_pgcnt; 2603 int xdp_res = 0; 2604 2605 while (likely(total_packets < budget)) { 2606 struct igc_xdp_buff ctx = { .rx_ts = NULL }; 2607 struct igc_rx_buffer *rx_buffer; 2608 union igc_adv_rx_desc *rx_desc; 2609 unsigned int size, truesize; 2610 int pkt_offset = 0; 2611 void *pktbuf; 2612 2613 /* return some buffers to hardware, one at a time is too slow */ 2614 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2615 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2616 cleaned_count = 0; 2617 } 2618 2619 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2620 size = le16_to_cpu(rx_desc->wb.upper.length); 2621 if (!size) 2622 break; 2623 2624 /* This memory barrier is needed to keep us from reading 2625 * any other fields out of the rx_desc until we know the 2626 * descriptor has been written back 2627 */ 2628 dma_rmb(); 2629 2630 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2631 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2632 2633 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2634 2635 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2636 ctx.rx_ts = pktbuf; 2637 pkt_offset = IGC_TS_HDR_LEN; 2638 size -= IGC_TS_HDR_LEN; 2639 } 2640 2641 if (!skb) { 2642 xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); 2643 xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), 2644 igc_rx_offset(rx_ring) + pkt_offset, 2645 size, true); 2646 xdp_buff_clear_frags_flag(&ctx.xdp); 2647 ctx.rx_desc = rx_desc; 2648 2649 xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp); 2650 } 2651 2652 if (xdp_res) { 2653 switch (xdp_res) { 2654 case IGC_XDP_CONSUMED: 2655 rx_buffer->pagecnt_bias++; 2656 break; 2657 case IGC_XDP_TX: 2658 case IGC_XDP_REDIRECT: 2659 igc_rx_buffer_flip(rx_buffer, truesize); 2660 xdp_status |= xdp_res; 2661 break; 2662 } 2663 2664 total_packets++; 2665 total_bytes += size; 2666 } else if (skb) 2667 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2668 else if (ring_uses_build_skb(rx_ring)) 2669 skb = 
igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); 2670 else 2671 skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); 2672 2673 /* exit if we failed to retrieve a buffer */ 2674 if (!xdp_res && !skb) { 2675 rx_ring->rx_stats.alloc_failed++; 2676 rx_buffer->pagecnt_bias++; 2677 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 2678 break; 2679 } 2680 2681 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2682 cleaned_count++; 2683 2684 /* fetch next buffer in frame if non-eop */ 2685 if (igc_is_non_eop(rx_ring, rx_desc)) 2686 continue; 2687 2688 /* verify the packet layout is correct */ 2689 if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2690 skb = NULL; 2691 continue; 2692 } 2693 2694 /* probably a little skewed due to removing CRC */ 2695 total_bytes += skb->len; 2696 2697 /* populate checksum, VLAN, and protocol */ 2698 igc_process_skb_fields(rx_ring, rx_desc, skb); 2699 2700 napi_gro_receive(&q_vector->napi, skb); 2701 2702 /* reset skb pointer */ 2703 skb = NULL; 2704 2705 /* update budget accounting */ 2706 total_packets++; 2707 } 2708 2709 if (xdp_status) 2710 igc_finalize_xdp(adapter, xdp_status); 2711 2712 /* place incomplete frames back on ring for completion */ 2713 rx_ring->skb = skb; 2714 2715 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2716 2717 if (cleaned_count) 2718 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2719 2720 return total_packets; 2721 } 2722 igc_construct_skb_zc(struct igc_ring * ring,struct igc_xdp_buff * ctx)2723 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2724 struct igc_xdp_buff *ctx) 2725 { 2726 struct xdp_buff *xdp = &ctx->xdp; 2727 unsigned int totalsize = xdp->data_end - xdp->data_meta; 2728 unsigned int metasize = xdp->data - xdp->data_meta; 2729 struct sk_buff *skb; 2730 2731 net_prefetch(xdp->data_meta); 2732 2733 skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); 2734 if (unlikely(!skb)) 2735 return NULL; 2736 2737 memcpy(__skb_put(skb, totalsize), xdp->data_meta, 2738 ALIGN(totalsize, sizeof(long))); 2739 2740 if (metasize) { 2741 skb_metadata_set(skb, metasize); 2742 __skb_pull(skb, metasize); 2743 } 2744 2745 if (ctx->rx_ts) { 2746 skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; 2747 skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; 2748 } 2749 2750 return skb; 2751 } 2752 igc_dispatch_skb_zc(struct igc_q_vector * q_vector,union igc_adv_rx_desc * desc,struct igc_xdp_buff * ctx)2753 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, 2754 union igc_adv_rx_desc *desc, 2755 struct igc_xdp_buff *ctx) 2756 { 2757 struct igc_ring *ring = q_vector->rx.ring; 2758 struct sk_buff *skb; 2759 2760 skb = igc_construct_skb_zc(ring, ctx); 2761 if (!skb) { 2762 ring->rx_stats.alloc_failed++; 2763 set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); 2764 return; 2765 } 2766 2767 if (igc_cleanup_headers(ring, desc, skb)) 2768 return; 2769 2770 igc_process_skb_fields(ring, desc, skb); 2771 napi_gro_receive(&q_vector->napi, skb); 2772 } 2773 xsk_buff_to_igc_ctx(struct xdp_buff * xdp)2774 static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) 2775 { 2776 /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. 
The 2777 * igc_xdp_buff shares its layout with xdp_buff_xsk and private 2778 * igc_xdp_buff fields fall into xdp_buff_xsk->cb 2779 */ 2780 return (struct igc_xdp_buff *)xdp; 2781 } 2782 igc_clean_rx_irq_zc(struct igc_q_vector * q_vector,const int budget)2783 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) 2784 { 2785 struct igc_adapter *adapter = q_vector->adapter; 2786 struct igc_ring *ring = q_vector->rx.ring; 2787 u16 cleaned_count = igc_desc_unused(ring); 2788 int total_bytes = 0, total_packets = 0; 2789 u16 ntc = ring->next_to_clean; 2790 struct bpf_prog *prog; 2791 bool failure = false; 2792 int xdp_status = 0; 2793 2794 rcu_read_lock(); 2795 2796 prog = READ_ONCE(adapter->xdp_prog); 2797 2798 while (likely(total_packets < budget)) { 2799 union igc_adv_rx_desc *desc; 2800 struct igc_rx_buffer *bi; 2801 struct igc_xdp_buff *ctx; 2802 unsigned int size; 2803 int res; 2804 2805 desc = IGC_RX_DESC(ring, ntc); 2806 size = le16_to_cpu(desc->wb.upper.length); 2807 if (!size) 2808 break; 2809 2810 /* This memory barrier is needed to keep us from reading 2811 * any other fields out of the rx_desc until we know the 2812 * descriptor has been written back 2813 */ 2814 dma_rmb(); 2815 2816 bi = &ring->rx_buffer_info[ntc]; 2817 2818 ctx = xsk_buff_to_igc_ctx(bi->xdp); 2819 ctx->rx_desc = desc; 2820 2821 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { 2822 ctx->rx_ts = bi->xdp->data; 2823 2824 bi->xdp->data += IGC_TS_HDR_LEN; 2825 2826 /* HW timestamp has been copied into local variable. Metadata 2827 * length when XDP program is called should be 0. 2828 */ 2829 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2830 size -= IGC_TS_HDR_LEN; 2831 } else { 2832 ctx->rx_ts = NULL; 2833 } 2834 2835 bi->xdp->data_end = bi->xdp->data + size; 2836 xsk_buff_dma_sync_for_cpu(bi->xdp); 2837 2838 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2839 switch (res) { 2840 case IGC_XDP_PASS: 2841 igc_dispatch_skb_zc(q_vector, desc, ctx); 2842 fallthrough; 2843 case IGC_XDP_CONSUMED: 2844 xsk_buff_free(bi->xdp); 2845 break; 2846 case IGC_XDP_TX: 2847 case IGC_XDP_REDIRECT: 2848 xdp_status |= res; 2849 break; 2850 } 2851 2852 bi->xdp = NULL; 2853 total_bytes += size; 2854 total_packets++; 2855 cleaned_count++; 2856 ntc++; 2857 if (ntc == ring->count) 2858 ntc = 0; 2859 } 2860 2861 ring->next_to_clean = ntc; 2862 rcu_read_unlock(); 2863 2864 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2865 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2866 2867 if (xdp_status) 2868 igc_finalize_xdp(adapter, xdp_status); 2869 2870 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2871 2872 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2873 if (failure || ring->next_to_clean == ring->next_to_use) 2874 xsk_set_rx_need_wakeup(ring->xsk_pool); 2875 else 2876 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2877 return total_packets; 2878 } 2879 2880 return failure ? 
budget : total_packets; 2881 } 2882 igc_update_tx_stats(struct igc_q_vector * q_vector,unsigned int packets,unsigned int bytes)2883 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2884 unsigned int packets, unsigned int bytes) 2885 { 2886 struct igc_ring *ring = q_vector->tx.ring; 2887 2888 u64_stats_update_begin(&ring->tx_syncp); 2889 ring->tx_stats.bytes += bytes; 2890 ring->tx_stats.packets += packets; 2891 u64_stats_update_end(&ring->tx_syncp); 2892 2893 q_vector->tx.total_bytes += bytes; 2894 q_vector->tx.total_packets += packets; 2895 } 2896 igc_xsk_request_timestamp(void * _priv)2897 static void igc_xsk_request_timestamp(void *_priv) 2898 { 2899 struct igc_metadata_request *meta_req = _priv; 2900 struct igc_ring *tx_ring = meta_req->tx_ring; 2901 struct igc_tx_timestamp_request *tstamp; 2902 u32 tx_flags = IGC_TX_FLAGS_TSTAMP; 2903 struct igc_adapter *adapter; 2904 unsigned long lock_flags; 2905 bool found = false; 2906 int i; 2907 2908 if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { 2909 adapter = netdev_priv(tx_ring->netdev); 2910 2911 spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); 2912 2913 /* Search for available tstamp regs */ 2914 for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { 2915 tstamp = &adapter->tx_tstamp[i]; 2916 2917 /* tstamp->skb and tstamp->xsk_tx_buffer are in union. 2918 * When tstamp->skb is equal to NULL, 2919 * tstamp->xsk_tx_buffer is equal to NULL as well. 2920 * This condition means that the particular tstamp reg 2921 * is not occupied by other packet. 2922 */ 2923 if (!tstamp->skb) { 2924 found = true; 2925 break; 2926 } 2927 } 2928 2929 /* Return if no available tstamp regs */ 2930 if (!found) { 2931 adapter->tx_hwtstamp_skipped++; 2932 spin_unlock_irqrestore(&adapter->ptp_tx_lock, 2933 lock_flags); 2934 return; 2935 } 2936 2937 tstamp->start = jiffies; 2938 tstamp->xsk_queue_index = tx_ring->queue_index; 2939 tstamp->xsk_tx_buffer = meta_req->tx_buffer; 2940 tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; 2941 2942 /* Hold the transmit completion until timestamp is ready */ 2943 meta_req->tx_buffer->xsk_pending_ts = true; 2944 2945 /* Keep the pointer to tx_timestamp, which is located in XDP 2946 * metadata area. It is the location to store the value of 2947 * tx hardware timestamp. 2948 */ 2949 xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); 2950 2951 /* Set timestamp bit based on the _TSTAMP(_X) bit. 
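 * IGC_TX_FLAGS_TSTAMP selects IGC_ADVTXD_MAC_TSTAMP, and each
 * IGC_TX_FLAGS_TSTAMP_<n> flag additionally selects the matching
 * IGC_ADVTXD_TSTAMP_REG_<n> bit in the descriptor command type below.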
*/ 2952 tx_flags |= tstamp->flags; 2953 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2954 IGC_TX_FLAGS_TSTAMP, 2955 (IGC_ADVTXD_MAC_TSTAMP)); 2956 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2957 IGC_TX_FLAGS_TSTAMP_1, 2958 (IGC_ADVTXD_TSTAMP_REG_1)); 2959 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2960 IGC_TX_FLAGS_TSTAMP_2, 2961 (IGC_ADVTXD_TSTAMP_REG_2)); 2962 meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, 2963 IGC_TX_FLAGS_TSTAMP_3, 2964 (IGC_ADVTXD_TSTAMP_REG_3)); 2965 2966 spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); 2967 } 2968 } 2969 igc_xsk_fill_timestamp(void * _priv)2970 static u64 igc_xsk_fill_timestamp(void *_priv) 2971 { 2972 return *(u64 *)_priv; 2973 } 2974 igc_xsk_request_launch_time(u64 launch_time,void * _priv)2975 static void igc_xsk_request_launch_time(u64 launch_time, void *_priv) 2976 { 2977 struct igc_metadata_request *meta_req = _priv; 2978 struct igc_ring *tx_ring = meta_req->tx_ring; 2979 __le32 launch_time_offset; 2980 bool insert_empty = false; 2981 bool first_flag = false; 2982 u16 used_desc = 0; 2983 2984 if (!tx_ring->launchtime_enable) 2985 return; 2986 2987 launch_time_offset = igc_tx_launchtime(tx_ring, 2988 ns_to_ktime(launch_time), 2989 &first_flag, &insert_empty); 2990 if (insert_empty) { 2991 /* Disregard the launch time request if the required empty frame 2992 * fails to be inserted. 2993 */ 2994 if (igc_insert_empty_frame(tx_ring)) 2995 return; 2996 2997 meta_req->tx_buffer = 2998 &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 2999 /* Inserting an empty packet requires two descriptors: 3000 * one data descriptor and one context descriptor. 3001 */ 3002 used_desc += 2; 3003 } 3004 3005 /* Use one context descriptor to specify launch time and first flag. */ 3006 igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0); 3007 used_desc += 1; 3008 3009 /* Update the number of used descriptors in this request */ 3010 meta_req->used_desc += used_desc; 3011 } 3012 3013 const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { 3014 .tmo_request_timestamp = igc_xsk_request_timestamp, 3015 .tmo_fill_timestamp = igc_xsk_fill_timestamp, 3016 .tmo_request_launch_time = igc_xsk_request_launch_time, 3017 }; 3018 igc_xdp_xmit_zc(struct igc_ring * ring)3019 static void igc_xdp_xmit_zc(struct igc_ring *ring) 3020 { 3021 struct xsk_buff_pool *pool = ring->xsk_pool; 3022 struct netdev_queue *nq = txring_txq(ring); 3023 union igc_adv_tx_desc *tx_desc = NULL; 3024 int cpu = smp_processor_id(); 3025 struct xdp_desc xdp_desc; 3026 u16 budget, ntu; 3027 3028 if (!netif_carrier_ok(ring->netdev)) 3029 return; 3030 3031 __netif_tx_lock(nq, cpu); 3032 3033 /* Avoid transmit queue timeout since we share it with the slow path */ 3034 txq_trans_cond_update(nq); 3035 3036 ntu = ring->next_to_use; 3037 budget = igc_desc_unused(ring); 3038 3039 /* Packets with launch time require one data descriptor and one context 3040 * descriptor. When the launch time falls into the next Qbv cycle, we 3041 * may need to insert an empty packet, which requires two more 3042 * descriptors. Therefore, to be safe, we always ensure we have at least 3043 * 4 descriptors available. 
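 * (one data descriptor for the packet, one context descriptor for its
 * launch time, and one data plus one context descriptor for a possible
 * empty frame).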
3044 */ 3045 while (xsk_tx_peek_desc(pool, &xdp_desc) && budget >= 4) { 3046 struct igc_metadata_request meta_req; 3047 struct xsk_tx_metadata *meta = NULL; 3048 struct igc_tx_buffer *bi; 3049 u32 olinfo_status; 3050 dma_addr_t dma; 3051 3052 meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | 3053 IGC_ADVTXD_DCMD_DEXT | 3054 IGC_ADVTXD_DCMD_IFCS | 3055 IGC_TXD_DCMD | xdp_desc.len; 3056 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 3057 3058 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 3059 meta = xsk_buff_get_metadata(pool, xdp_desc.addr); 3060 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 3061 bi = &ring->tx_buffer_info[ntu]; 3062 3063 meta_req.tx_ring = ring; 3064 meta_req.tx_buffer = bi; 3065 meta_req.meta = meta; 3066 meta_req.used_desc = 0; 3067 xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, 3068 &meta_req); 3069 3070 /* xsk_tx_metadata_request() may have updated next_to_use */ 3071 ntu = ring->next_to_use; 3072 3073 /* xsk_tx_metadata_request() may have updated Tx buffer info */ 3074 bi = meta_req.tx_buffer; 3075 3076 /* xsk_tx_metadata_request() may use a few descriptors */ 3077 budget -= meta_req.used_desc; 3078 3079 tx_desc = IGC_TX_DESC(ring, ntu); 3080 tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); 3081 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 3082 tx_desc->read.buffer_addr = cpu_to_le64(dma); 3083 3084 bi->type = IGC_TX_BUFFER_TYPE_XSK; 3085 bi->protocol = 0; 3086 bi->bytecount = xdp_desc.len; 3087 bi->gso_segs = 1; 3088 bi->time_stamp = jiffies; 3089 bi->next_to_watch = tx_desc; 3090 3091 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 3092 3093 ntu++; 3094 if (ntu == ring->count) 3095 ntu = 0; 3096 3097 ring->next_to_use = ntu; 3098 budget--; 3099 } 3100 3101 if (tx_desc) { 3102 igc_flush_tx_descriptors(ring); 3103 xsk_tx_release(pool); 3104 } 3105 3106 __netif_tx_unlock(nq); 3107 } 3108 3109 /** 3110 * igc_clean_tx_irq - Reclaim resources after transmit completes 3111 * @q_vector: pointer to q_vector containing needed info 3112 * @napi_budget: Used to determine if we are in netpoll 3113 * 3114 * returns true if ring is completely cleaned 3115 */ igc_clean_tx_irq(struct igc_q_vector * q_vector,int napi_budget)3116 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 3117 { 3118 struct igc_adapter *adapter = q_vector->adapter; 3119 unsigned int total_bytes = 0, total_packets = 0; 3120 unsigned int budget = q_vector->tx.work_limit; 3121 struct igc_ring *tx_ring = q_vector->tx.ring; 3122 unsigned int i = tx_ring->next_to_clean; 3123 struct igc_tx_buffer *tx_buffer; 3124 union igc_adv_tx_desc *tx_desc; 3125 u32 xsk_frames = 0; 3126 3127 if (test_bit(__IGC_DOWN, &adapter->state)) 3128 return true; 3129 3130 tx_buffer = &tx_ring->tx_buffer_info[i]; 3131 tx_desc = IGC_TX_DESC(tx_ring, i); 3132 i -= tx_ring->count; 3133 3134 do { 3135 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 3136 3137 /* if next_to_watch is not set then there is no work pending */ 3138 if (!eop_desc) 3139 break; 3140 3141 /* prevent any other reads prior to eop_desc */ 3142 smp_rmb(); 3143 3144 /* if DD is not set pending work has not been completed */ 3145 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 3146 break; 3147 3148 /* Hold the completions while there's a pending tx hardware 3149 * timestamp request from XDP Tx metadata. 
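 * Cleaning resumes from this descriptor once the timestamp has been
 * read back and xsk_pending_ts is cleared.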
3150 */ 3151 if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && 3152 tx_buffer->xsk_pending_ts) 3153 break; 3154 3155 /* clear next_to_watch to prevent false hangs */ 3156 tx_buffer->next_to_watch = NULL; 3157 3158 /* update the statistics for this packet */ 3159 total_bytes += tx_buffer->bytecount; 3160 total_packets += tx_buffer->gso_segs; 3161 3162 switch (tx_buffer->type) { 3163 case IGC_TX_BUFFER_TYPE_XSK: 3164 xsk_frames++; 3165 break; 3166 case IGC_TX_BUFFER_TYPE_XDP: 3167 xdp_return_frame(tx_buffer->xdpf); 3168 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3169 break; 3170 case IGC_TX_BUFFER_TYPE_SKB: 3171 napi_consume_skb(tx_buffer->skb, napi_budget); 3172 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3173 break; 3174 default: 3175 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 3176 break; 3177 } 3178 3179 /* clear last DMA location and unmap remaining buffers */ 3180 while (tx_desc != eop_desc) { 3181 tx_buffer++; 3182 tx_desc++; 3183 i++; 3184 if (unlikely(!i)) { 3185 i -= tx_ring->count; 3186 tx_buffer = tx_ring->tx_buffer_info; 3187 tx_desc = IGC_TX_DESC(tx_ring, 0); 3188 } 3189 3190 /* unmap any remaining paged data */ 3191 if (dma_unmap_len(tx_buffer, len)) 3192 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 3193 } 3194 3195 /* move us one more past the eop_desc for start of next pkt */ 3196 tx_buffer++; 3197 tx_desc++; 3198 i++; 3199 if (unlikely(!i)) { 3200 i -= tx_ring->count; 3201 tx_buffer = tx_ring->tx_buffer_info; 3202 tx_desc = IGC_TX_DESC(tx_ring, 0); 3203 } 3204 3205 /* issue prefetch for next Tx descriptor */ 3206 prefetch(tx_desc); 3207 3208 /* update budget accounting */ 3209 budget--; 3210 } while (likely(budget)); 3211 3212 netdev_tx_completed_queue(txring_txq(tx_ring), 3213 total_packets, total_bytes); 3214 3215 i += tx_ring->count; 3216 tx_ring->next_to_clean = i; 3217 3218 igc_update_tx_stats(q_vector, total_packets, total_bytes); 3219 3220 if (tx_ring->xsk_pool) { 3221 if (xsk_frames) 3222 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 3223 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 3224 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 3225 igc_xdp_xmit_zc(tx_ring); 3226 } 3227 3228 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 3229 struct igc_hw *hw = &adapter->hw; 3230 3231 /* Detect a transmit hang in hardware, this serializes the 3232 * check with the clearing of time_stamp and movement of i 3233 */ 3234 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3235 if (tx_buffer->next_to_watch && 3236 time_after(jiffies, tx_buffer->time_stamp + 3237 (adapter->tx_timeout_factor * HZ)) && 3238 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && 3239 (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && 3240 !tx_ring->oper_gate_closed) { 3241 /* detected Tx unit hang */ 3242 netdev_err(tx_ring->netdev, 3243 "Detected Tx Unit Hang\n" 3244 " Tx Queue <%d>\n" 3245 " TDH <%x>\n" 3246 " TDT <%x>\n" 3247 " next_to_use <%x>\n" 3248 " next_to_clean <%x>\n" 3249 "buffer_info[next_to_clean]\n" 3250 " time_stamp <%lx>\n" 3251 " next_to_watch <%p>\n" 3252 " jiffies <%lx>\n" 3253 " desc.status <%x>\n", 3254 tx_ring->queue_index, 3255 rd32(IGC_TDH(tx_ring->reg_idx)), 3256 readl(tx_ring->tail), 3257 tx_ring->next_to_use, 3258 tx_ring->next_to_clean, 3259 tx_buffer->time_stamp, 3260 tx_buffer->next_to_watch, 3261 jiffies, 3262 tx_buffer->next_to_watch->wb.status); 3263 netif_stop_subqueue(tx_ring->netdev, 3264 tx_ring->queue_index); 3265 3266 /* we are about to reset, no point in enabling stuff */ 3267 return true; 3268 } 3269 } 3270 3271 
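/* Only wake the queue once at least TX_WAKE_THRESHOLD descriptors are
 * free again (roughly room for two worst-case frames), so the queue is
 * not bounced between stopped and started on every completion.
 */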
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 3272 if (unlikely(total_packets && 3273 netif_carrier_ok(tx_ring->netdev) && 3274 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 3275 /* Make sure that anybody stopping the queue after this 3276 * sees the new next_to_clean. 3277 */ 3278 smp_mb(); 3279 if (__netif_subqueue_stopped(tx_ring->netdev, 3280 tx_ring->queue_index) && 3281 !(test_bit(__IGC_DOWN, &adapter->state))) { 3282 netif_wake_subqueue(tx_ring->netdev, 3283 tx_ring->queue_index); 3284 3285 u64_stats_update_begin(&tx_ring->tx_syncp); 3286 tx_ring->tx_stats.restart_queue++; 3287 u64_stats_update_end(&tx_ring->tx_syncp); 3288 } 3289 } 3290 3291 return !!budget; 3292 } 3293 igc_find_mac_filter(struct igc_adapter * adapter,enum igc_mac_filter_type type,const u8 * addr)3294 static int igc_find_mac_filter(struct igc_adapter *adapter, 3295 enum igc_mac_filter_type type, const u8 *addr) 3296 { 3297 struct igc_hw *hw = &adapter->hw; 3298 int max_entries = hw->mac.rar_entry_count; 3299 u32 ral, rah; 3300 int i; 3301 3302 for (i = 0; i < max_entries; i++) { 3303 ral = rd32(IGC_RAL(i)); 3304 rah = rd32(IGC_RAH(i)); 3305 3306 if (!(rah & IGC_RAH_AV)) 3307 continue; 3308 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 3309 continue; 3310 if ((rah & IGC_RAH_RAH_MASK) != 3311 le16_to_cpup((__le16 *)(addr + 4))) 3312 continue; 3313 if (ral != le32_to_cpup((__le32 *)(addr))) 3314 continue; 3315 3316 return i; 3317 } 3318 3319 return -1; 3320 } 3321 igc_get_avail_mac_filter_slot(struct igc_adapter * adapter)3322 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 3323 { 3324 struct igc_hw *hw = &adapter->hw; 3325 int max_entries = hw->mac.rar_entry_count; 3326 u32 rah; 3327 int i; 3328 3329 for (i = 0; i < max_entries; i++) { 3330 rah = rd32(IGC_RAH(i)); 3331 3332 if (!(rah & IGC_RAH_AV)) 3333 return i; 3334 } 3335 3336 return -1; 3337 } 3338 3339 /** 3340 * igc_add_mac_filter() - Add MAC address filter 3341 * @adapter: Pointer to adapter where the filter should be added 3342 * @type: MAC address filter type (source or destination) 3343 * @addr: MAC address 3344 * @queue: If non-negative, queue assignment feature is enabled and frames 3345 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3346 * assignment is disabled. 3347 * 3348 * Return: 0 in case of success, negative errno code otherwise. 3349 */ igc_add_mac_filter(struct igc_adapter * adapter,enum igc_mac_filter_type type,const u8 * addr,int queue)3350 static int igc_add_mac_filter(struct igc_adapter *adapter, 3351 enum igc_mac_filter_type type, const u8 *addr, 3352 int queue) 3353 { 3354 struct net_device *dev = adapter->netdev; 3355 int index; 3356 3357 index = igc_find_mac_filter(adapter, type, addr); 3358 if (index >= 0) 3359 goto update_filter; 3360 3361 index = igc_get_avail_mac_filter_slot(adapter); 3362 if (index < 0) 3363 return -ENOSPC; 3364 3365 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 3366 index, type == IGC_MAC_FILTER_TYPE_DST ? 
"dst" : "src", 3367 addr, queue); 3368 3369 update_filter: 3370 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 3371 return 0; 3372 } 3373 3374 /** 3375 * igc_del_mac_filter() - Delete MAC address filter 3376 * @adapter: Pointer to adapter where the filter should be deleted from 3377 * @type: MAC address filter type (source or destination) 3378 * @addr: MAC address 3379 */ igc_del_mac_filter(struct igc_adapter * adapter,enum igc_mac_filter_type type,const u8 * addr)3380 static void igc_del_mac_filter(struct igc_adapter *adapter, 3381 enum igc_mac_filter_type type, const u8 *addr) 3382 { 3383 struct net_device *dev = adapter->netdev; 3384 int index; 3385 3386 index = igc_find_mac_filter(adapter, type, addr); 3387 if (index < 0) 3388 return; 3389 3390 if (index == 0) { 3391 /* If this is the default filter, we don't actually delete it. 3392 * We just reset to its default value i.e. disable queue 3393 * assignment. 3394 */ 3395 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 3396 3397 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 3398 } else { 3399 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 3400 index, 3401 type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 3402 addr); 3403 3404 igc_clear_mac_filter_hw(adapter, index); 3405 } 3406 } 3407 3408 /** 3409 * igc_add_vlan_prio_filter() - Add VLAN priority filter 3410 * @adapter: Pointer to adapter where the filter should be added 3411 * @prio: VLAN priority value 3412 * @queue: Queue number which matching frames are assigned to 3413 * 3414 * Return: 0 in case of success, negative errno code otherwise. 3415 */ igc_add_vlan_prio_filter(struct igc_adapter * adapter,int prio,int queue)3416 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 3417 int queue) 3418 { 3419 struct net_device *dev = adapter->netdev; 3420 struct igc_hw *hw = &adapter->hw; 3421 u32 vlanpqf; 3422 3423 vlanpqf = rd32(IGC_VLANPQF); 3424 3425 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 3426 netdev_dbg(dev, "VLAN priority filter already in use\n"); 3427 return -EEXIST; 3428 } 3429 3430 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 3431 vlanpqf |= IGC_VLANPQF_VALID(prio); 3432 3433 wr32(IGC_VLANPQF, vlanpqf); 3434 3435 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 3436 prio, queue); 3437 return 0; 3438 } 3439 3440 /** 3441 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 3442 * @adapter: Pointer to adapter where the filter should be deleted from 3443 * @prio: VLAN priority value 3444 */ igc_del_vlan_prio_filter(struct igc_adapter * adapter,int prio)3445 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 3446 { 3447 struct igc_hw *hw = &adapter->hw; 3448 u32 vlanpqf; 3449 3450 vlanpqf = rd32(IGC_VLANPQF); 3451 3452 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 3453 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 3454 3455 wr32(IGC_VLANPQF, vlanpqf); 3456 3457 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 3458 prio); 3459 } 3460 igc_get_avail_etype_filter_slot(struct igc_adapter * adapter)3461 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 3462 { 3463 struct igc_hw *hw = &adapter->hw; 3464 int i; 3465 3466 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3467 u32 etqf = rd32(IGC_ETQF(i)); 3468 3469 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 3470 return i; 3471 } 3472 3473 return -1; 3474 } 3475 3476 /** 3477 * igc_add_etype_filter() - Add ethertype filter 3478 * @adapter: Pointer to adapter where the filter should be 
added 3479 * @etype: Ethertype value 3480 * @queue: If non-negative, queue assignment feature is enabled and frames 3481 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3482 * assignment is disabled. 3483 * 3484 * Return: 0 in case of success, negative errno code otherwise. 3485 */ igc_add_etype_filter(struct igc_adapter * adapter,u16 etype,int queue)3486 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3487 int queue) 3488 { 3489 struct igc_hw *hw = &adapter->hw; 3490 int index; 3491 u32 etqf; 3492 3493 index = igc_get_avail_etype_filter_slot(adapter); 3494 if (index < 0) 3495 return -ENOSPC; 3496 3497 etqf = rd32(IGC_ETQF(index)); 3498 3499 etqf &= ~IGC_ETQF_ETYPE_MASK; 3500 etqf |= etype; 3501 3502 if (queue >= 0) { 3503 etqf &= ~IGC_ETQF_QUEUE_MASK; 3504 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3505 etqf |= IGC_ETQF_QUEUE_ENABLE; 3506 } 3507 3508 etqf |= IGC_ETQF_FILTER_ENABLE; 3509 3510 wr32(IGC_ETQF(index), etqf); 3511 3512 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3513 etype, queue); 3514 return 0; 3515 } 3516 igc_find_etype_filter(struct igc_adapter * adapter,u16 etype)3517 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3518 { 3519 struct igc_hw *hw = &adapter->hw; 3520 int i; 3521 3522 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3523 u32 etqf = rd32(IGC_ETQF(i)); 3524 3525 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3526 return i; 3527 } 3528 3529 return -1; 3530 } 3531 3532 /** 3533 * igc_del_etype_filter() - Delete ethertype filter 3534 * @adapter: Pointer to adapter where the filter should be deleted from 3535 * @etype: Ethertype value 3536 */ igc_del_etype_filter(struct igc_adapter * adapter,u16 etype)3537 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3538 { 3539 struct igc_hw *hw = &adapter->hw; 3540 int index; 3541 3542 index = igc_find_etype_filter(adapter, etype); 3543 if (index < 0) 3544 return; 3545 3546 wr32(IGC_ETQF(index), 0); 3547 3548 netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n", 3549 etype); 3550 } 3551 igc_flex_filter_select(struct igc_adapter * adapter,struct igc_flex_filter * input,u32 * fhft)3552 static int igc_flex_filter_select(struct igc_adapter *adapter, 3553 struct igc_flex_filter *input, 3554 u32 *fhft) 3555 { 3556 struct igc_hw *hw = &adapter->hw; 3557 u8 fhft_index; 3558 u32 fhftsl; 3559 3560 if (input->index >= MAX_FLEX_FILTER) { 3561 netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n"); 3562 return -EINVAL; 3563 } 3564 3565 /* Indirect table select register */ 3566 fhftsl = rd32(IGC_FHFTSL); 3567 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3568 switch (input->index) { 3569 case 0 ... 7: 3570 fhftsl |= 0x00; 3571 break; 3572 case 8 ... 15: 3573 fhftsl |= 0x01; 3574 break; 3575 case 16 ... 23: 3576 fhftsl |= 0x02; 3577 break; 3578 case 24 ... 31: 3579 fhftsl |= 0x03; 3580 break; 3581 } 3582 wr32(IGC_FHFTSL, fhftsl); 3583 3584 /* Normalize index down to host table register */ 3585 fhft_index = input->index % 8; 3586 3587 *fhft = (fhft_index < 4) ? 
IGC_FHFT(fhft_index) : 3588 IGC_FHFT_EXT(fhft_index - 4); 3589 3590 return 0; 3591 } 3592 igc_write_flex_filter_ll(struct igc_adapter * adapter,struct igc_flex_filter * input)3593 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3594 struct igc_flex_filter *input) 3595 { 3596 struct igc_hw *hw = &adapter->hw; 3597 u8 *data = input->data; 3598 u8 *mask = input->mask; 3599 u32 queuing; 3600 u32 fhft; 3601 u32 wufc; 3602 int ret; 3603 int i; 3604 3605 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3606 * out early to avoid surprises later. 3607 */ 3608 if (input->length % 8 != 0) { 3609 netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n"); 3610 return -EINVAL; 3611 } 3612 3613 /* Select corresponding flex filter register and get base for host table. */ 3614 ret = igc_flex_filter_select(adapter, input, &fhft); 3615 if (ret) 3616 return ret; 3617 3618 /* When adding a filter globally disable flex filter feature. That is 3619 * recommended within the datasheet. 3620 */ 3621 wufc = rd32(IGC_WUFC); 3622 wufc &= ~IGC_WUFC_FLEX_HQ; 3623 wr32(IGC_WUFC, wufc); 3624 3625 /* Configure filter */ 3626 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3627 queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); 3628 queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); 3629 3630 if (input->immediate_irq) 3631 queuing |= IGC_FHFT_IMM_INT; 3632 3633 if (input->drop) 3634 queuing |= IGC_FHFT_DROP; 3635 3636 wr32(fhft + 0xFC, queuing); 3637 3638 /* Write data (128 byte) and mask (128 bit) */ 3639 for (i = 0; i < 16; ++i) { 3640 const size_t data_idx = i * 8; 3641 const size_t row_idx = i * 16; 3642 u32 dw0 = 3643 (data[data_idx + 0] << 0) | 3644 (data[data_idx + 1] << 8) | 3645 (data[data_idx + 2] << 16) | 3646 (data[data_idx + 3] << 24); 3647 u32 dw1 = 3648 (data[data_idx + 4] << 0) | 3649 (data[data_idx + 5] << 8) | 3650 (data[data_idx + 6] << 16) | 3651 (data[data_idx + 7] << 24); 3652 u32 tmp; 3653 3654 /* Write row: dw0, dw1 and mask */ 3655 wr32(fhft + row_idx, dw0); 3656 wr32(fhft + row_idx + 4, dw1); 3657 3658 /* mask is only valid for MASK(7, 0) */ 3659 tmp = rd32(fhft + row_idx + 8); 3660 tmp &= ~GENMASK(7, 0); 3661 tmp |= mask[i]; 3662 wr32(fhft + row_idx + 8, tmp); 3663 } 3664 3665 /* Enable filter. */ 3666 wufc |= IGC_WUFC_FLEX_HQ; 3667 if (input->index > 8) { 3668 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. 
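 * Filters 8-31 have their enable bits in WUFC_EXT instead, one
 * IGC_WUFC_EXT_FLX8-shifted bit per filter index.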
*/ 3669 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3670 3671 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3672 3673 wr32(IGC_WUFC_EXT, wufc_ext); 3674 } else { 3675 wufc |= (IGC_WUFC_FLX0 << input->index); 3676 } 3677 wr32(IGC_WUFC, wufc); 3678 3679 netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n", 3680 input->index); 3681 3682 return 0; 3683 } 3684 igc_flex_filter_add_field(struct igc_flex_filter * flex,const void * src,unsigned int offset,size_t len,const void * mask)3685 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3686 const void *src, unsigned int offset, 3687 size_t len, const void *mask) 3688 { 3689 int i; 3690 3691 /* data */ 3692 memcpy(&flex->data[offset], src, len); 3693 3694 /* mask */ 3695 for (i = 0; i < len; ++i) { 3696 const unsigned int idx = i + offset; 3697 const u8 *ptr = mask; 3698 3699 if (mask) { 3700 if (ptr[i] & 0xff) 3701 flex->mask[idx / 8] |= BIT(idx % 8); 3702 3703 continue; 3704 } 3705 3706 flex->mask[idx / 8] |= BIT(idx % 8); 3707 } 3708 } 3709 igc_find_avail_flex_filter_slot(struct igc_adapter * adapter)3710 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3711 { 3712 struct igc_hw *hw = &adapter->hw; 3713 u32 wufc, wufc_ext; 3714 int i; 3715 3716 wufc = rd32(IGC_WUFC); 3717 wufc_ext = rd32(IGC_WUFC_EXT); 3718 3719 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3720 if (i < 8) { 3721 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3722 return i; 3723 } else { 3724 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3725 return i; 3726 } 3727 } 3728 3729 return -ENOSPC; 3730 } 3731 igc_flex_filter_in_use(struct igc_adapter * adapter)3732 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3733 { 3734 struct igc_hw *hw = &adapter->hw; 3735 u32 wufc, wufc_ext; 3736 3737 wufc = rd32(IGC_WUFC); 3738 wufc_ext = rd32(IGC_WUFC_EXT); 3739 3740 if (wufc & IGC_WUFC_FILTER_MASK) 3741 return true; 3742 3743 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3744 return true; 3745 3746 return false; 3747 } 3748 igc_add_flex_filter(struct igc_adapter * adapter,struct igc_nfc_rule * rule)3749 static int igc_add_flex_filter(struct igc_adapter *adapter, 3750 struct igc_nfc_rule *rule) 3751 { 3752 struct igc_nfc_filter *filter = &rule->filter; 3753 unsigned int eth_offset, user_offset; 3754 struct igc_flex_filter flex = { }; 3755 int ret, index; 3756 bool vlan; 3757 3758 index = igc_find_avail_flex_filter_slot(adapter); 3759 if (index < 0) 3760 return -ENOSPC; 3761 3762 /* Construct the flex filter: 3763 * -> dest_mac [6] 3764 * -> src_mac [6] 3765 * -> tpid [2] 3766 * -> vlan tci [2] 3767 * -> ether type [2] 3768 * -> user data [8] 3769 * -> = 26 bytes => 32 length 3770 */ 3771 flex.index = index; 3772 flex.length = 32; 3773 flex.rx_queue = rule->action; 3774 3775 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3776 eth_offset = vlan ? 16 : 12; 3777 user_offset = vlan ? 
18 : 14; 3778 3779 /* Add destination MAC */ 3780 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3781 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3782 ETH_ALEN, NULL); 3783 3784 /* Add source MAC */ 3785 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3786 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3787 ETH_ALEN, NULL); 3788 3789 /* Add VLAN etype */ 3790 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { 3791 __be16 vlan_etype = cpu_to_be16(filter->vlan_etype); 3792 3793 igc_flex_filter_add_field(&flex, &vlan_etype, 12, 3794 sizeof(vlan_etype), NULL); 3795 } 3796 3797 /* Add VLAN TCI */ 3798 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3799 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3800 sizeof(filter->vlan_tci), NULL); 3801 3802 /* Add Ether type */ 3803 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3804 __be16 etype = cpu_to_be16(filter->etype); 3805 3806 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3807 sizeof(etype), NULL); 3808 } 3809 3810 /* Add user data */ 3811 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3812 igc_flex_filter_add_field(&flex, &filter->user_data, 3813 user_offset, 3814 sizeof(filter->user_data), 3815 filter->user_mask); 3816 3817 /* Add it down to the hardware and enable it. */ 3818 ret = igc_write_flex_filter_ll(adapter, &flex); 3819 if (ret) 3820 return ret; 3821 3822 filter->flex_index = index; 3823 3824 return 0; 3825 } 3826 igc_del_flex_filter(struct igc_adapter * adapter,u16 reg_index)3827 static void igc_del_flex_filter(struct igc_adapter *adapter, 3828 u16 reg_index) 3829 { 3830 struct igc_hw *hw = &adapter->hw; 3831 u32 wufc; 3832 3833 /* Just disable the filter. The filter table itself is kept 3834 * intact. Another flex_filter_add() should override the "old" data 3835 * then. 
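	 * Once no flex filter is left in use, IGC_WUFC_FLEX_HQ is also
	 * cleared below to disable the feature entirely.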
3836 */ 3837 if (reg_index > 8) { 3838 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3839 3840 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3841 wr32(IGC_WUFC_EXT, wufc_ext); 3842 } else { 3843 wufc = rd32(IGC_WUFC); 3844 3845 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3846 wr32(IGC_WUFC, wufc); 3847 } 3848 3849 if (igc_flex_filter_in_use(adapter)) 3850 return; 3851 3852 /* No filters are in use, we may disable flex filters */ 3853 wufc = rd32(IGC_WUFC); 3854 wufc &= ~IGC_WUFC_FLEX_HQ; 3855 wr32(IGC_WUFC, wufc); 3856 } 3857 igc_enable_nfc_rule(struct igc_adapter * adapter,struct igc_nfc_rule * rule)3858 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3859 struct igc_nfc_rule *rule) 3860 { 3861 int err; 3862 3863 if (rule->flex) { 3864 return igc_add_flex_filter(adapter, rule); 3865 } 3866 3867 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3868 err = igc_add_etype_filter(adapter, rule->filter.etype, 3869 rule->action); 3870 if (err) 3871 return err; 3872 } 3873 3874 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3875 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3876 rule->filter.src_addr, rule->action); 3877 if (err) 3878 return err; 3879 } 3880 3881 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3882 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3883 rule->filter.dst_addr, rule->action); 3884 if (err) 3885 return err; 3886 } 3887 3888 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3889 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3890 3891 err = igc_add_vlan_prio_filter(adapter, prio, rule->action); 3892 if (err) 3893 return err; 3894 } 3895 3896 return 0; 3897 } 3898 igc_disable_nfc_rule(struct igc_adapter * adapter,const struct igc_nfc_rule * rule)3899 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3900 const struct igc_nfc_rule *rule) 3901 { 3902 if (rule->flex) { 3903 igc_del_flex_filter(adapter, rule->filter.flex_index); 3904 return; 3905 } 3906 3907 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3908 igc_del_etype_filter(adapter, rule->filter.etype); 3909 3910 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3911 int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); 3912 3913 igc_del_vlan_prio_filter(adapter, prio); 3914 } 3915 3916 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3917 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3918 rule->filter.src_addr); 3919 3920 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3921 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3922 rule->filter.dst_addr); 3923 } 3924 3925 /** 3926 * igc_get_nfc_rule() - Get NFC rule 3927 * @adapter: Pointer to adapter 3928 * @location: Rule location 3929 * 3930 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3931 * 3932 * Return: Pointer to NFC rule at @location. If not found, NULL. 3933 */ igc_get_nfc_rule(struct igc_adapter * adapter,u32 location)3934 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3935 u32 location) 3936 { 3937 struct igc_nfc_rule *rule; 3938 3939 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3940 if (rule->location == location) 3941 return rule; 3942 if (rule->location > location) 3943 break; 3944 } 3945 3946 return NULL; 3947 } 3948 3949 /** 3950 * igc_del_nfc_rule() - Delete NFC rule 3951 * @adapter: Pointer to adapter 3952 * @rule: Pointer to rule to be deleted 3953 * 3954 * Disable NFC rule in hardware and delete it from adapter. 
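 * The rule memory is released with kfree(), so the caller must not touch
 * @rule once this returns.
 *
 * Minimal usage sketch (hypothetical caller, for illustration only; the
 * real callers are the ethtool hooks):
 *
 *	mutex_lock(&adapter->nfc_rule_lock);
 *	rule = igc_get_nfc_rule(adapter, location);
 *	if (rule)
 *		igc_del_nfc_rule(adapter, rule);
 *	mutex_unlock(&adapter->nfc_rule_lock);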
3955 * 3956 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3957 */ igc_del_nfc_rule(struct igc_adapter * adapter,struct igc_nfc_rule * rule)3958 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3959 { 3960 igc_disable_nfc_rule(adapter, rule); 3961 3962 list_del(&rule->list); 3963 adapter->nfc_rule_count--; 3964 3965 kfree(rule); 3966 } 3967 igc_flush_nfc_rules(struct igc_adapter * adapter)3968 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 3969 { 3970 struct igc_nfc_rule *rule, *tmp; 3971 3972 mutex_lock(&adapter->nfc_rule_lock); 3973 3974 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 3975 igc_del_nfc_rule(adapter, rule); 3976 3977 mutex_unlock(&adapter->nfc_rule_lock); 3978 } 3979 3980 /** 3981 * igc_add_nfc_rule() - Add NFC rule 3982 * @adapter: Pointer to adapter 3983 * @rule: Pointer to rule to be added 3984 * 3985 * Enable NFC rule in hardware and add it to adapter. 3986 * 3987 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3988 * 3989 * Return: 0 on success, negative errno on failure. 3990 */ igc_add_nfc_rule(struct igc_adapter * adapter,struct igc_nfc_rule * rule)3991 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3992 { 3993 struct igc_nfc_rule *pred, *cur; 3994 int err; 3995 3996 err = igc_enable_nfc_rule(adapter, rule); 3997 if (err) 3998 return err; 3999 4000 pred = NULL; 4001 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 4002 if (cur->location >= rule->location) 4003 break; 4004 pred = cur; 4005 } 4006 4007 list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list); 4008 adapter->nfc_rule_count++; 4009 return 0; 4010 } 4011 igc_restore_nfc_rules(struct igc_adapter * adapter)4012 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 4013 { 4014 struct igc_nfc_rule *rule; 4015 4016 mutex_lock(&adapter->nfc_rule_lock); 4017 4018 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 4019 igc_enable_nfc_rule(adapter, rule); 4020 4021 mutex_unlock(&adapter->nfc_rule_lock); 4022 } 4023 igc_uc_sync(struct net_device * netdev,const unsigned char * addr)4024 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 4025 { 4026 struct igc_adapter *adapter = netdev_priv(netdev); 4027 4028 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 4029 } 4030 igc_uc_unsync(struct net_device * netdev,const unsigned char * addr)4031 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 4032 { 4033 struct igc_adapter *adapter = netdev_priv(netdev); 4034 4035 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 4036 return 0; 4037 } 4038 4039 /** 4040 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 4041 * @netdev: network interface device structure 4042 * 4043 * The set_rx_mode entry point is called whenever the unicast or multicast 4044 * address lists or the network interface flags are updated. This routine is 4045 * responsible for configuring the hardware for proper unicast, multicast, 4046 * promiscuous mode, and all-multi behavior. 
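 * If the MTA cannot be programmed, the device falls back to multicast
 * promiscuous mode (MPE); if the unicast list overflows the RAR table, it
 * falls back to unicast promiscuous mode (UPE).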
 */
static void igc_set_rx_mode(struct net_device *netdev)
{
	struct igc_adapter *adapter = netdev_priv(netdev);
	struct igc_hw *hw = &adapter->hw;
	u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
	int count;

	/* Check for Promiscuous and All Multicast modes */
	if (netdev->flags & IFF_PROMISC) {
		rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
	} else {
		if (netdev->flags & IFF_ALLMULTI) {
			rctl |= IGC_RCTL_MPE;
		} else {
			/* Write addresses to the MTA, if the attempt fails
			 * then we should just turn on promiscuous mode so
			 * that we can at least receive multicast traffic
			 */
			count = igc_write_mc_addr_list(netdev);
			if (count < 0)
				rctl |= IGC_RCTL_MPE;
		}
	}

	/* Write addresses to available RAR registers, if there is not
	 * sufficient space to store all the addresses then enable
	 * unicast promiscuous mode
	 */
	if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
		rctl |= IGC_RCTL_UPE;

	/* update state of unicast and multicast */
	rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
	wr32(IGC_RCTL, rctl);

#if (PAGE_SIZE < 8192)
	if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
		rlpml = IGC_MAX_FRAME_BUILD_SKB;
#endif
	wr32(IGC_RLPML, rlpml);
}

/**
 * igc_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 */
static void igc_configure(struct igc_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i = 0;

	igc_get_hw_control(adapter);
	igc_set_rx_mode(netdev);

	igc_restore_vlan(adapter);

	igc_setup_tctl(adapter);
	igc_setup_mrqc(adapter);
	igc_setup_rctl(adapter);

	igc_set_default_mac_filter(adapter);
	igc_restore_nfc_rules(adapter);

	igc_configure_tx(adapter);
	igc_configure_rx(adapter);

	igc_rx_fifo_flush_base(&adapter->hw);

	/* call igc_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean
	 */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igc_ring *ring = adapter->rx_ring[i];

		if (ring->xsk_pool)
			igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
		else
			igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
	}
}

/**
 * igc_write_ivar - configure ivar for given MSI-X vector
 * @hw: pointer to the HW structure
 * @msix_vector: vector number we are allocating to a given ring
 * @index: row index of IVAR register to write within IVAR table
 * @offset: column offset in IVAR, should be multiple of 8
 *
 * The IVAR table consists of 2 columns,
 * each containing a cause allocation for an Rx and Tx ring, and a
 * variable number of rows depending on the number of queues supported.
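 * For example, with the mapping used by igc_assign_vector(), the Rx cause of
 * queue 3 lands in IVAR row 1 (3 >> 1) at bit offset 16 ((3 & 0x1) << 4) and
 * the corresponding Tx cause at bit offset 24.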
4140 */ igc_write_ivar(struct igc_hw * hw,int msix_vector,int index,int offset)4141 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 4142 int index, int offset) 4143 { 4144 u32 ivar = array_rd32(IGC_IVAR0, index); 4145 4146 /* clear any bits that are currently set */ 4147 ivar &= ~((u32)0xFF << offset); 4148 4149 /* write vector and valid bit */ 4150 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 4151 4152 array_wr32(IGC_IVAR0, index, ivar); 4153 } 4154 igc_assign_vector(struct igc_q_vector * q_vector,int msix_vector)4155 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 4156 { 4157 struct igc_adapter *adapter = q_vector->adapter; 4158 struct igc_hw *hw = &adapter->hw; 4159 int rx_queue = IGC_N0_QUEUE; 4160 int tx_queue = IGC_N0_QUEUE; 4161 4162 if (q_vector->rx.ring) 4163 rx_queue = q_vector->rx.ring->reg_idx; 4164 if (q_vector->tx.ring) 4165 tx_queue = q_vector->tx.ring->reg_idx; 4166 4167 switch (hw->mac.type) { 4168 case igc_i225: 4169 if (rx_queue > IGC_N0_QUEUE) 4170 igc_write_ivar(hw, msix_vector, 4171 rx_queue >> 1, 4172 (rx_queue & 0x1) << 4); 4173 if (tx_queue > IGC_N0_QUEUE) 4174 igc_write_ivar(hw, msix_vector, 4175 tx_queue >> 1, 4176 ((tx_queue & 0x1) << 4) + 8); 4177 q_vector->eims_value = BIT(msix_vector); 4178 break; 4179 default: 4180 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 4181 break; 4182 } 4183 4184 /* add q_vector eims value to global eims_enable_mask */ 4185 adapter->eims_enable_mask |= q_vector->eims_value; 4186 4187 /* configure q_vector to set itr on first interrupt */ 4188 q_vector->set_itr = 1; 4189 } 4190 4191 /** 4192 * igc_configure_msix - Configure MSI-X hardware 4193 * @adapter: Pointer to adapter structure 4194 * 4195 * igc_configure_msix sets up the hardware to properly 4196 * generate MSI-X interrupts. 4197 */ igc_configure_msix(struct igc_adapter * adapter)4198 static void igc_configure_msix(struct igc_adapter *adapter) 4199 { 4200 struct igc_hw *hw = &adapter->hw; 4201 int i, vector = 0; 4202 u32 tmp; 4203 4204 adapter->eims_enable_mask = 0; 4205 4206 /* set vector for other causes, i.e. link changes */ 4207 switch (hw->mac.type) { 4208 case igc_i225: 4209 /* Turn on MSI-X capability first, or our settings 4210 * won't stick. And it will take days to debug. 
4211 */ 4212 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 4213 IGC_GPIE_PBA | IGC_GPIE_EIAME | 4214 IGC_GPIE_NSICR); 4215 4216 /* enable msix_other interrupt */ 4217 adapter->eims_other = BIT(vector); 4218 tmp = (vector++ | IGC_IVAR_VALID) << 8; 4219 4220 wr32(IGC_IVAR_MISC, tmp); 4221 break; 4222 default: 4223 /* do nothing, since nothing else supports MSI-X */ 4224 break; 4225 } /* switch (hw->mac.type) */ 4226 4227 adapter->eims_enable_mask |= adapter->eims_other; 4228 4229 for (i = 0; i < adapter->num_q_vectors; i++) 4230 igc_assign_vector(adapter->q_vector[i], vector++); 4231 4232 wrfl(); 4233 } 4234 4235 /** 4236 * igc_irq_enable - Enable default interrupt generation settings 4237 * @adapter: board private structure 4238 */ igc_irq_enable(struct igc_adapter * adapter)4239 static void igc_irq_enable(struct igc_adapter *adapter) 4240 { 4241 struct igc_hw *hw = &adapter->hw; 4242 4243 if (adapter->msix_entries) { 4244 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 4245 u32 regval = rd32(IGC_EIAC); 4246 4247 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 4248 regval = rd32(IGC_EIAM); 4249 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 4250 wr32(IGC_EIMS, adapter->eims_enable_mask); 4251 wr32(IGC_IMS, ims); 4252 } else { 4253 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4254 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 4255 } 4256 } 4257 4258 /** 4259 * igc_irq_disable - Mask off interrupt generation on the NIC 4260 * @adapter: board private structure 4261 */ igc_irq_disable(struct igc_adapter * adapter)4262 static void igc_irq_disable(struct igc_adapter *adapter) 4263 { 4264 struct igc_hw *hw = &adapter->hw; 4265 4266 if (adapter->msix_entries) { 4267 u32 regval = rd32(IGC_EIAM); 4268 4269 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 4270 wr32(IGC_EIMC, adapter->eims_enable_mask); 4271 regval = rd32(IGC_EIAC); 4272 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 4273 } 4274 4275 wr32(IGC_IAM, 0); 4276 wr32(IGC_IMC, ~0); 4277 wrfl(); 4278 4279 if (adapter->msix_entries) { 4280 int vector = 0, i; 4281 4282 synchronize_irq(adapter->msix_entries[vector++].vector); 4283 4284 for (i = 0; i < adapter->num_q_vectors; i++) 4285 synchronize_irq(adapter->msix_entries[vector++].vector); 4286 } else { 4287 synchronize_irq(adapter->pdev->irq); 4288 } 4289 } 4290 igc_set_flag_queue_pairs(struct igc_adapter * adapter,const u32 max_rss_queues)4291 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 4292 const u32 max_rss_queues) 4293 { 4294 /* Determine if we need to pair queues. */ 4295 /* If rss_queues > half of max_rss_queues, pair the queues in 4296 * order to conserve interrupts due to limited supply. 
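 * For example, assuming the usual four-queue i225/i226 configuration,
 * rss_queues of 3 or 4 makes each Tx/Rx pair share a single vector, while
 * 1 or 2 keeps separate Tx and Rx vectors.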
4297 */ 4298 if (adapter->rss_queues > (max_rss_queues / 2)) 4299 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4300 else 4301 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 4302 } 4303 igc_get_max_rss_queues(struct igc_adapter * adapter)4304 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 4305 { 4306 return IGC_MAX_RX_QUEUES; 4307 } 4308 igc_init_queue_configuration(struct igc_adapter * adapter)4309 static void igc_init_queue_configuration(struct igc_adapter *adapter) 4310 { 4311 u32 max_rss_queues; 4312 4313 max_rss_queues = igc_get_max_rss_queues(adapter); 4314 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 4315 4316 igc_set_flag_queue_pairs(adapter, max_rss_queues); 4317 } 4318 4319 /** 4320 * igc_reset_q_vector - Reset config for interrupt vector 4321 * @adapter: board private structure to initialize 4322 * @v_idx: Index of vector to be reset 4323 * 4324 * If NAPI is enabled it will delete any references to the 4325 * NAPI struct. This is preparation for igc_free_q_vector. 4326 */ igc_reset_q_vector(struct igc_adapter * adapter,int v_idx)4327 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 4328 { 4329 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4330 4331 /* if we're coming from igc_set_interrupt_capability, the vectors are 4332 * not yet allocated 4333 */ 4334 if (!q_vector) 4335 return; 4336 4337 if (q_vector->tx.ring) 4338 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 4339 4340 if (q_vector->rx.ring) 4341 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 4342 4343 netif_napi_del(&q_vector->napi); 4344 } 4345 4346 /** 4347 * igc_free_q_vector - Free memory allocated for specific interrupt vector 4348 * @adapter: board private structure to initialize 4349 * @v_idx: Index of vector to be freed 4350 * 4351 * This function frees the memory allocated to the q_vector. 4352 */ igc_free_q_vector(struct igc_adapter * adapter,int v_idx)4353 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 4354 { 4355 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 4356 4357 adapter->q_vector[v_idx] = NULL; 4358 4359 /* igc_get_stats64() might access the rings on this vector, 4360 * we must wait a grace period before freeing it. 4361 */ 4362 if (q_vector) 4363 kfree_rcu(q_vector, rcu); 4364 } 4365 4366 /** 4367 * igc_free_q_vectors - Free memory allocated for interrupt vectors 4368 * @adapter: board private structure to initialize 4369 * 4370 * This function frees the memory allocated to the q_vectors. In addition if 4371 * NAPI is enabled it will delete any references to the NAPI struct prior 4372 * to freeing the q_vector. 4373 */ igc_free_q_vectors(struct igc_adapter * adapter)4374 static void igc_free_q_vectors(struct igc_adapter *adapter) 4375 { 4376 int v_idx = adapter->num_q_vectors; 4377 4378 adapter->num_tx_queues = 0; 4379 adapter->num_rx_queues = 0; 4380 adapter->num_q_vectors = 0; 4381 4382 while (v_idx--) { 4383 igc_reset_q_vector(adapter, v_idx); 4384 igc_free_q_vector(adapter, v_idx); 4385 } 4386 } 4387 4388 /** 4389 * igc_update_itr - update the dynamic ITR value based on statistics 4390 * @q_vector: pointer to q_vector 4391 * @ring_container: ring info to update the itr for 4392 * 4393 * Stores a new ITR value based on packets and byte 4394 * counts during the last interrupt. The advantage of per interrupt 4395 * computation is faster updates and more accurate ITR for the current 4396 * traffic pattern. 
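 * The stored value is one of the latency_range buckets rather than a
 * register value; igc_set_itr() translates it into an interrupt rate
 * afterwards.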
Constants in this function were computed 4397 * based on theoretical maximum wire speed and thresholds were set based 4398 * on testing data as well as attempting to minimize response time 4399 * while increasing bulk throughput. 4400 * NOTE: These calculations are only valid when operating in a single- 4401 * queue environment. 4402 */ igc_update_itr(struct igc_q_vector * q_vector,struct igc_ring_container * ring_container)4403 static void igc_update_itr(struct igc_q_vector *q_vector, 4404 struct igc_ring_container *ring_container) 4405 { 4406 unsigned int packets = ring_container->total_packets; 4407 unsigned int bytes = ring_container->total_bytes; 4408 u8 itrval = ring_container->itr; 4409 4410 /* no packets, exit with status unchanged */ 4411 if (packets == 0) 4412 return; 4413 4414 switch (itrval) { 4415 case lowest_latency: 4416 /* handle TSO and jumbo frames */ 4417 if (bytes / packets > 8000) 4418 itrval = bulk_latency; 4419 else if ((packets < 5) && (bytes > 512)) 4420 itrval = low_latency; 4421 break; 4422 case low_latency: /* 50 usec aka 20000 ints/s */ 4423 if (bytes > 10000) { 4424 /* this if handles the TSO accounting */ 4425 if (bytes / packets > 8000) 4426 itrval = bulk_latency; 4427 else if ((packets < 10) || ((bytes / packets) > 1200)) 4428 itrval = bulk_latency; 4429 else if ((packets > 35)) 4430 itrval = lowest_latency; 4431 } else if (bytes / packets > 2000) { 4432 itrval = bulk_latency; 4433 } else if (packets <= 2 && bytes < 512) { 4434 itrval = lowest_latency; 4435 } 4436 break; 4437 case bulk_latency: /* 250 usec aka 4000 ints/s */ 4438 if (bytes > 25000) { 4439 if (packets > 35) 4440 itrval = low_latency; 4441 } else if (bytes < 1500) { 4442 itrval = low_latency; 4443 } 4444 break; 4445 } 4446 4447 /* clear work counters since we have the values we need */ 4448 ring_container->total_bytes = 0; 4449 ring_container->total_packets = 0; 4450 4451 /* write updated itr to ring container */ 4452 ring_container->itr = itrval; 4453 } 4454 igc_set_itr(struct igc_q_vector * q_vector)4455 static void igc_set_itr(struct igc_q_vector *q_vector) 4456 { 4457 struct igc_adapter *adapter = q_vector->adapter; 4458 u32 new_itr = q_vector->itr_val; 4459 u8 current_itr = 0; 4460 4461 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 4462 switch (adapter->link_speed) { 4463 case SPEED_10: 4464 case SPEED_100: 4465 current_itr = 0; 4466 new_itr = IGC_4K_ITR; 4467 goto set_itr_now; 4468 default: 4469 break; 4470 } 4471 4472 igc_update_itr(q_vector, &q_vector->tx); 4473 igc_update_itr(q_vector, &q_vector->rx); 4474 4475 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4476 4477 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4478 if (current_itr == lowest_latency && 4479 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4480 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4481 current_itr = low_latency; 4482 4483 switch (current_itr) { 4484 /* counts and packets in update_itr are dependent on these numbers */ 4485 case lowest_latency: 4486 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4487 break; 4488 case low_latency: 4489 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4490 break; 4491 case bulk_latency: 4492 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4493 break; 4494 default: 4495 break; 4496 } 4497 4498 set_itr_now: 4499 if (new_itr != q_vector->itr_val) { 4500 /* this attempts to bias the interrupt rate towards Bulk 4501 * by adding intermediate steps when interrupt rate is 4502 * increasing 4503 */ 4504 new_itr = new_itr > 
			  q_vector->itr_val ?
			  max((new_itr * q_vector->itr_val) /
			      (new_itr + (q_vector->itr_val >> 2)),
			      new_itr) : new_itr;
		/* Don't write the value here; it resets the adapter's
		 * internal timer, and causes us to delay far longer than
		 * we should between interrupts. Instead, we write the ITR
		 * value at the beginning of the next interrupt so the timing
		 * ends up being correct.
		 */
		q_vector->itr_val = new_itr;
		q_vector->set_itr = 1;
	}
}

static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
{
	int v_idx = adapter->num_q_vectors;

	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGC_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}

	while (v_idx--)
		igc_reset_q_vector(adapter, v_idx);
}

/**
 * igc_set_interrupt_capability - set MSI or MSI-X if supported
 * @adapter: Pointer to adapter structure
 * @msix: boolean value for MSI-X capability
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 */
static void igc_set_interrupt_capability(struct igc_adapter *adapter,
					 bool msix)
{
	int numvecs, i;
	int err;

	if (!msix)
		goto msi_only;
	adapter->flags |= IGC_FLAG_HAS_MSIX;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;

	adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every Rx queue */
	numvecs = adapter->num_rx_queues;

	/* if Tx handler is separate add 1 for every Tx queue */
	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;

	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);

	if (!adapter->msix_entries)
		return;

	/* populate entry values */
	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix_range(adapter->pdev,
				    adapter->msix_entries,
				    numvecs,
				    numvecs);
	if (err > 0)
		return;

	kfree(adapter->msix_entries);
	adapter->msix_entries = NULL;

	igc_reset_interrupt_capability(adapter);

msi_only:
	adapter->flags &= ~IGC_FLAG_HAS_MSIX;

	adapter->rss_queues = 1;
	adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGC_FLAG_HAS_MSI;
}

/**
 * igc_update_ring_itr - update the dynamic ITR value based on packet size
 * @q_vector: pointer to q_vector
 *
 * Stores a new ITR value based strictly on packet size. This
 * algorithm is less sophisticated than that used in igc_update_itr,
 * due to the difficulty of synchronizing statistics across multiple
 * receive rings.
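 * It instead derives the new value from the average wire size of the packets
 * serviced since the last update.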
The divisors and thresholds used by this function 4613 * were determined based on theoretical maximum wire speed and testing 4614 * data, in order to minimize response time while increasing bulk 4615 * throughput. 4616 * NOTE: This function is called only when operating in a multiqueue 4617 * receive environment. 4618 */ igc_update_ring_itr(struct igc_q_vector * q_vector)4619 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4620 { 4621 struct igc_adapter *adapter = q_vector->adapter; 4622 int new_val = q_vector->itr_val; 4623 int avg_wire_size = 0; 4624 unsigned int packets; 4625 4626 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4627 * ints/sec - ITR timer value of 120 ticks. 4628 */ 4629 switch (adapter->link_speed) { 4630 case SPEED_10: 4631 case SPEED_100: 4632 new_val = IGC_4K_ITR; 4633 goto set_itr_val; 4634 default: 4635 break; 4636 } 4637 4638 packets = q_vector->rx.total_packets; 4639 if (packets) 4640 avg_wire_size = q_vector->rx.total_bytes / packets; 4641 4642 packets = q_vector->tx.total_packets; 4643 if (packets) 4644 avg_wire_size = max_t(u32, avg_wire_size, 4645 q_vector->tx.total_bytes / packets); 4646 4647 /* if avg_wire_size isn't set no work was done */ 4648 if (!avg_wire_size) 4649 goto clear_counts; 4650 4651 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4652 avg_wire_size += 24; 4653 4654 /* Don't starve jumbo frames */ 4655 avg_wire_size = min(avg_wire_size, 3000); 4656 4657 /* Give a little boost to mid-size frames */ 4658 if (avg_wire_size > 300 && avg_wire_size < 1200) 4659 new_val = avg_wire_size / 3; 4660 else 4661 new_val = avg_wire_size / 2; 4662 4663 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4664 if (new_val < IGC_20K_ITR && 4665 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4666 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4667 new_val = IGC_20K_ITR; 4668 4669 set_itr_val: 4670 if (new_val != q_vector->itr_val) { 4671 q_vector->itr_val = new_val; 4672 q_vector->set_itr = 1; 4673 } 4674 clear_counts: 4675 q_vector->rx.total_bytes = 0; 4676 q_vector->rx.total_packets = 0; 4677 q_vector->tx.total_bytes = 0; 4678 q_vector->tx.total_packets = 0; 4679 } 4680 igc_ring_irq_enable(struct igc_q_vector * q_vector)4681 static void igc_ring_irq_enable(struct igc_q_vector *q_vector) 4682 { 4683 struct igc_adapter *adapter = q_vector->adapter; 4684 struct igc_hw *hw = &adapter->hw; 4685 4686 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4687 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4688 if (adapter->num_q_vectors == 1) 4689 igc_set_itr(q_vector); 4690 else 4691 igc_update_ring_itr(q_vector); 4692 } 4693 4694 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4695 if (adapter->msix_entries) 4696 wr32(IGC_EIMS, q_vector->eims_value); 4697 else 4698 igc_irq_enable(adapter); 4699 } 4700 } 4701 igc_add_ring(struct igc_ring * ring,struct igc_ring_container * head)4702 static void igc_add_ring(struct igc_ring *ring, 4703 struct igc_ring_container *head) 4704 { 4705 head->ring = ring; 4706 head->count++; 4707 } 4708 4709 /** 4710 * igc_cache_ring_register - Descriptor ring to register mapping 4711 * @adapter: board private structure to initialize 4712 * 4713 * Once we know the feature-set enabled for the device, we'll cache 4714 * the register offset the descriptor ring is assigned to. 
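 * On i225/i226 this mapping is simply 1:1: ring i uses register index i for
 * both Rx and Tx.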
4715 */ igc_cache_ring_register(struct igc_adapter * adapter)4716 static void igc_cache_ring_register(struct igc_adapter *adapter) 4717 { 4718 int i = 0, j = 0; 4719 4720 switch (adapter->hw.mac.type) { 4721 case igc_i225: 4722 default: 4723 for (; i < adapter->num_rx_queues; i++) 4724 adapter->rx_ring[i]->reg_idx = i; 4725 for (; j < adapter->num_tx_queues; j++) 4726 adapter->tx_ring[j]->reg_idx = j; 4727 break; 4728 } 4729 } 4730 4731 /** 4732 * igc_poll - NAPI Rx polling callback 4733 * @napi: napi polling structure 4734 * @budget: count of how many packets we should handle 4735 */ igc_poll(struct napi_struct * napi,int budget)4736 static int igc_poll(struct napi_struct *napi, int budget) 4737 { 4738 struct igc_q_vector *q_vector = container_of(napi, 4739 struct igc_q_vector, 4740 napi); 4741 struct igc_ring *rx_ring = q_vector->rx.ring; 4742 bool clean_complete = true; 4743 int work_done = 0; 4744 4745 if (q_vector->tx.ring) 4746 clean_complete = igc_clean_tx_irq(q_vector, budget); 4747 4748 if (rx_ring) { 4749 int cleaned = rx_ring->xsk_pool ? 4750 igc_clean_rx_irq_zc(q_vector, budget) : 4751 igc_clean_rx_irq(q_vector, budget); 4752 4753 work_done += cleaned; 4754 if (cleaned >= budget) 4755 clean_complete = false; 4756 } 4757 4758 /* If all work not completed, return budget and keep polling */ 4759 if (!clean_complete) 4760 return budget; 4761 4762 /* Exit the polling mode, but don't re-enable interrupts if stack might 4763 * poll us due to busy-polling 4764 */ 4765 if (likely(napi_complete_done(napi, work_done))) 4766 igc_ring_irq_enable(q_vector); 4767 4768 return min(work_done, budget - 1); 4769 } 4770 4771 /** 4772 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4773 * @adapter: board private structure to initialize 4774 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4775 * @v_idx: index of vector in adapter struct 4776 * @txr_count: total number of Tx rings to allocate 4777 * @txr_idx: index of first Tx ring to allocate 4778 * @rxr_count: total number of Rx rings to allocate 4779 * @rxr_idx: index of first Rx ring to allocate 4780 * 4781 * We allocate one q_vector. If allocation fails we return -ENOMEM. 
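 * A vector drives at most one Tx and one Rx ring; the rings themselves are
 * stored in the flexible ring[] array that follows the q_vector structure.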
4782 */ igc_alloc_q_vector(struct igc_adapter * adapter,unsigned int v_count,unsigned int v_idx,unsigned int txr_count,unsigned int txr_idx,unsigned int rxr_count,unsigned int rxr_idx)4783 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4784 unsigned int v_count, unsigned int v_idx, 4785 unsigned int txr_count, unsigned int txr_idx, 4786 unsigned int rxr_count, unsigned int rxr_idx) 4787 { 4788 struct igc_q_vector *q_vector; 4789 struct igc_ring *ring; 4790 int ring_count; 4791 4792 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4793 if (txr_count > 1 || rxr_count > 1) 4794 return -ENOMEM; 4795 4796 ring_count = txr_count + rxr_count; 4797 4798 /* allocate q_vector and rings */ 4799 q_vector = adapter->q_vector[v_idx]; 4800 if (!q_vector) 4801 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4802 GFP_KERNEL); 4803 else 4804 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4805 if (!q_vector) 4806 return -ENOMEM; 4807 4808 /* initialize NAPI */ 4809 netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll); 4810 4811 /* tie q_vector and adapter together */ 4812 adapter->q_vector[v_idx] = q_vector; 4813 q_vector->adapter = adapter; 4814 4815 /* initialize work limits */ 4816 q_vector->tx.work_limit = adapter->tx_work_limit; 4817 4818 /* initialize ITR configuration */ 4819 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4820 q_vector->itr_val = IGC_START_ITR; 4821 4822 /* initialize pointer to rings */ 4823 ring = q_vector->ring; 4824 4825 /* initialize ITR */ 4826 if (rxr_count) { 4827 /* rx or rx/tx vector */ 4828 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4829 q_vector->itr_val = adapter->rx_itr_setting; 4830 } else { 4831 /* tx only vector */ 4832 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4833 q_vector->itr_val = adapter->tx_itr_setting; 4834 } 4835 4836 if (txr_count) { 4837 /* assign generic ring traits */ 4838 ring->dev = &adapter->pdev->dev; 4839 ring->netdev = adapter->netdev; 4840 4841 /* configure backlink on ring */ 4842 ring->q_vector = q_vector; 4843 4844 /* update q_vector Tx values */ 4845 igc_add_ring(ring, &q_vector->tx); 4846 4847 /* apply Tx specific ring traits */ 4848 ring->count = adapter->tx_ring_count; 4849 ring->queue_index = txr_idx; 4850 4851 /* assign ring to adapter */ 4852 adapter->tx_ring[txr_idx] = ring; 4853 4854 /* push pointer to next ring */ 4855 ring++; 4856 } 4857 4858 if (rxr_count) { 4859 /* assign generic ring traits */ 4860 ring->dev = &adapter->pdev->dev; 4861 ring->netdev = adapter->netdev; 4862 4863 /* configure backlink on ring */ 4864 ring->q_vector = q_vector; 4865 4866 /* update q_vector Rx values */ 4867 igc_add_ring(ring, &q_vector->rx); 4868 4869 /* apply Rx specific ring traits */ 4870 ring->count = adapter->rx_ring_count; 4871 ring->queue_index = rxr_idx; 4872 4873 /* assign ring to adapter */ 4874 adapter->rx_ring[rxr_idx] = ring; 4875 } 4876 4877 return 0; 4878 } 4879 4880 /** 4881 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4882 * @adapter: board private structure to initialize 4883 * 4884 * We allocate one q_vector per queue interrupt. If allocation fails we 4885 * return -ENOMEM. 
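 * When there are fewer vectors than rings, the remaining Tx and Rx rings are
 * spread across the vectors as evenly as possible.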
4886 */ igc_alloc_q_vectors(struct igc_adapter * adapter)4887 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4888 { 4889 int rxr_remaining = adapter->num_rx_queues; 4890 int txr_remaining = adapter->num_tx_queues; 4891 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4892 int q_vectors = adapter->num_q_vectors; 4893 int err; 4894 4895 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4896 for (; rxr_remaining; v_idx++) { 4897 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4898 0, 0, 1, rxr_idx); 4899 4900 if (err) 4901 goto err_out; 4902 4903 /* update counts and index */ 4904 rxr_remaining--; 4905 rxr_idx++; 4906 } 4907 } 4908 4909 for (; v_idx < q_vectors; v_idx++) { 4910 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4911 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4912 4913 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4914 tqpv, txr_idx, rqpv, rxr_idx); 4915 4916 if (err) 4917 goto err_out; 4918 4919 /* update counts and index */ 4920 rxr_remaining -= rqpv; 4921 txr_remaining -= tqpv; 4922 rxr_idx++; 4923 txr_idx++; 4924 } 4925 4926 return 0; 4927 4928 err_out: 4929 adapter->num_tx_queues = 0; 4930 adapter->num_rx_queues = 0; 4931 adapter->num_q_vectors = 0; 4932 4933 while (v_idx--) 4934 igc_free_q_vector(adapter, v_idx); 4935 4936 return -ENOMEM; 4937 } 4938 4939 /** 4940 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 4941 * @adapter: Pointer to adapter structure 4942 * @msix: boolean for MSI-X capability 4943 * 4944 * This function initializes the interrupts and allocates all of the queues. 4945 */ igc_init_interrupt_scheme(struct igc_adapter * adapter,bool msix)4946 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 4947 { 4948 struct net_device *dev = adapter->netdev; 4949 int err = 0; 4950 4951 igc_set_interrupt_capability(adapter, msix); 4952 4953 err = igc_alloc_q_vectors(adapter); 4954 if (err) { 4955 netdev_err(dev, "Unable to allocate memory for vectors\n"); 4956 goto err_alloc_q_vectors; 4957 } 4958 4959 igc_cache_ring_register(adapter); 4960 4961 return 0; 4962 4963 err_alloc_q_vectors: 4964 igc_reset_interrupt_capability(adapter); 4965 return err; 4966 } 4967 4968 /** 4969 * igc_sw_init - Initialize general software structures (struct igc_adapter) 4970 * @adapter: board private structure to initialize 4971 * 4972 * igc_sw_init initializes the Adapter private data structure. 4973 * Fields are initialized based on PCI device information and 4974 * OS network device settings (MTU size). 
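 * Note that igc_init_interrupt_scheme(), called below, may reduce the number
 * of queues if MSI-X vectors cannot be allocated.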
4975 */ igc_sw_init(struct igc_adapter * adapter)4976 static int igc_sw_init(struct igc_adapter *adapter) 4977 { 4978 struct net_device *netdev = adapter->netdev; 4979 struct pci_dev *pdev = adapter->pdev; 4980 struct igc_hw *hw = &adapter->hw; 4981 4982 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 4983 4984 /* set default ring sizes */ 4985 adapter->tx_ring_count = IGC_DEFAULT_TXD; 4986 adapter->rx_ring_count = IGC_DEFAULT_RXD; 4987 4988 /* set default ITR values */ 4989 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 4990 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 4991 4992 /* set default work limits */ 4993 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 4994 4995 /* adjust max frame to be at least the size of a standard frame */ 4996 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 4997 VLAN_HLEN; 4998 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 4999 5000 mutex_init(&adapter->nfc_rule_lock); 5001 INIT_LIST_HEAD(&adapter->nfc_rule_list); 5002 adapter->nfc_rule_count = 0; 5003 5004 spin_lock_init(&adapter->stats64_lock); 5005 spin_lock_init(&adapter->qbv_tx_lock); 5006 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 5007 adapter->flags |= IGC_FLAG_HAS_MSIX; 5008 5009 igc_init_queue_configuration(adapter); 5010 5011 /* This call may decrease the number of queues */ 5012 if (igc_init_interrupt_scheme(adapter, true)) { 5013 netdev_err(netdev, "Unable to allocate memory for queues\n"); 5014 return -ENOMEM; 5015 } 5016 5017 /* Explicitly disable IRQ since the NIC can be in any state. */ 5018 igc_irq_disable(adapter); 5019 5020 set_bit(__IGC_DOWN, &adapter->state); 5021 5022 return 0; 5023 } 5024 igc_set_queue_napi(struct igc_adapter * adapter,int vector,struct napi_struct * napi)5025 void igc_set_queue_napi(struct igc_adapter *adapter, int vector, 5026 struct napi_struct *napi) 5027 { 5028 struct igc_q_vector *q_vector = adapter->q_vector[vector]; 5029 5030 if (q_vector->rx.ring) 5031 netif_queue_set_napi(adapter->netdev, 5032 q_vector->rx.ring->queue_index, 5033 NETDEV_QUEUE_TYPE_RX, napi); 5034 5035 if (q_vector->tx.ring) 5036 netif_queue_set_napi(adapter->netdev, 5037 q_vector->tx.ring->queue_index, 5038 NETDEV_QUEUE_TYPE_TX, napi); 5039 } 5040 5041 /** 5042 * igc_up - Open the interface and prepare it to handle traffic 5043 * @adapter: board private structure 5044 */ igc_up(struct igc_adapter * adapter)5045 void igc_up(struct igc_adapter *adapter) 5046 { 5047 struct igc_hw *hw = &adapter->hw; 5048 struct napi_struct *napi; 5049 int i = 0; 5050 5051 /* hardware has been reset, we need to reload some things */ 5052 igc_configure(adapter); 5053 5054 clear_bit(__IGC_DOWN, &adapter->state); 5055 5056 for (i = 0; i < adapter->num_q_vectors; i++) { 5057 napi = &adapter->q_vector[i]->napi; 5058 napi_enable(napi); 5059 igc_set_queue_napi(adapter, i, napi); 5060 } 5061 5062 if (adapter->msix_entries) 5063 igc_configure_msix(adapter); 5064 else 5065 igc_assign_vector(adapter->q_vector[0], 0); 5066 5067 /* Clear any pending interrupts. */ 5068 rd32(IGC_ICR); 5069 igc_irq_enable(adapter); 5070 5071 netif_tx_start_all_queues(adapter->netdev); 5072 5073 /* start the watchdog. 
*/ 5074 hw->mac.get_link_status = true; 5075 schedule_work(&adapter->watchdog_task); 5076 } 5077 5078 /** 5079 * igc_update_stats - Update the board statistics counters 5080 * @adapter: board private structure 5081 */ igc_update_stats(struct igc_adapter * adapter)5082 void igc_update_stats(struct igc_adapter *adapter) 5083 { 5084 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 5085 struct pci_dev *pdev = adapter->pdev; 5086 struct igc_hw *hw = &adapter->hw; 5087 u64 _bytes, _packets; 5088 u64 bytes, packets; 5089 unsigned int start; 5090 u32 mpc; 5091 int i; 5092 5093 /* Prevent stats update while adapter is being reset, or if the pci 5094 * connection is down. 5095 */ 5096 if (adapter->link_speed == 0) 5097 return; 5098 if (pci_channel_offline(pdev)) 5099 return; 5100 5101 packets = 0; 5102 bytes = 0; 5103 5104 rcu_read_lock(); 5105 for (i = 0; i < adapter->num_rx_queues; i++) { 5106 struct igc_ring *ring = adapter->rx_ring[i]; 5107 u32 rqdpc = rd32(IGC_RQDPC(i)); 5108 5109 if (hw->mac.type >= igc_i225) 5110 wr32(IGC_RQDPC(i), 0); 5111 5112 if (rqdpc) { 5113 ring->rx_stats.drops += rqdpc; 5114 net_stats->rx_fifo_errors += rqdpc; 5115 } 5116 5117 do { 5118 start = u64_stats_fetch_begin(&ring->rx_syncp); 5119 _bytes = ring->rx_stats.bytes; 5120 _packets = ring->rx_stats.packets; 5121 } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); 5122 bytes += _bytes; 5123 packets += _packets; 5124 } 5125 5126 net_stats->rx_bytes = bytes; 5127 net_stats->rx_packets = packets; 5128 5129 packets = 0; 5130 bytes = 0; 5131 for (i = 0; i < adapter->num_tx_queues; i++) { 5132 struct igc_ring *ring = adapter->tx_ring[i]; 5133 5134 do { 5135 start = u64_stats_fetch_begin(&ring->tx_syncp); 5136 _bytes = ring->tx_stats.bytes; 5137 _packets = ring->tx_stats.packets; 5138 } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); 5139 bytes += _bytes; 5140 packets += _packets; 5141 } 5142 net_stats->tx_bytes = bytes; 5143 net_stats->tx_packets = packets; 5144 rcu_read_unlock(); 5145 5146 /* read stats registers */ 5147 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 5148 adapter->stats.gprc += rd32(IGC_GPRC); 5149 adapter->stats.gorc += rd32(IGC_GORCL); 5150 rd32(IGC_GORCH); /* clear GORCL */ 5151 adapter->stats.bprc += rd32(IGC_BPRC); 5152 adapter->stats.mprc += rd32(IGC_MPRC); 5153 adapter->stats.roc += rd32(IGC_ROC); 5154 5155 adapter->stats.prc64 += rd32(IGC_PRC64); 5156 adapter->stats.prc127 += rd32(IGC_PRC127); 5157 adapter->stats.prc255 += rd32(IGC_PRC255); 5158 adapter->stats.prc511 += rd32(IGC_PRC511); 5159 adapter->stats.prc1023 += rd32(IGC_PRC1023); 5160 adapter->stats.prc1522 += rd32(IGC_PRC1522); 5161 adapter->stats.tlpic += rd32(IGC_TLPIC); 5162 adapter->stats.rlpic += rd32(IGC_RLPIC); 5163 adapter->stats.hgptc += rd32(IGC_HGPTC); 5164 5165 mpc = rd32(IGC_MPC); 5166 adapter->stats.mpc += mpc; 5167 net_stats->rx_fifo_errors += mpc; 5168 adapter->stats.scc += rd32(IGC_SCC); 5169 adapter->stats.ecol += rd32(IGC_ECOL); 5170 adapter->stats.mcc += rd32(IGC_MCC); 5171 adapter->stats.latecol += rd32(IGC_LATECOL); 5172 adapter->stats.dc += rd32(IGC_DC); 5173 adapter->stats.rlec += rd32(IGC_RLEC); 5174 adapter->stats.xonrxc += rd32(IGC_XONRXC); 5175 adapter->stats.xontxc += rd32(IGC_XONTXC); 5176 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 5177 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 5178 adapter->stats.fcruc += rd32(IGC_FCRUC); 5179 adapter->stats.gptc += rd32(IGC_GPTC); 5180 adapter->stats.gotc += rd32(IGC_GOTCL); 5181 rd32(IGC_GOTCH); /* clear GOTCL */ 5182 adapter->stats.rnbc += 
rd32(IGC_RNBC); 5183 adapter->stats.ruc += rd32(IGC_RUC); 5184 adapter->stats.rfc += rd32(IGC_RFC); 5185 adapter->stats.rjc += rd32(IGC_RJC); 5186 adapter->stats.tor += rd32(IGC_TORH); 5187 adapter->stats.tot += rd32(IGC_TOTH); 5188 adapter->stats.tpr += rd32(IGC_TPR); 5189 5190 adapter->stats.ptc64 += rd32(IGC_PTC64); 5191 adapter->stats.ptc127 += rd32(IGC_PTC127); 5192 adapter->stats.ptc255 += rd32(IGC_PTC255); 5193 adapter->stats.ptc511 += rd32(IGC_PTC511); 5194 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 5195 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 5196 5197 adapter->stats.mptc += rd32(IGC_MPTC); 5198 adapter->stats.bptc += rd32(IGC_BPTC); 5199 5200 adapter->stats.tpt += rd32(IGC_TPT); 5201 adapter->stats.colc += rd32(IGC_COLC); 5202 adapter->stats.colc += rd32(IGC_RERC); 5203 5204 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 5205 5206 adapter->stats.tsctc += rd32(IGC_TSCTC); 5207 5208 adapter->stats.iac += rd32(IGC_IAC); 5209 5210 /* Fill out the OS statistics structure */ 5211 net_stats->multicast = adapter->stats.mprc; 5212 net_stats->collisions = adapter->stats.colc; 5213 5214 /* Rx Errors */ 5215 5216 /* RLEC on some newer hardware can be incorrect so build 5217 * our own version based on RUC and ROC 5218 */ 5219 net_stats->rx_errors = adapter->stats.rxerrc + 5220 adapter->stats.crcerrs + adapter->stats.algnerrc + 5221 adapter->stats.ruc + adapter->stats.roc + 5222 adapter->stats.cexterr; 5223 net_stats->rx_length_errors = adapter->stats.ruc + 5224 adapter->stats.roc; 5225 net_stats->rx_crc_errors = adapter->stats.crcerrs; 5226 net_stats->rx_frame_errors = adapter->stats.algnerrc; 5227 net_stats->rx_missed_errors = adapter->stats.mpc; 5228 5229 /* Tx Errors */ 5230 net_stats->tx_errors = adapter->stats.ecol + 5231 adapter->stats.latecol; 5232 net_stats->tx_aborted_errors = adapter->stats.ecol; 5233 net_stats->tx_window_errors = adapter->stats.latecol; 5234 net_stats->tx_carrier_errors = adapter->stats.tncrs; 5235 5236 /* Tx Dropped */ 5237 net_stats->tx_dropped = adapter->stats.txdrop; 5238 5239 /* Management Stats */ 5240 adapter->stats.mgptc += rd32(IGC_MGTPTC); 5241 adapter->stats.mgprc += rd32(IGC_MGTPRC); 5242 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 5243 } 5244 5245 /** 5246 * igc_down - Close the interface 5247 * @adapter: board private structure 5248 */ igc_down(struct igc_adapter * adapter)5249 void igc_down(struct igc_adapter *adapter) 5250 { 5251 struct net_device *netdev = adapter->netdev; 5252 struct igc_hw *hw = &adapter->hw; 5253 u32 tctl, rctl; 5254 int i = 0; 5255 5256 set_bit(__IGC_DOWN, &adapter->state); 5257 5258 igc_ptp_suspend(adapter); 5259 5260 if (pci_device_is_present(adapter->pdev)) { 5261 /* disable receives in the hardware */ 5262 rctl = rd32(IGC_RCTL); 5263 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 5264 /* flush and sleep below */ 5265 } 5266 /* set trans_start so we don't get spurious watchdogs during reset */ 5267 netif_trans_update(netdev); 5268 5269 netif_carrier_off(netdev); 5270 netif_tx_stop_all_queues(netdev); 5271 5272 if (pci_device_is_present(adapter->pdev)) { 5273 /* disable transmits in the hardware */ 5274 tctl = rd32(IGC_TCTL); 5275 tctl &= ~IGC_TCTL_EN; 5276 wr32(IGC_TCTL, tctl); 5277 /* flush both disables and wait for them to finish */ 5278 wrfl(); 5279 usleep_range(10000, 20000); 5280 5281 igc_irq_disable(adapter); 5282 } 5283 5284 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5285 5286 for (i = 0; i < adapter->num_q_vectors; i++) { 5287 if (adapter->q_vector[i]) { 5288 napi_synchronize(&adapter->q_vector[i]->napi); 5289 
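			/* Drop the queue-to-NAPI mapping before the NAPI
			 * instance is disabled.
			 */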
igc_set_queue_napi(adapter, i, NULL); 5290 napi_disable(&adapter->q_vector[i]->napi); 5291 } 5292 } 5293 5294 del_timer_sync(&adapter->watchdog_timer); 5295 del_timer_sync(&adapter->phy_info_timer); 5296 5297 /* record the stats before reset*/ 5298 spin_lock(&adapter->stats64_lock); 5299 igc_update_stats(adapter); 5300 spin_unlock(&adapter->stats64_lock); 5301 5302 adapter->link_speed = 0; 5303 adapter->link_duplex = 0; 5304 5305 if (!pci_channel_offline(adapter->pdev)) 5306 igc_reset(adapter); 5307 5308 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 5309 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 5310 5311 igc_disable_all_tx_rings_hw(adapter); 5312 igc_clean_all_tx_rings(adapter); 5313 igc_clean_all_rx_rings(adapter); 5314 } 5315 igc_reinit_locked(struct igc_adapter * adapter)5316 void igc_reinit_locked(struct igc_adapter *adapter) 5317 { 5318 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5319 usleep_range(1000, 2000); 5320 igc_down(adapter); 5321 igc_up(adapter); 5322 clear_bit(__IGC_RESETTING, &adapter->state); 5323 } 5324 igc_reset_task(struct work_struct * work)5325 static void igc_reset_task(struct work_struct *work) 5326 { 5327 struct igc_adapter *adapter; 5328 5329 adapter = container_of(work, struct igc_adapter, reset_task); 5330 5331 rtnl_lock(); 5332 /* If we're already down or resetting, just bail */ 5333 if (test_bit(__IGC_DOWN, &adapter->state) || 5334 test_bit(__IGC_RESETTING, &adapter->state)) { 5335 rtnl_unlock(); 5336 return; 5337 } 5338 5339 igc_rings_dump(adapter); 5340 igc_regs_dump(adapter); 5341 netdev_err(adapter->netdev, "Reset adapter\n"); 5342 igc_reinit_locked(adapter); 5343 rtnl_unlock(); 5344 } 5345 5346 /** 5347 * igc_change_mtu - Change the Maximum Transfer Unit 5348 * @netdev: network interface device structure 5349 * @new_mtu: new value for maximum frame size 5350 * 5351 * Returns 0 on success, negative on failure 5352 */ igc_change_mtu(struct net_device * netdev,int new_mtu)5353 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 5354 { 5355 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 5356 struct igc_adapter *adapter = netdev_priv(netdev); 5357 5358 if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { 5359 netdev_dbg(netdev, "Jumbo frames not supported with XDP"); 5360 return -EINVAL; 5361 } 5362 5363 /* adjust max frame to be at least the size of a standard frame */ 5364 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 5365 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 5366 5367 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 5368 usleep_range(1000, 2000); 5369 5370 /* igc_down has a dependency on max_frame_size */ 5371 adapter->max_frame_size = max_frame; 5372 5373 if (netif_running(netdev)) 5374 igc_down(adapter); 5375 5376 netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); 5377 WRITE_ONCE(netdev->mtu, new_mtu); 5378 5379 if (netif_running(netdev)) 5380 igc_up(adapter); 5381 else 5382 igc_reset(adapter); 5383 5384 clear_bit(__IGC_RESETTING, &adapter->state); 5385 5386 return 0; 5387 } 5388 5389 /** 5390 * igc_tx_timeout - Respond to a Tx Hang 5391 * @netdev: network interface device structure 5392 * @txqueue: queue number that timed out 5393 **/ igc_tx_timeout(struct net_device * netdev,unsigned int __always_unused txqueue)5394 static void igc_tx_timeout(struct net_device *netdev, 5395 unsigned int __always_unused txqueue) 5396 { 5397 struct igc_adapter *adapter = netdev_priv(netdev); 5398 struct igc_hw *hw = &adapter->hw; 5399 5400 /* Do the reset 
outside of interrupt context */ 5401 adapter->tx_timeout_count++; 5402 schedule_work(&adapter->reset_task); 5403 wr32(IGC_EICS, 5404 (adapter->eims_enable_mask & ~adapter->eims_other)); 5405 } 5406 5407 /** 5408 * igc_get_stats64 - Get System Network Statistics 5409 * @netdev: network interface device structure 5410 * @stats: rtnl_link_stats64 pointer 5411 * 5412 * Returns the address of the device statistics structure. 5413 * The statistics are updated here and also from the timer callback. 5414 */ igc_get_stats64(struct net_device * netdev,struct rtnl_link_stats64 * stats)5415 static void igc_get_stats64(struct net_device *netdev, 5416 struct rtnl_link_stats64 *stats) 5417 { 5418 struct igc_adapter *adapter = netdev_priv(netdev); 5419 5420 spin_lock(&adapter->stats64_lock); 5421 if (!test_bit(__IGC_RESETTING, &adapter->state)) 5422 igc_update_stats(adapter); 5423 memcpy(stats, &adapter->stats64, sizeof(*stats)); 5424 spin_unlock(&adapter->stats64_lock); 5425 } 5426 igc_fix_features(struct net_device * netdev,netdev_features_t features)5427 static netdev_features_t igc_fix_features(struct net_device *netdev, 5428 netdev_features_t features) 5429 { 5430 /* Since there is no support for separate Rx/Tx vlan accel 5431 * enable/disable make sure Tx flag is always in same state as Rx. 5432 */ 5433 if (features & NETIF_F_HW_VLAN_CTAG_RX) 5434 features |= NETIF_F_HW_VLAN_CTAG_TX; 5435 else 5436 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 5437 5438 return features; 5439 } 5440 igc_set_features(struct net_device * netdev,netdev_features_t features)5441 static int igc_set_features(struct net_device *netdev, 5442 netdev_features_t features) 5443 { 5444 netdev_features_t changed = netdev->features ^ features; 5445 struct igc_adapter *adapter = netdev_priv(netdev); 5446 5447 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 5448 igc_vlan_mode(netdev, features); 5449 5450 /* Add VLAN support */ 5451 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 5452 return 0; 5453 5454 if (!(features & NETIF_F_NTUPLE)) 5455 igc_flush_nfc_rules(adapter); 5456 5457 netdev->features = features; 5458 5459 if (netif_running(netdev)) 5460 igc_reinit_locked(adapter); 5461 else 5462 igc_reset(adapter); 5463 5464 return 1; 5465 } 5466 5467 static netdev_features_t igc_features_check(struct sk_buff * skb,struct net_device * dev,netdev_features_t features)5468 igc_features_check(struct sk_buff *skb, struct net_device *dev, 5469 netdev_features_t features) 5470 { 5471 unsigned int network_hdr_len, mac_hdr_len; 5472 5473 /* Make certain the headers can be described by a context descriptor */ 5474 mac_hdr_len = skb_network_offset(skb); 5475 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 5476 return features & ~(NETIF_F_HW_CSUM | 5477 NETIF_F_SCTP_CRC | 5478 NETIF_F_HW_VLAN_CTAG_TX | 5479 NETIF_F_TSO | 5480 NETIF_F_TSO6); 5481 5482 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 5483 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 5484 return features & ~(NETIF_F_HW_CSUM | 5485 NETIF_F_SCTP_CRC | 5486 NETIF_F_TSO | 5487 NETIF_F_TSO6); 5488 5489 /* We can only support IPv4 TSO in tunnels if we can mangle the 5490 * inner IP ID field, so strip TSO if MANGLEID is not supported. 
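	 * NETIF_F_TSO6 is left untouched since IPv6 has no IP ID field to
	 * mangle.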
5491 */ 5492 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 5493 features &= ~NETIF_F_TSO; 5494 5495 return features; 5496 } 5497 igc_tsync_interrupt(struct igc_adapter * adapter)5498 static void igc_tsync_interrupt(struct igc_adapter *adapter) 5499 { 5500 struct igc_hw *hw = &adapter->hw; 5501 u32 tsauxc, sec, nsec, tsicr; 5502 struct ptp_clock_event event; 5503 struct timespec64 ts; 5504 5505 tsicr = rd32(IGC_TSICR); 5506 5507 if (tsicr & IGC_TSICR_SYS_WRAP) { 5508 event.type = PTP_CLOCK_PPS; 5509 if (adapter->ptp_caps.pps) 5510 ptp_clock_event(adapter->ptp_clock, &event); 5511 } 5512 5513 if (tsicr & IGC_TSICR_TXTS) { 5514 /* retrieve hardware timestamp */ 5515 igc_ptp_tx_tstamp_event(adapter); 5516 } 5517 5518 if (tsicr & IGC_TSICR_TT0) { 5519 spin_lock(&adapter->tmreg_lock); 5520 ts = timespec64_add(adapter->perout[0].start, 5521 adapter->perout[0].period); 5522 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5523 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5524 tsauxc = rd32(IGC_TSAUXC); 5525 tsauxc |= IGC_TSAUXC_EN_TT0; 5526 wr32(IGC_TSAUXC, tsauxc); 5527 adapter->perout[0].start = ts; 5528 spin_unlock(&adapter->tmreg_lock); 5529 } 5530 5531 if (tsicr & IGC_TSICR_TT1) { 5532 spin_lock(&adapter->tmreg_lock); 5533 ts = timespec64_add(adapter->perout[1].start, 5534 adapter->perout[1].period); 5535 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5536 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5537 tsauxc = rd32(IGC_TSAUXC); 5538 tsauxc |= IGC_TSAUXC_EN_TT1; 5539 wr32(IGC_TSAUXC, tsauxc); 5540 adapter->perout[1].start = ts; 5541 spin_unlock(&adapter->tmreg_lock); 5542 } 5543 5544 if (tsicr & IGC_TSICR_AUTT0) { 5545 nsec = rd32(IGC_AUXSTMPL0); 5546 sec = rd32(IGC_AUXSTMPH0); 5547 event.type = PTP_CLOCK_EXTTS; 5548 event.index = 0; 5549 event.timestamp = sec * NSEC_PER_SEC + nsec; 5550 ptp_clock_event(adapter->ptp_clock, &event); 5551 } 5552 5553 if (tsicr & IGC_TSICR_AUTT1) { 5554 nsec = rd32(IGC_AUXSTMPL1); 5555 sec = rd32(IGC_AUXSTMPH1); 5556 event.type = PTP_CLOCK_EXTTS; 5557 event.index = 1; 5558 event.timestamp = sec * NSEC_PER_SEC + nsec; 5559 ptp_clock_event(adapter->ptp_clock, &event); 5560 } 5561 } 5562 5563 /** 5564 * igc_msix_other - msix other interrupt handler 5565 * @irq: interrupt number 5566 * @data: pointer to a q_vector 5567 */ igc_msix_other(int irq,void * data)5568 static irqreturn_t igc_msix_other(int irq, void *data) 5569 { 5570 struct igc_adapter *adapter = data; 5571 struct igc_hw *hw = &adapter->hw; 5572 u32 icr = rd32(IGC_ICR); 5573 5574 /* reading ICR causes bit 31 of EICR to be cleared */ 5575 if (icr & IGC_ICR_DRSTA) 5576 schedule_work(&adapter->reset_task); 5577 5578 if (icr & IGC_ICR_DOUTSYNC) { 5579 /* HW is reporting DMA is out of sync */ 5580 adapter->stats.doosync++; 5581 } 5582 5583 if (icr & IGC_ICR_LSC) { 5584 hw->mac.get_link_status = true; 5585 /* guard against interrupt when we're going down */ 5586 if (!test_bit(__IGC_DOWN, &adapter->state)) 5587 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5588 } 5589 5590 if (icr & IGC_ICR_TS) 5591 igc_tsync_interrupt(adapter); 5592 5593 wr32(IGC_EIMS, adapter->eims_other); 5594 5595 return IRQ_HANDLED; 5596 } 5597 igc_write_itr(struct igc_q_vector * q_vector)5598 static void igc_write_itr(struct igc_q_vector *q_vector) 5599 { 5600 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5601 5602 if (!q_vector->set_itr) 5603 return; 5604 5605 if (!itr_val) 5606 itr_val = IGC_ITR_VAL_MASK; 5607 5608 itr_val |= IGC_EITR_CNT_IGNR; 5609 5610 writel(itr_val, q_vector->itr_register); 
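	/* Write done; clear the flag so the register is not written
	 * again until a new ITR value has been calculated.
	 */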
5611 q_vector->set_itr = 0; 5612 } 5613 igc_msix_ring(int irq,void * data)5614 static irqreturn_t igc_msix_ring(int irq, void *data) 5615 { 5616 struct igc_q_vector *q_vector = data; 5617 5618 /* Write the ITR value calculated from the previous interrupt. */ 5619 igc_write_itr(q_vector); 5620 5621 napi_schedule(&q_vector->napi); 5622 5623 return IRQ_HANDLED; 5624 } 5625 5626 /** 5627 * igc_request_msix - Initialize MSI-X interrupts 5628 * @adapter: Pointer to adapter structure 5629 * 5630 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5631 * kernel. 5632 */ igc_request_msix(struct igc_adapter * adapter)5633 static int igc_request_msix(struct igc_adapter *adapter) 5634 { 5635 unsigned int num_q_vectors = adapter->num_q_vectors; 5636 int i = 0, err = 0, vector = 0, free_vector = 0; 5637 struct net_device *netdev = adapter->netdev; 5638 5639 err = request_irq(adapter->msix_entries[vector].vector, 5640 &igc_msix_other, 0, netdev->name, adapter); 5641 if (err) 5642 goto err_out; 5643 5644 if (num_q_vectors > MAX_Q_VECTORS) { 5645 num_q_vectors = MAX_Q_VECTORS; 5646 dev_warn(&adapter->pdev->dev, 5647 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5648 adapter->num_q_vectors, MAX_Q_VECTORS); 5649 } 5650 for (i = 0; i < num_q_vectors; i++) { 5651 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5652 5653 vector++; 5654 5655 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5656 5657 if (q_vector->rx.ring && q_vector->tx.ring) 5658 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5659 q_vector->rx.ring->queue_index); 5660 else if (q_vector->tx.ring) 5661 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5662 q_vector->tx.ring->queue_index); 5663 else if (q_vector->rx.ring) 5664 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5665 q_vector->rx.ring->queue_index); 5666 else 5667 sprintf(q_vector->name, "%s-unused", netdev->name); 5668 5669 err = request_irq(adapter->msix_entries[vector].vector, 5670 igc_msix_ring, 0, q_vector->name, 5671 q_vector); 5672 if (err) 5673 goto err_free; 5674 5675 netif_napi_set_irq(&q_vector->napi, 5676 adapter->msix_entries[vector].vector); 5677 } 5678 5679 igc_configure_msix(adapter); 5680 return 0; 5681 5682 err_free: 5683 /* free already assigned IRQs */ 5684 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5685 5686 vector--; 5687 for (i = 0; i < vector; i++) { 5688 free_irq(adapter->msix_entries[free_vector++].vector, 5689 adapter->q_vector[i]); 5690 } 5691 err_out: 5692 return err; 5693 } 5694 5695 /** 5696 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5697 * @adapter: Pointer to adapter structure 5698 * 5699 * This function resets the device so that it has 0 rx queues, tx queues, and 5700 * MSI-X interrupts allocated. 
5701 */ igc_clear_interrupt_scheme(struct igc_adapter * adapter)5702 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5703 { 5704 igc_free_q_vectors(adapter); 5705 igc_reset_interrupt_capability(adapter); 5706 } 5707 5708 /* Need to wait a few seconds after link up to get diagnostic information from 5709 * the phy 5710 */ igc_update_phy_info(struct timer_list * t)5711 static void igc_update_phy_info(struct timer_list *t) 5712 { 5713 struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); 5714 5715 igc_get_phy_info(&adapter->hw); 5716 } 5717 5718 /** 5719 * igc_has_link - check shared code for link and determine up/down 5720 * @adapter: pointer to driver private info 5721 */ igc_has_link(struct igc_adapter * adapter)5722 bool igc_has_link(struct igc_adapter *adapter) 5723 { 5724 struct igc_hw *hw = &adapter->hw; 5725 bool link_active = false; 5726 5727 /* get_link_status is set on LSC (link status) interrupt or 5728 * rx sequence error interrupt. get_link_status will stay 5729 * false until the igc_check_for_link establishes link 5730 * for copper adapters ONLY 5731 */ 5732 if (!hw->mac.get_link_status) 5733 return true; 5734 hw->mac.ops.check_for_link(hw); 5735 link_active = !hw->mac.get_link_status; 5736 5737 if (hw->mac.type == igc_i225) { 5738 if (!netif_carrier_ok(adapter->netdev)) { 5739 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5740 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5741 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5742 adapter->link_check_timeout = jiffies; 5743 } 5744 } 5745 5746 return link_active; 5747 } 5748 5749 /** 5750 * igc_watchdog - Timer Call-back 5751 * @t: timer for the watchdog 5752 */ igc_watchdog(struct timer_list * t)5753 static void igc_watchdog(struct timer_list *t) 5754 { 5755 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); 5756 /* Do the rest outside of interrupt context */ 5757 schedule_work(&adapter->watchdog_task); 5758 } 5759 igc_watchdog_task(struct work_struct * work)5760 static void igc_watchdog_task(struct work_struct *work) 5761 { 5762 struct igc_adapter *adapter = container_of(work, 5763 struct igc_adapter, 5764 watchdog_task); 5765 struct net_device *netdev = adapter->netdev; 5766 struct igc_hw *hw = &adapter->hw; 5767 struct igc_phy_info *phy = &hw->phy; 5768 u16 phy_data, retry_count = 20; 5769 u32 link; 5770 int i; 5771 5772 link = igc_has_link(adapter); 5773 5774 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5775 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5776 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5777 else 5778 link = false; 5779 } 5780 5781 if (link) { 5782 /* Cancel scheduled suspend requests. */ 5783 pm_runtime_resume(netdev->dev.parent); 5784 5785 if (!netif_carrier_ok(netdev)) { 5786 u32 ctrl; 5787 5788 hw->mac.ops.get_speed_and_duplex(hw, 5789 &adapter->link_speed, 5790 &adapter->link_duplex); 5791 5792 ctrl = rd32(IGC_CTRL); 5793 /* Link status message must follow this format */ 5794 netdev_info(netdev, 5795 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5796 adapter->link_speed, 5797 adapter->link_duplex == FULL_DUPLEX ? 5798 "Full" : "Half", 5799 (ctrl & IGC_CTRL_TFCE) && 5800 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5801 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5802 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5803 5804 /* disable EEE if enabled */ 5805 if ((adapter->flags & IGC_FLAG_EEE) && 5806 adapter->link_duplex == HALF_DUPLEX) { 5807 netdev_info(netdev, 5808 "EEE Disabled: unsupported at half duplex. 
Re-enable using ethtool when at full duplex\n"); 5809 adapter->hw.dev_spec._base.eee_enable = false; 5810 adapter->flags &= ~IGC_FLAG_EEE; 5811 } 5812 5813 /* check if SmartSpeed worked */ 5814 igc_check_downshift(hw); 5815 if (phy->speed_downgraded) 5816 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5817 5818 /* adjust timeout factor according to speed/duplex */ 5819 adapter->tx_timeout_factor = 1; 5820 switch (adapter->link_speed) { 5821 case SPEED_10: 5822 adapter->tx_timeout_factor = 14; 5823 break; 5824 case SPEED_100: 5825 case SPEED_1000: 5826 case SPEED_2500: 5827 adapter->tx_timeout_factor = 1; 5828 break; 5829 } 5830 5831 /* Once the launch time has been set on the wire, there 5832 * is a delay before the link speed can be determined 5833 * based on link-up activity. Write into the register 5834 * as soon as we know the correct link speed. 5835 */ 5836 igc_tsn_adjust_txtime_offset(adapter); 5837 5838 if (adapter->link_speed != SPEED_1000) 5839 goto no_wait; 5840 5841 /* wait for Remote receiver status OK */ 5842 retry_read_status: 5843 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5844 &phy_data)) { 5845 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5846 retry_count) { 5847 msleep(100); 5848 retry_count--; 5849 goto retry_read_status; 5850 } else if (!retry_count) { 5851 netdev_err(netdev, "exceed max 2 second\n"); 5852 } 5853 } else { 5854 netdev_err(netdev, "read 1000Base-T Status Reg\n"); 5855 } 5856 no_wait: 5857 netif_carrier_on(netdev); 5858 5859 /* link state has changed, schedule phy info update */ 5860 if (!test_bit(__IGC_DOWN, &adapter->state)) 5861 mod_timer(&adapter->phy_info_timer, 5862 round_jiffies(jiffies + 2 * HZ)); 5863 } 5864 } else { 5865 if (netif_carrier_ok(netdev)) { 5866 adapter->link_speed = 0; 5867 adapter->link_duplex = 0; 5868 5869 /* Links status message must follow this format */ 5870 netdev_info(netdev, "NIC Link is Down\n"); 5871 netif_carrier_off(netdev); 5872 5873 /* link state has changed, schedule phy info update */ 5874 if (!test_bit(__IGC_DOWN, &adapter->state)) 5875 mod_timer(&adapter->phy_info_timer, 5876 round_jiffies(jiffies + 2 * HZ)); 5877 5878 pm_schedule_suspend(netdev->dev.parent, 5879 MSEC_PER_SEC * 5); 5880 } 5881 } 5882 5883 spin_lock(&adapter->stats64_lock); 5884 igc_update_stats(adapter); 5885 spin_unlock(&adapter->stats64_lock); 5886 5887 for (i = 0; i < adapter->num_tx_queues; i++) { 5888 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5889 5890 if (!netif_carrier_ok(netdev)) { 5891 /* We've lost link, so the controller stops DMA, 5892 * but we've got queued Tx work that's never going 5893 * to get done, so reset controller to flush Tx. 5894 * (Do the reset outside of interrupt context). 
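			 * The check below fires when the ring still has
			 * descriptors in use, i.e. pending Tx work.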
5895 */ 5896 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5897 adapter->tx_timeout_count++; 5898 schedule_work(&adapter->reset_task); 5899 /* return immediately since reset is imminent */ 5900 return; 5901 } 5902 } 5903 5904 /* Force detection of hung controller every watchdog period */ 5905 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5906 } 5907 5908 /* Cause software interrupt to ensure Rx ring is cleaned */ 5909 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5910 u32 eics = 0; 5911 5912 for (i = 0; i < adapter->num_q_vectors; i++) { 5913 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5914 struct igc_ring *rx_ring; 5915 5916 if (!q_vector->rx.ring) 5917 continue; 5918 5919 rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; 5920 5921 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 5922 eics |= q_vector->eims_value; 5923 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5924 } 5925 } 5926 if (eics) 5927 wr32(IGC_EICS, eics); 5928 } else { 5929 struct igc_ring *rx_ring = adapter->rx_ring[0]; 5930 5931 if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { 5932 clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5933 wr32(IGC_ICS, IGC_ICS_RXDMT0); 5934 } 5935 } 5936 5937 igc_ptp_tx_hang(adapter); 5938 5939 /* Reset the timer */ 5940 if (!test_bit(__IGC_DOWN, &adapter->state)) { 5941 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 5942 mod_timer(&adapter->watchdog_timer, 5943 round_jiffies(jiffies + HZ)); 5944 else 5945 mod_timer(&adapter->watchdog_timer, 5946 round_jiffies(jiffies + 2 * HZ)); 5947 } 5948 } 5949 5950 /** 5951 * igc_intr_msi - Interrupt Handler 5952 * @irq: interrupt number 5953 * @data: pointer to a network interface device structure 5954 */ igc_intr_msi(int irq,void * data)5955 static irqreturn_t igc_intr_msi(int irq, void *data) 5956 { 5957 struct igc_adapter *adapter = data; 5958 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5959 struct igc_hw *hw = &adapter->hw; 5960 /* read ICR disables interrupts using IAM */ 5961 u32 icr = rd32(IGC_ICR); 5962 5963 igc_write_itr(q_vector); 5964 5965 if (icr & IGC_ICR_DRSTA) 5966 schedule_work(&adapter->reset_task); 5967 5968 if (icr & IGC_ICR_DOUTSYNC) { 5969 /* HW is reporting DMA is out of sync */ 5970 adapter->stats.doosync++; 5971 } 5972 5973 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5974 hw->mac.get_link_status = true; 5975 if (!test_bit(__IGC_DOWN, &adapter->state)) 5976 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5977 } 5978 5979 if (icr & IGC_ICR_TS) 5980 igc_tsync_interrupt(adapter); 5981 5982 napi_schedule(&q_vector->napi); 5983 5984 return IRQ_HANDLED; 5985 } 5986 5987 /** 5988 * igc_intr - Legacy Interrupt Handler 5989 * @irq: interrupt number 5990 * @data: pointer to a network interface device structure 5991 */ igc_intr(int irq,void * data)5992 static irqreturn_t igc_intr(int irq, void *data) 5993 { 5994 struct igc_adapter *adapter = data; 5995 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5996 struct igc_hw *hw = &adapter->hw; 5997 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No 5998 * need for the IMC write 5999 */ 6000 u32 icr = rd32(IGC_ICR); 6001 6002 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 6003 * not set, then the adapter didn't send an interrupt 6004 */ 6005 if (!(icr & IGC_ICR_INT_ASSERTED)) 6006 return IRQ_NONE; 6007 6008 igc_write_itr(q_vector); 6009 6010 if (icr & IGC_ICR_DRSTA) 6011 schedule_work(&adapter->reset_task); 6012 6013 if (icr & IGC_ICR_DOUTSYNC) { 6014 /* HW is reporting DMA is out of sync */ 6015 adapter->stats.doosync++; 6016 } 6017 6018 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 6019 hw->mac.get_link_status = true; 6020 /* guard against interrupt when we're going down */ 6021 if (!test_bit(__IGC_DOWN, &adapter->state)) 6022 mod_timer(&adapter->watchdog_timer, jiffies + 1); 6023 } 6024 6025 if (icr & IGC_ICR_TS) 6026 igc_tsync_interrupt(adapter); 6027 6028 napi_schedule(&q_vector->napi); 6029 6030 return IRQ_HANDLED; 6031 } 6032 igc_free_irq(struct igc_adapter * adapter)6033 static void igc_free_irq(struct igc_adapter *adapter) 6034 { 6035 if (adapter->msix_entries) { 6036 int vector = 0, i; 6037 6038 free_irq(adapter->msix_entries[vector++].vector, adapter); 6039 6040 for (i = 0; i < adapter->num_q_vectors; i++) 6041 free_irq(adapter->msix_entries[vector++].vector, 6042 adapter->q_vector[i]); 6043 } else { 6044 free_irq(adapter->pdev->irq, adapter); 6045 } 6046 } 6047 6048 /** 6049 * igc_request_irq - initialize interrupts 6050 * @adapter: Pointer to adapter structure 6051 * 6052 * Attempts to configure interrupts using the best available 6053 * capabilities of the hardware and kernel. 6054 */ igc_request_irq(struct igc_adapter * adapter)6055 static int igc_request_irq(struct igc_adapter *adapter) 6056 { 6057 struct net_device *netdev = adapter->netdev; 6058 struct pci_dev *pdev = adapter->pdev; 6059 int err = 0; 6060 6061 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 6062 err = igc_request_msix(adapter); 6063 if (!err) 6064 goto request_done; 6065 /* fall back to MSI */ 6066 igc_free_all_tx_resources(adapter); 6067 igc_free_all_rx_resources(adapter); 6068 6069 igc_clear_interrupt_scheme(adapter); 6070 err = igc_init_interrupt_scheme(adapter, false); 6071 if (err) 6072 goto request_done; 6073 igc_setup_all_tx_resources(adapter); 6074 igc_setup_all_rx_resources(adapter); 6075 igc_configure(adapter); 6076 } 6077 6078 igc_assign_vector(adapter->q_vector[0], 0); 6079 6080 if (adapter->flags & IGC_FLAG_HAS_MSI) { 6081 err = request_irq(pdev->irq, &igc_intr_msi, 0, 6082 netdev->name, adapter); 6083 if (!err) 6084 goto request_done; 6085 6086 /* fall back to legacy interrupts */ 6087 igc_reset_interrupt_capability(adapter); 6088 adapter->flags &= ~IGC_FLAG_HAS_MSI; 6089 } 6090 6091 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 6092 netdev->name, adapter); 6093 6094 if (err) 6095 netdev_err(netdev, "Error %d getting interrupt\n", err); 6096 6097 request_done: 6098 return err; 6099 } 6100 6101 /** 6102 * __igc_open - Called when a network interface is made active 6103 * @netdev: network interface device structure 6104 * @resuming: boolean indicating if the device is resuming 6105 * 6106 * Returns 0 on success, negative value on failure 6107 * 6108 * The open entry point is called when a network interface is made 6109 * active by the system (IFF_UP). At this point all resources needed 6110 * for transmit and receive operations are allocated, the interrupt 6111 * handler is registered with the OS, the watchdog timer is started, 6112 * and the stack is notified that the interface is ready. 
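 * When @resuming is true the call comes from the resume path, so the
 * runtime PM get/put handling is skipped.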
6113 */ __igc_open(struct net_device * netdev,bool resuming)6114 static int __igc_open(struct net_device *netdev, bool resuming) 6115 { 6116 struct igc_adapter *adapter = netdev_priv(netdev); 6117 struct pci_dev *pdev = adapter->pdev; 6118 struct igc_hw *hw = &adapter->hw; 6119 struct napi_struct *napi; 6120 int err = 0; 6121 int i = 0; 6122 6123 /* disallow open during test */ 6124 6125 if (test_bit(__IGC_TESTING, &adapter->state)) { 6126 WARN_ON(resuming); 6127 return -EBUSY; 6128 } 6129 6130 if (!resuming) 6131 pm_runtime_get_sync(&pdev->dev); 6132 6133 netif_carrier_off(netdev); 6134 6135 /* allocate transmit descriptors */ 6136 err = igc_setup_all_tx_resources(adapter); 6137 if (err) 6138 goto err_setup_tx; 6139 6140 /* allocate receive descriptors */ 6141 err = igc_setup_all_rx_resources(adapter); 6142 if (err) 6143 goto err_setup_rx; 6144 6145 igc_power_up_link(adapter); 6146 6147 igc_configure(adapter); 6148 6149 err = igc_request_irq(adapter); 6150 if (err) 6151 goto err_req_irq; 6152 6153 clear_bit(__IGC_DOWN, &adapter->state); 6154 6155 for (i = 0; i < adapter->num_q_vectors; i++) { 6156 napi = &adapter->q_vector[i]->napi; 6157 napi_enable(napi); 6158 igc_set_queue_napi(adapter, i, napi); 6159 } 6160 6161 /* Clear any pending interrupts. */ 6162 rd32(IGC_ICR); 6163 igc_irq_enable(adapter); 6164 6165 if (!resuming) 6166 pm_runtime_put(&pdev->dev); 6167 6168 netif_tx_start_all_queues(netdev); 6169 6170 /* start the watchdog. */ 6171 hw->mac.get_link_status = true; 6172 schedule_work(&adapter->watchdog_task); 6173 6174 return IGC_SUCCESS; 6175 6176 err_req_irq: 6177 igc_release_hw_control(adapter); 6178 igc_power_down_phy_copper_base(&adapter->hw); 6179 igc_free_all_rx_resources(adapter); 6180 err_setup_rx: 6181 igc_free_all_tx_resources(adapter); 6182 err_setup_tx: 6183 igc_reset(adapter); 6184 if (!resuming) 6185 pm_runtime_put(&pdev->dev); 6186 6187 return err; 6188 } 6189 igc_open(struct net_device * netdev)6190 int igc_open(struct net_device *netdev) 6191 { 6192 struct igc_adapter *adapter = netdev_priv(netdev); 6193 int err; 6194 6195 /* Notify the stack of the actual queue counts. */ 6196 err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, 6197 adapter->num_rx_queues); 6198 if (err) { 6199 netdev_err(netdev, "error setting real queue count\n"); 6200 return err; 6201 } 6202 6203 return __igc_open(netdev, false); 6204 } 6205 6206 /** 6207 * __igc_close - Disables a network interface 6208 * @netdev: network interface device structure 6209 * @suspending: boolean indicating the device is suspending 6210 * 6211 * Returns 0, this is not allowed to fail 6212 * 6213 * The close entry point is called when an interface is de-activated 6214 * by the OS. The hardware is still under the driver's control, but 6215 * needs to be disabled. A global MAC reset is issued to stop the 6216 * hardware, and all transmit and receive resources are freed. 
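 * When @suspending is true the call comes from the suspend path and the
 * runtime PM reference is left untouched.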
6217 */ __igc_close(struct net_device * netdev,bool suspending)6218 static int __igc_close(struct net_device *netdev, bool suspending) 6219 { 6220 struct igc_adapter *adapter = netdev_priv(netdev); 6221 struct pci_dev *pdev = adapter->pdev; 6222 6223 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 6224 6225 if (!suspending) 6226 pm_runtime_get_sync(&pdev->dev); 6227 6228 igc_down(adapter); 6229 6230 igc_release_hw_control(adapter); 6231 6232 igc_free_irq(adapter); 6233 6234 igc_free_all_tx_resources(adapter); 6235 igc_free_all_rx_resources(adapter); 6236 6237 if (!suspending) 6238 pm_runtime_put_sync(&pdev->dev); 6239 6240 return 0; 6241 } 6242 igc_close(struct net_device * netdev)6243 int igc_close(struct net_device *netdev) 6244 { 6245 if (netif_device_present(netdev) || netdev->dismantle) 6246 return __igc_close(netdev, false); 6247 return 0; 6248 } 6249 6250 /** 6251 * igc_ioctl - Access the hwtstamp interface 6252 * @netdev: network interface device structure 6253 * @ifr: interface request data 6254 * @cmd: ioctl command 6255 **/ igc_ioctl(struct net_device * netdev,struct ifreq * ifr,int cmd)6256 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6257 { 6258 switch (cmd) { 6259 case SIOCGHWTSTAMP: 6260 return igc_ptp_get_ts_config(netdev, ifr); 6261 case SIOCSHWTSTAMP: 6262 return igc_ptp_set_ts_config(netdev, ifr); 6263 default: 6264 return -EOPNOTSUPP; 6265 } 6266 } 6267 igc_save_launchtime_params(struct igc_adapter * adapter,int queue,bool enable)6268 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, 6269 bool enable) 6270 { 6271 struct igc_ring *ring; 6272 6273 if (queue < 0 || queue >= adapter->num_tx_queues) 6274 return -EINVAL; 6275 6276 ring = adapter->tx_ring[queue]; 6277 ring->launchtime_enable = enable; 6278 6279 return 0; 6280 } 6281 is_base_time_past(ktime_t base_time,const struct timespec64 * now)6282 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) 6283 { 6284 struct timespec64 b; 6285 6286 b = ktime_to_timespec64(base_time); 6287 6288 return timespec64_compare(now, &b) > 0; 6289 } 6290 validate_schedule(struct igc_adapter * adapter,const struct tc_taprio_qopt_offload * qopt)6291 static bool validate_schedule(struct igc_adapter *adapter, 6292 const struct tc_taprio_qopt_offload *qopt) 6293 { 6294 int queue_uses[IGC_MAX_TX_QUEUES] = { }; 6295 struct igc_hw *hw = &adapter->hw; 6296 struct timespec64 now; 6297 size_t n; 6298 6299 if (qopt->cycle_time_extension) 6300 return false; 6301 6302 igc_ptp_read(adapter, &now); 6303 6304 /* If we program the controller's BASET registers with a time 6305 * in the future, it will hold all the packets until that 6306 * time, causing a lot of TX Hangs, so to avoid that, we 6307 * reject schedules that would start in the future. 6308 * Note: Limitation above is no longer in i226. 6309 */ 6310 if (!is_base_time_past(qopt->base_time, &now) && 6311 igc_is_device_id_i225(hw)) 6312 return false; 6313 6314 for (n = 0; n < qopt->num_entries; n++) { 6315 const struct tc_taprio_sched_entry *e, *prev; 6316 int i; 6317 6318 prev = n ? &qopt->entries[n - 1] : NULL; 6319 e = &qopt->entries[n]; 6320 6321 /* i225 only supports "global" frame preemption 6322 * settings. 
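		 * Entries using per-queue hold/release commands are
		 * rejected; only SET_GATES entries are accepted.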
6323 */ 6324 if (e->command != TC_TAPRIO_CMD_SET_GATES) 6325 return false; 6326 6327 for (i = 0; i < adapter->num_tx_queues; i++) 6328 if (e->gate_mask & BIT(i)) { 6329 queue_uses[i]++; 6330 6331 /* There are limitations: A single queue cannot 6332 * be opened and closed multiple times per cycle 6333 * unless the gate stays open. Check for it. 6334 */ 6335 if (queue_uses[i] > 1 && 6336 !(prev->gate_mask & BIT(i))) 6337 return false; 6338 } 6339 } 6340 6341 return true; 6342 } 6343 igc_tsn_enable_launchtime(struct igc_adapter * adapter,struct tc_etf_qopt_offload * qopt)6344 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, 6345 struct tc_etf_qopt_offload *qopt) 6346 { 6347 struct igc_hw *hw = &adapter->hw; 6348 int err; 6349 6350 if (hw->mac.type != igc_i225) 6351 return -EOPNOTSUPP; 6352 6353 err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable); 6354 if (err) 6355 return err; 6356 6357 return igc_tsn_offload_apply(adapter); 6358 } 6359 igc_qbv_clear_schedule(struct igc_adapter * adapter)6360 static int igc_qbv_clear_schedule(struct igc_adapter *adapter) 6361 { 6362 unsigned long flags; 6363 int i; 6364 6365 adapter->base_time = 0; 6366 adapter->cycle_time = NSEC_PER_SEC; 6367 adapter->taprio_offload_enable = false; 6368 adapter->qbv_config_change_errors = 0; 6369 adapter->qbv_count = 0; 6370 6371 for (i = 0; i < adapter->num_tx_queues; i++) { 6372 struct igc_ring *ring = adapter->tx_ring[i]; 6373 6374 ring->start_time = 0; 6375 ring->end_time = NSEC_PER_SEC; 6376 ring->max_sdu = 0; 6377 } 6378 6379 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6380 6381 adapter->qbv_transition = false; 6382 6383 for (i = 0; i < adapter->num_tx_queues; i++) { 6384 struct igc_ring *ring = adapter->tx_ring[i]; 6385 6386 ring->oper_gate_closed = false; 6387 ring->admin_gate_closed = false; 6388 } 6389 6390 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6391 6392 return 0; 6393 } 6394 igc_tsn_clear_schedule(struct igc_adapter * adapter)6395 static int igc_tsn_clear_schedule(struct igc_adapter *adapter) 6396 { 6397 igc_qbv_clear_schedule(adapter); 6398 6399 return 0; 6400 } 6401 igc_taprio_stats(struct net_device * dev,struct tc_taprio_qopt_stats * stats)6402 static void igc_taprio_stats(struct net_device *dev, 6403 struct tc_taprio_qopt_stats *stats) 6404 { 6405 /* When Strict_End is enabled, the tx_overruns counter 6406 * will always be zero. 6407 */ 6408 stats->tx_overruns = 0; 6409 } 6410 igc_taprio_queue_stats(struct net_device * dev,struct tc_taprio_qopt_queue_stats * queue_stats)6411 static void igc_taprio_queue_stats(struct net_device *dev, 6412 struct tc_taprio_qopt_queue_stats *queue_stats) 6413 { 6414 struct tc_taprio_qopt_stats *stats = &queue_stats->stats; 6415 6416 /* When Strict_End is enabled, the tx_overruns counter 6417 * will always be zero. 
6418 */ 6419 stats->tx_overruns = 0; 6420 } 6421 igc_save_qbv_schedule(struct igc_adapter * adapter,struct tc_taprio_qopt_offload * qopt)6422 static int igc_save_qbv_schedule(struct igc_adapter *adapter, 6423 struct tc_taprio_qopt_offload *qopt) 6424 { 6425 bool queue_configured[IGC_MAX_TX_QUEUES] = { }; 6426 struct igc_hw *hw = &adapter->hw; 6427 u32 start_time = 0, end_time = 0; 6428 struct timespec64 now; 6429 unsigned long flags; 6430 size_t n; 6431 int i; 6432 6433 if (qopt->base_time < 0) 6434 return -ERANGE; 6435 6436 if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) 6437 return -EALREADY; 6438 6439 if (!validate_schedule(adapter, qopt)) 6440 return -EINVAL; 6441 6442 igc_ptp_read(adapter, &now); 6443 6444 if (igc_tsn_is_taprio_activated_by_user(adapter) && 6445 is_base_time_past(qopt->base_time, &now)) 6446 adapter->qbv_config_change_errors++; 6447 6448 adapter->cycle_time = qopt->cycle_time; 6449 adapter->base_time = qopt->base_time; 6450 adapter->taprio_offload_enable = true; 6451 6452 for (n = 0; n < qopt->num_entries; n++) { 6453 struct tc_taprio_sched_entry *e = &qopt->entries[n]; 6454 6455 end_time += e->interval; 6456 6457 /* If any of the conditions below are true, we need to manually 6458 * control the end time of the cycle. 6459 * 1. Qbv users can specify a cycle time that is not equal 6460 * to the total GCL intervals. Hence, recalculation is 6461 * necessary here to exclude the time interval that 6462 * exceeds the cycle time. 6463 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, 6464 * once the end of the list is reached, it will switch 6465 * to the END_OF_CYCLE state and leave the gates in the 6466 * same state until the next cycle is started. 6467 */ 6468 if (end_time > adapter->cycle_time || 6469 n + 1 == qopt->num_entries) 6470 end_time = adapter->cycle_time; 6471 6472 for (i = 0; i < adapter->num_tx_queues; i++) { 6473 struct igc_ring *ring = adapter->tx_ring[i]; 6474 6475 if (!(e->gate_mask & BIT(i))) 6476 continue; 6477 6478 /* Check whether a queue stays open for more than one 6479 * entry. If so, keep the start and advance the end 6480 * time. 6481 */ 6482 if (!queue_configured[i]) 6483 ring->start_time = start_time; 6484 ring->end_time = end_time; 6485 6486 if (ring->start_time >= adapter->cycle_time) 6487 queue_configured[i] = false; 6488 else 6489 queue_configured[i] = true; 6490 } 6491 6492 start_time += e->interval; 6493 } 6494 6495 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6496 6497 /* Check whether a queue gets configured. 6498 * If not, set the start and end time to be end time. 
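	 * Such queues are also flagged admin_gate_closed (base time still
	 * in the future) or oper_gate_closed (base time already past).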
6499 */ 6500 for (i = 0; i < adapter->num_tx_queues; i++) { 6501 struct igc_ring *ring = adapter->tx_ring[i]; 6502 6503 if (!is_base_time_past(qopt->base_time, &now)) { 6504 ring->admin_gate_closed = false; 6505 } else { 6506 ring->oper_gate_closed = false; 6507 ring->admin_gate_closed = false; 6508 } 6509 6510 if (!queue_configured[i]) { 6511 if (!is_base_time_past(qopt->base_time, &now)) 6512 ring->admin_gate_closed = true; 6513 else 6514 ring->oper_gate_closed = true; 6515 6516 ring->start_time = end_time; 6517 ring->end_time = end_time; 6518 } 6519 } 6520 6521 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6522 6523 for (i = 0; i < adapter->num_tx_queues; i++) { 6524 struct igc_ring *ring = adapter->tx_ring[i]; 6525 struct net_device *dev = adapter->netdev; 6526 6527 if (qopt->max_sdu[i]) 6528 ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; 6529 else 6530 ring->max_sdu = 0; 6531 } 6532 6533 return 0; 6534 } 6535 igc_tsn_enable_qbv_scheduling(struct igc_adapter * adapter,struct tc_taprio_qopt_offload * qopt)6536 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, 6537 struct tc_taprio_qopt_offload *qopt) 6538 { 6539 struct igc_hw *hw = &adapter->hw; 6540 int err; 6541 6542 if (hw->mac.type != igc_i225) 6543 return -EOPNOTSUPP; 6544 6545 switch (qopt->cmd) { 6546 case TAPRIO_CMD_REPLACE: 6547 err = igc_save_qbv_schedule(adapter, qopt); 6548 break; 6549 case TAPRIO_CMD_DESTROY: 6550 err = igc_tsn_clear_schedule(adapter); 6551 break; 6552 case TAPRIO_CMD_STATS: 6553 igc_taprio_stats(adapter->netdev, &qopt->stats); 6554 return 0; 6555 case TAPRIO_CMD_QUEUE_STATS: 6556 igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); 6557 return 0; 6558 default: 6559 return -EOPNOTSUPP; 6560 } 6561 6562 if (err) 6563 return err; 6564 6565 return igc_tsn_offload_apply(adapter); 6566 } 6567 igc_save_cbs_params(struct igc_adapter * adapter,int queue,bool enable,int idleslope,int sendslope,int hicredit,int locredit)6568 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue, 6569 bool enable, int idleslope, int sendslope, 6570 int hicredit, int locredit) 6571 { 6572 bool cbs_status[IGC_MAX_SR_QUEUES] = { false }; 6573 struct net_device *netdev = adapter->netdev; 6574 struct igc_ring *ring; 6575 int i; 6576 6577 /* i225 has two sets of credit-based shaper logic. 6578 * Supporting it only on the top two priority queues 6579 */ 6580 if (queue < 0 || queue > 1) 6581 return -EINVAL; 6582 6583 ring = adapter->tx_ring[queue]; 6584 6585 for (i = 0; i < IGC_MAX_SR_QUEUES; i++) 6586 if (adapter->tx_ring[i]) 6587 cbs_status[i] = adapter->tx_ring[i]->cbs_enable; 6588 6589 /* CBS should be enabled on the highest priority queue first in order 6590 * for the CBS algorithm to operate as intended. 
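	 * The checks below enforce that ordering: queue 0 must be enabled
	 * before queue 1, and queue 0 may only be disabled after queue 1.
	 *
	 * Example configuration (illustrative only; interface name and
	 * parent handle are placeholders, see tc-cbs(8) for exact syntax):
	 *   tc qdisc replace dev eth0 parent 100:1 cbs offload 1 \
	 *           idleslope 20000 sendslope -980000 hicredit 30 locredit -1470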
6591 */ 6592 if (enable) { 6593 if (queue == 1 && !cbs_status[0]) { 6594 netdev_err(netdev, 6595 "Enabling CBS on queue1 before queue0\n"); 6596 return -EINVAL; 6597 } 6598 } else { 6599 if (queue == 0 && cbs_status[1]) { 6600 netdev_err(netdev, 6601 "Disabling CBS on queue0 before queue1\n"); 6602 return -EINVAL; 6603 } 6604 } 6605 6606 ring->cbs_enable = enable; 6607 ring->idleslope = idleslope; 6608 ring->sendslope = sendslope; 6609 ring->hicredit = hicredit; 6610 ring->locredit = locredit; 6611 6612 return 0; 6613 } 6614 igc_tsn_enable_cbs(struct igc_adapter * adapter,struct tc_cbs_qopt_offload * qopt)6615 static int igc_tsn_enable_cbs(struct igc_adapter *adapter, 6616 struct tc_cbs_qopt_offload *qopt) 6617 { 6618 struct igc_hw *hw = &adapter->hw; 6619 int err; 6620 6621 if (hw->mac.type != igc_i225) 6622 return -EOPNOTSUPP; 6623 6624 if (qopt->queue < 0 || qopt->queue > 1) 6625 return -EINVAL; 6626 6627 err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, 6628 qopt->idleslope, qopt->sendslope, 6629 qopt->hicredit, qopt->locredit); 6630 if (err) 6631 return err; 6632 6633 return igc_tsn_offload_apply(adapter); 6634 } 6635 igc_tc_query_caps(struct igc_adapter * adapter,struct tc_query_caps_base * base)6636 static int igc_tc_query_caps(struct igc_adapter *adapter, 6637 struct tc_query_caps_base *base) 6638 { 6639 struct igc_hw *hw = &adapter->hw; 6640 6641 switch (base->type) { 6642 case TC_SETUP_QDISC_MQPRIO: { 6643 struct tc_mqprio_caps *caps = base->caps; 6644 6645 caps->validate_queue_counts = true; 6646 6647 return 0; 6648 } 6649 case TC_SETUP_QDISC_TAPRIO: { 6650 struct tc_taprio_caps *caps = base->caps; 6651 6652 caps->broken_mqprio = true; 6653 6654 if (hw->mac.type == igc_i225) { 6655 caps->supports_queue_max_sdu = true; 6656 caps->gate_mask_per_txq = true; 6657 } 6658 6659 return 0; 6660 } 6661 default: 6662 return -EOPNOTSUPP; 6663 } 6664 } 6665 igc_save_mqprio_params(struct igc_adapter * adapter,u8 num_tc,u16 * offset)6666 static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, 6667 u16 *offset) 6668 { 6669 int i; 6670 6671 adapter->strict_priority_enable = true; 6672 adapter->num_tc = num_tc; 6673 6674 for (i = 0; i < num_tc; i++) 6675 adapter->queue_per_tc[i] = offset[i]; 6676 } 6677 igc_tsn_enable_mqprio(struct igc_adapter * adapter,struct tc_mqprio_qopt_offload * mqprio)6678 static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, 6679 struct tc_mqprio_qopt_offload *mqprio) 6680 { 6681 struct igc_hw *hw = &adapter->hw; 6682 int i; 6683 6684 if (hw->mac.type != igc_i225) 6685 return -EOPNOTSUPP; 6686 6687 if (!mqprio->qopt.num_tc) { 6688 adapter->strict_priority_enable = false; 6689 goto apply; 6690 } 6691 6692 /* There are as many TCs as Tx queues. */ 6693 if (mqprio->qopt.num_tc != adapter->num_tx_queues) { 6694 NL_SET_ERR_MSG_FMT_MOD(mqprio->extack, 6695 "Only %d traffic classes supported", 6696 adapter->num_tx_queues); 6697 return -EOPNOTSUPP; 6698 } 6699 6700 /* Only one queue per TC is supported. */ 6701 for (i = 0; i < mqprio->qopt.num_tc; i++) { 6702 if (mqprio->qopt.count[i] != 1) { 6703 NL_SET_ERR_MSG_MOD(mqprio->extack, 6704 "Only one queue per TC supported"); 6705 return -EOPNOTSUPP; 6706 } 6707 } 6708 6709 /* Preemption is not supported yet. 
*/ 6710 if (mqprio->preemptible_tcs) { 6711 NL_SET_ERR_MSG_MOD(mqprio->extack, 6712 "Preemption is not supported yet"); 6713 return -EOPNOTSUPP; 6714 } 6715 6716 igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, 6717 mqprio->qopt.offset); 6718 6719 mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; 6720 6721 apply: 6722 return igc_tsn_offload_apply(adapter); 6723 } 6724 igc_setup_tc(struct net_device * dev,enum tc_setup_type type,void * type_data)6725 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 6726 void *type_data) 6727 { 6728 struct igc_adapter *adapter = netdev_priv(dev); 6729 6730 adapter->tc_setup_type = type; 6731 6732 switch (type) { 6733 case TC_QUERY_CAPS: 6734 return igc_tc_query_caps(adapter, type_data); 6735 case TC_SETUP_QDISC_TAPRIO: 6736 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 6737 6738 case TC_SETUP_QDISC_ETF: 6739 return igc_tsn_enable_launchtime(adapter, type_data); 6740 6741 case TC_SETUP_QDISC_CBS: 6742 return igc_tsn_enable_cbs(adapter, type_data); 6743 6744 case TC_SETUP_QDISC_MQPRIO: 6745 return igc_tsn_enable_mqprio(adapter, type_data); 6746 6747 default: 6748 return -EOPNOTSUPP; 6749 } 6750 } 6751 igc_bpf(struct net_device * dev,struct netdev_bpf * bpf)6752 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 6753 { 6754 struct igc_adapter *adapter = netdev_priv(dev); 6755 6756 switch (bpf->command) { 6757 case XDP_SETUP_PROG: 6758 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 6759 case XDP_SETUP_XSK_POOL: 6760 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 6761 bpf->xsk.queue_id); 6762 default: 6763 return -EOPNOTSUPP; 6764 } 6765 } 6766 igc_xdp_xmit(struct net_device * dev,int num_frames,struct xdp_frame ** frames,u32 flags)6767 static int igc_xdp_xmit(struct net_device *dev, int num_frames, 6768 struct xdp_frame **frames, u32 flags) 6769 { 6770 struct igc_adapter *adapter = netdev_priv(dev); 6771 int cpu = smp_processor_id(); 6772 struct netdev_queue *nq; 6773 struct igc_ring *ring; 6774 int i, nxmit; 6775 6776 if (unlikely(!netif_carrier_ok(dev))) 6777 return -ENETDOWN; 6778 6779 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 6780 return -EINVAL; 6781 6782 ring = igc_xdp_get_tx_ring(adapter, cpu); 6783 nq = txring_txq(ring); 6784 6785 __netif_tx_lock(nq, cpu); 6786 6787 /* Avoid transmit queue timeout since we share it with the slow path */ 6788 txq_trans_cond_update(nq); 6789 6790 nxmit = 0; 6791 for (i = 0; i < num_frames; i++) { 6792 int err; 6793 struct xdp_frame *xdpf = frames[i]; 6794 6795 err = igc_xdp_init_tx_descriptor(ring, xdpf); 6796 if (err) 6797 break; 6798 nxmit++; 6799 } 6800 6801 if (flags & XDP_XMIT_FLUSH) 6802 igc_flush_tx_descriptors(ring); 6803 6804 __netif_tx_unlock(nq); 6805 6806 return nxmit; 6807 } 6808 igc_trigger_rxtxq_interrupt(struct igc_adapter * adapter,struct igc_q_vector * q_vector)6809 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6810 struct igc_q_vector *q_vector) 6811 { 6812 struct igc_hw *hw = &adapter->hw; 6813 u32 eics = 0; 6814 6815 eics |= q_vector->eims_value; 6816 wr32(IGC_EICS, eics); 6817 } 6818 igc_xsk_wakeup(struct net_device * dev,u32 queue_id,u32 flags)6819 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6820 { 6821 struct igc_adapter *adapter = netdev_priv(dev); 6822 struct igc_q_vector *q_vector; 6823 struct igc_ring *ring; 6824 6825 if (test_bit(__IGC_DOWN, &adapter->state)) 6826 return -ENETDOWN; 6827 6828 if (!igc_xdp_is_enabled(adapter)) 6829 return -ENXIO; 6830 6831 if (queue_id >= 
adapter->num_rx_queues) 6832 return -EINVAL; 6833 6834 ring = adapter->rx_ring[queue_id]; 6835 6836 if (!ring->xsk_pool) 6837 return -ENXIO; 6838 6839 q_vector = adapter->q_vector[queue_id]; 6840 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6841 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6842 6843 return 0; 6844 } 6845 igc_get_tstamp(struct net_device * dev,const struct skb_shared_hwtstamps * hwtstamps,bool cycles)6846 static ktime_t igc_get_tstamp(struct net_device *dev, 6847 const struct skb_shared_hwtstamps *hwtstamps, 6848 bool cycles) 6849 { 6850 struct igc_adapter *adapter = netdev_priv(dev); 6851 struct igc_inline_rx_tstamps *tstamp; 6852 ktime_t timestamp; 6853 6854 tstamp = hwtstamps->netdev_data; 6855 6856 if (cycles) 6857 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); 6858 else 6859 timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6860 6861 return timestamp; 6862 } 6863 6864 static const struct net_device_ops igc_netdev_ops = { 6865 .ndo_open = igc_open, 6866 .ndo_stop = igc_close, 6867 .ndo_start_xmit = igc_xmit_frame, 6868 .ndo_set_rx_mode = igc_set_rx_mode, 6869 .ndo_set_mac_address = igc_set_mac, 6870 .ndo_change_mtu = igc_change_mtu, 6871 .ndo_tx_timeout = igc_tx_timeout, 6872 .ndo_get_stats64 = igc_get_stats64, 6873 .ndo_fix_features = igc_fix_features, 6874 .ndo_set_features = igc_set_features, 6875 .ndo_features_check = igc_features_check, 6876 .ndo_eth_ioctl = igc_ioctl, 6877 .ndo_setup_tc = igc_setup_tc, 6878 .ndo_bpf = igc_bpf, 6879 .ndo_xdp_xmit = igc_xdp_xmit, 6880 .ndo_xsk_wakeup = igc_xsk_wakeup, 6881 .ndo_get_tstamp = igc_get_tstamp, 6882 }; 6883 igc_rd32(struct igc_hw * hw,u32 reg)6884 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6885 { 6886 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6887 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6888 u32 value = 0; 6889 6890 if (IGC_REMOVED(hw_addr)) 6891 return ~value; 6892 6893 value = readl(&hw_addr[reg]); 6894 6895 /* reads should not return all F's */ 6896 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 6897 struct net_device *netdev = igc->netdev; 6898 6899 hw->hw_addr = NULL; 6900 netif_device_detach(netdev); 6901 netdev_err(netdev, "PCIe link lost, device now detached\n"); 6902 WARN(pci_device_is_present(igc->pdev), 6903 "igc: Failed to read reg 0x%x!\n", reg); 6904 } 6905 6906 return value; 6907 } 6908 6909 /* Mapping HW RSS Type to enum xdp_rss_hash_type */ 6910 static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { 6911 [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, 6912 [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, 6913 [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, 6914 [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, 6915 [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, 6916 [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, 6917 [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, 6918 [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, 6919 [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, 6920 [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, 6921 [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ 6922 [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ 6923 [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ 6924 [13] = XDP_RSS_TYPE_NONE, 6925 [14] = XDP_RSS_TYPE_NONE, 6926 [15] = XDP_RSS_TYPE_NONE, 6927 }; 6928 igc_xdp_rx_hash(const struct xdp_md * _ctx,u32 * hash,enum xdp_rss_hash_type * rss_type)6929 static int igc_xdp_rx_hash(const struct 
xdp_md *_ctx, u32 *hash, 6930 enum xdp_rss_hash_type *rss_type) 6931 { 6932 const struct igc_xdp_buff *ctx = (void *)_ctx; 6933 6934 if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) 6935 return -ENODATA; 6936 6937 *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); 6938 *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; 6939 6940 return 0; 6941 } 6942 igc_xdp_rx_timestamp(const struct xdp_md * _ctx,u64 * timestamp)6943 static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) 6944 { 6945 const struct igc_xdp_buff *ctx = (void *)_ctx; 6946 struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); 6947 struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; 6948 6949 if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { 6950 *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); 6951 6952 return 0; 6953 } 6954 6955 return -ENODATA; 6956 } 6957 6958 static const struct xdp_metadata_ops igc_xdp_metadata_ops = { 6959 .xmo_rx_hash = igc_xdp_rx_hash, 6960 .xmo_rx_timestamp = igc_xdp_rx_timestamp, 6961 }; 6962 igc_qbv_scheduling_timer(struct hrtimer * timer)6963 static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) 6964 { 6965 struct igc_adapter *adapter = container_of(timer, struct igc_adapter, 6966 hrtimer); 6967 unsigned long flags; 6968 unsigned int i; 6969 6970 spin_lock_irqsave(&adapter->qbv_tx_lock, flags); 6971 6972 adapter->qbv_transition = true; 6973 for (i = 0; i < adapter->num_tx_queues; i++) { 6974 struct igc_ring *tx_ring = adapter->tx_ring[i]; 6975 6976 if (tx_ring->admin_gate_closed) { 6977 tx_ring->admin_gate_closed = false; 6978 tx_ring->oper_gate_closed = true; 6979 } else { 6980 tx_ring->oper_gate_closed = false; 6981 } 6982 } 6983 adapter->qbv_transition = false; 6984 6985 spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); 6986 6987 return HRTIMER_NORESTART; 6988 } 6989 6990 /** 6991 * igc_probe - Device Initialization Routine 6992 * @pdev: PCI device information struct 6993 * @ent: entry in igc_pci_tbl 6994 * 6995 * Returns 0 on success, negative on failure 6996 * 6997 * igc_probe initializes an adapter identified by a pci_dev structure. 6998 * The OS initialization, configuring the adapter private structure, 6999 * and a hardware reset occur. 
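 * The routine also registers the net_device and lets the firmware know
 * that the driver has taken control of the hardware.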
7000 */ igc_probe(struct pci_dev * pdev,const struct pci_device_id * ent)7001 static int igc_probe(struct pci_dev *pdev, 7002 const struct pci_device_id *ent) 7003 { 7004 struct igc_adapter *adapter; 7005 struct net_device *netdev; 7006 struct igc_hw *hw; 7007 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 7008 int err; 7009 7010 err = pci_enable_device_mem(pdev); 7011 if (err) 7012 return err; 7013 7014 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 7015 if (err) { 7016 dev_err(&pdev->dev, 7017 "No usable DMA configuration, aborting\n"); 7018 goto err_dma; 7019 } 7020 7021 err = pci_request_mem_regions(pdev, igc_driver_name); 7022 if (err) 7023 goto err_pci_reg; 7024 7025 err = pci_enable_ptm(pdev, NULL); 7026 if (err < 0) 7027 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); 7028 7029 pci_set_master(pdev); 7030 7031 err = -ENOMEM; 7032 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 7033 IGC_MAX_TX_QUEUES); 7034 7035 if (!netdev) 7036 goto err_alloc_etherdev; 7037 7038 SET_NETDEV_DEV(netdev, &pdev->dev); 7039 7040 pci_set_drvdata(pdev, netdev); 7041 adapter = netdev_priv(netdev); 7042 adapter->netdev = netdev; 7043 adapter->pdev = pdev; 7044 hw = &adapter->hw; 7045 hw->back = adapter; 7046 adapter->port_num = hw->bus.func; 7047 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 7048 7049 err = pci_save_state(pdev); 7050 if (err) 7051 goto err_ioremap; 7052 7053 err = -EIO; 7054 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 7055 pci_resource_len(pdev, 0)); 7056 if (!adapter->io_addr) 7057 goto err_ioremap; 7058 7059 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 7060 hw->hw_addr = adapter->io_addr; 7061 7062 netdev->netdev_ops = &igc_netdev_ops; 7063 netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; 7064 netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; 7065 igc_ethtool_set_ops(netdev); 7066 netdev->watchdog_timeo = 5 * HZ; 7067 7068 netdev->mem_start = pci_resource_start(pdev, 0); 7069 netdev->mem_end = pci_resource_end(pdev, 0); 7070 7071 /* PCI config space info */ 7072 hw->vendor_id = pdev->vendor; 7073 hw->device_id = pdev->device; 7074 hw->revision_id = pdev->revision; 7075 hw->subsystem_vendor_id = pdev->subsystem_vendor; 7076 hw->subsystem_device_id = pdev->subsystem_device; 7077 7078 /* Copy the default MAC and PHY function pointers */ 7079 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 7080 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 7081 7082 /* Initialize skew-specific constants */ 7083 err = ei->get_invariants(hw); 7084 if (err) 7085 goto err_sw_init; 7086 7087 /* Add supported features to the features list*/ 7088 netdev->features |= NETIF_F_SG; 7089 netdev->features |= NETIF_F_TSO; 7090 netdev->features |= NETIF_F_TSO6; 7091 netdev->features |= NETIF_F_TSO_ECN; 7092 netdev->features |= NETIF_F_RXHASH; 7093 netdev->features |= NETIF_F_RXCSUM; 7094 netdev->features |= NETIF_F_HW_CSUM; 7095 netdev->features |= NETIF_F_SCTP_CRC; 7096 netdev->features |= NETIF_F_HW_TC; 7097 7098 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 7099 NETIF_F_GSO_GRE_CSUM | \ 7100 NETIF_F_GSO_IPXIP4 | \ 7101 NETIF_F_GSO_IPXIP6 | \ 7102 NETIF_F_GSO_UDP_TUNNEL | \ 7103 NETIF_F_GSO_UDP_TUNNEL_CSUM) 7104 7105 netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES; 7106 netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES; 7107 7108 /* setup the private structure */ 7109 err = igc_sw_init(adapter); 7110 if (err) 7111 goto err_sw_init; 7112 7113 /* copy 
netdev features into list of user selectable features */ 7114 netdev->hw_features |= NETIF_F_NTUPLE; 7115 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 7116 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 7117 netdev->hw_features |= netdev->features; 7118 7119 netdev->features |= NETIF_F_HIGHDMA; 7120 7121 netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; 7122 netdev->mpls_features |= NETIF_F_HW_CSUM; 7123 netdev->hw_enc_features |= netdev->vlan_features; 7124 7125 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 7126 NETDEV_XDP_ACT_XSK_ZEROCOPY; 7127 7128 /* MTU range: 68 - 9216 */ 7129 netdev->min_mtu = ETH_MIN_MTU; 7130 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 7131 7132 /* before reading the NVM, reset the controller to put the device in a 7133 * known good starting state 7134 */ 7135 hw->mac.ops.reset_hw(hw); 7136 7137 if (igc_get_flash_presence_i225(hw)) { 7138 if (hw->nvm.ops.validate(hw) < 0) { 7139 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 7140 err = -EIO; 7141 goto err_eeprom; 7142 } 7143 } 7144 7145 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 7146 /* copy the MAC address out of the NVM */ 7147 if (hw->mac.ops.read_mac_addr(hw)) 7148 dev_err(&pdev->dev, "NVM Read Error\n"); 7149 } 7150 7151 eth_hw_addr_set(netdev, hw->mac.addr); 7152 7153 if (!is_valid_ether_addr(netdev->dev_addr)) { 7154 dev_err(&pdev->dev, "Invalid MAC Address\n"); 7155 err = -EIO; 7156 goto err_eeprom; 7157 } 7158 7159 /* configure RXPBSIZE and TXPBSIZE */ 7160 wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); 7161 wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); 7162 7163 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 7164 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 7165 7166 INIT_WORK(&adapter->reset_task, igc_reset_task); 7167 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 7168 7169 hrtimer_setup(&adapter->hrtimer, &igc_qbv_scheduling_timer, CLOCK_MONOTONIC, 7170 HRTIMER_MODE_REL); 7171 7172 /* Initialize link properties that are user-changeable */ 7173 adapter->fc_autoneg = true; 7174 hw->phy.autoneg_advertised = 0xaf; 7175 7176 hw->fc.requested_mode = igc_fc_default; 7177 hw->fc.current_mode = igc_fc_default; 7178 7179 /* By default, support wake on port A */ 7180 adapter->flags |= IGC_FLAG_WOL_SUPPORTED; 7181 7182 /* initialize the wol settings based on the eeprom settings */ 7183 if (adapter->flags & IGC_FLAG_WOL_SUPPORTED) 7184 adapter->wol |= IGC_WUFC_MAG; 7185 7186 device_set_wakeup_enable(&adapter->pdev->dev, 7187 adapter->flags & IGC_FLAG_WOL_SUPPORTED); 7188 7189 igc_ptp_init(adapter); 7190 7191 igc_tsn_clear_schedule(adapter); 7192 7193 /* reset the hardware with the new settings */ 7194 igc_reset(adapter); 7195 7196 /* let the f/w know that the h/w is now under the control of the 7197 * driver. 
7198 */ 7199 igc_get_hw_control(adapter); 7200 7201 strscpy(netdev->name, "eth%d", sizeof(netdev->name)); 7202 err = register_netdev(netdev); 7203 if (err) 7204 goto err_register; 7205 7206 /* carrier off reporting is important to ethtool even BEFORE open */ 7207 netif_carrier_off(netdev); 7208 7209 /* Check if Media Autosense is enabled */ 7210 adapter->ei = *ei; 7211 7212 /* print pcie link status and MAC address */ 7213 pcie_print_link_status(pdev); 7214 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 7215 7216 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 7217 /* Disable EEE for internal PHY devices */ 7218 hw->dev_spec._base.eee_enable = false; 7219 adapter->flags &= ~IGC_FLAG_EEE; 7220 igc_set_eee_i225(hw, false, false, false); 7221 7222 pm_runtime_put_noidle(&pdev->dev); 7223 7224 if (IS_ENABLED(CONFIG_IGC_LEDS)) { 7225 err = igc_led_setup(adapter); 7226 if (err) 7227 goto err_register; 7228 } 7229 7230 return 0; 7231 7232 err_register: 7233 igc_release_hw_control(adapter); 7234 err_eeprom: 7235 if (!igc_check_reset_block(hw)) 7236 igc_reset_phy(hw); 7237 err_sw_init: 7238 igc_clear_interrupt_scheme(adapter); 7239 iounmap(adapter->io_addr); 7240 err_ioremap: 7241 free_netdev(netdev); 7242 err_alloc_etherdev: 7243 pci_release_mem_regions(pdev); 7244 err_pci_reg: 7245 err_dma: 7246 pci_disable_device(pdev); 7247 return err; 7248 } 7249 7250 /** 7251 * igc_remove - Device Removal Routine 7252 * @pdev: PCI device information struct 7253 * 7254 * igc_remove is called by the PCI subsystem to alert the driver 7255 * that it should release a PCI device. This could be caused by a 7256 * Hot-Plug event, or because the driver is going to be removed from 7257 * memory. 7258 */ igc_remove(struct pci_dev * pdev)7259 static void igc_remove(struct pci_dev *pdev) 7260 { 7261 struct net_device *netdev = pci_get_drvdata(pdev); 7262 struct igc_adapter *adapter = netdev_priv(netdev); 7263 7264 pm_runtime_get_noresume(&pdev->dev); 7265 7266 igc_flush_nfc_rules(adapter); 7267 7268 igc_ptp_stop(adapter); 7269 7270 pci_disable_ptm(pdev); 7271 pci_clear_master(pdev); 7272 7273 set_bit(__IGC_DOWN, &adapter->state); 7274 7275 del_timer_sync(&adapter->watchdog_timer); 7276 del_timer_sync(&adapter->phy_info_timer); 7277 7278 cancel_work_sync(&adapter->reset_task); 7279 cancel_work_sync(&adapter->watchdog_task); 7280 hrtimer_cancel(&adapter->hrtimer); 7281 7282 if (IS_ENABLED(CONFIG_IGC_LEDS)) 7283 igc_led_free(adapter); 7284 7285 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7286 * would have already happened in close and is redundant. 7287 */ 7288 igc_release_hw_control(adapter); 7289 unregister_netdev(netdev); 7290 7291 igc_clear_interrupt_scheme(adapter); 7292 pci_iounmap(pdev, adapter->io_addr); 7293 pci_release_mem_regions(pdev); 7294 7295 free_netdev(netdev); 7296 7297 pci_disable_device(pdev); 7298 } 7299 __igc_shutdown(struct pci_dev * pdev,bool * enable_wake,bool runtime)7300 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 7301 bool runtime) 7302 { 7303 struct net_device *netdev = pci_get_drvdata(pdev); 7304 struct igc_adapter *adapter = netdev_priv(netdev); 7305 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 7306 struct igc_hw *hw = &adapter->hw; 7307 u32 ctrl, rctl, status; 7308 bool wake; 7309 7310 rtnl_lock(); 7311 netif_device_detach(netdev); 7312 7313 if (netif_running(netdev)) 7314 __igc_close(netdev, true); 7315 7316 igc_ptp_suspend(adapter); 7317 7318 igc_clear_interrupt_scheme(adapter); 7319 rtnl_unlock(); 7320 7321 status = rd32(IGC_STATUS); 7322 if (status & IGC_STATUS_LU) 7323 wufc &= ~IGC_WUFC_LNKC; 7324 7325 if (wufc) { 7326 igc_setup_rctl(adapter); 7327 igc_set_rx_mode(netdev); 7328 7329 /* turn on all-multi mode if wake on multicast is enabled */ 7330 if (wufc & IGC_WUFC_MC) { 7331 rctl = rd32(IGC_RCTL); 7332 rctl |= IGC_RCTL_MPE; 7333 wr32(IGC_RCTL, rctl); 7334 } 7335 7336 ctrl = rd32(IGC_CTRL); 7337 ctrl |= IGC_CTRL_ADVD3WUC; 7338 wr32(IGC_CTRL, ctrl); 7339 7340 /* Allow time for pending master requests to run */ 7341 igc_disable_pcie_master(hw); 7342 7343 wr32(IGC_WUC, IGC_WUC_PME_EN); 7344 wr32(IGC_WUFC, wufc); 7345 } else { 7346 wr32(IGC_WUC, 0); 7347 wr32(IGC_WUFC, 0); 7348 } 7349 7350 wake = wufc || adapter->en_mng_pt; 7351 if (!wake) 7352 igc_power_down_phy_copper_base(&adapter->hw); 7353 else 7354 igc_power_up_link(adapter); 7355 7356 if (enable_wake) 7357 *enable_wake = wake; 7358 7359 /* Release control of h/w to f/w. If f/w is AMT enabled, this 7360 * would have already happened in close and is redundant. 7361 */ 7362 igc_release_hw_control(adapter); 7363 7364 pci_disable_device(pdev); 7365 7366 return 0; 7367 } 7368 igc_runtime_suspend(struct device * dev)7369 static int igc_runtime_suspend(struct device *dev) 7370 { 7371 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 7372 } 7373 igc_deliver_wake_packet(struct net_device * netdev)7374 static void igc_deliver_wake_packet(struct net_device *netdev) 7375 { 7376 struct igc_adapter *adapter = netdev_priv(netdev); 7377 struct igc_hw *hw = &adapter->hw; 7378 struct sk_buff *skb; 7379 u32 wupl; 7380 7381 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 7382 7383 /* WUPM stores only the first 128 bytes of the wake packet. 7384 * Read the packet only if we have the whole thing. 
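	 * A zero length or anything above IGC_WUPM_BYTES means the stored
	 * copy is incomplete, so nothing is delivered to the stack.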
static void igc_deliver_wake_packet(struct net_device *netdev)
{
        struct igc_adapter *adapter = netdev_priv(netdev);
        struct igc_hw *hw = &adapter->hw;
        struct sk_buff *skb;
        u32 wupl;

        wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;

        /* WUPM stores only the first 128 bytes of the wake packet.
         * Read the packet only if we have the whole thing.
         */
        if (wupl == 0 || wupl > IGC_WUPM_BYTES)
                return;

        skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
        if (!skb)
                return;

        skb_put(skb, wupl);

        /* Ensure reads are 32-bit aligned */
        wupl = roundup(wupl, 4);

        memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);

        skb->protocol = eth_type_trans(skb, netdev);
        netif_rx(skb);
}

static int __igc_resume(struct device *dev, bool rpm)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igc_adapter *adapter = netdev_priv(netdev);
        struct igc_hw *hw = &adapter->hw;
        u32 err, val;

        pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
        pci_save_state(pdev);

        if (!pci_device_is_present(pdev))
                return -ENODEV;
        err = pci_enable_device_mem(pdev);
        if (err) {
                netdev_err(netdev, "Cannot enable PCI device from suspend\n");
                return err;
        }
        pci_set_master(pdev);

        pci_enable_wake(pdev, PCI_D3hot, 0);
        pci_enable_wake(pdev, PCI_D3cold, 0);

        if (igc_init_interrupt_scheme(adapter, true)) {
                netdev_err(netdev, "Unable to allocate memory for queues\n");
                return -ENOMEM;
        }

        igc_reset(adapter);

        /* let the f/w know that the h/w is now under the control of the
         * driver.
         */
        igc_get_hw_control(adapter);

        val = rd32(IGC_WUS);
        if (val & WAKE_PKT_WUS)
                igc_deliver_wake_packet(netdev);

        wr32(IGC_WUS, ~0);

        if (netif_running(netdev)) {
                if (!rpm)
                        rtnl_lock();
                err = __igc_open(netdev, true);
                if (!rpm)
                        rtnl_unlock();
                if (!err)
                        netif_device_attach(netdev);
        }

        return err;
}

static int igc_resume(struct device *dev)
{
        return __igc_resume(dev, false);
}

static int igc_runtime_resume(struct device *dev)
{
        return __igc_resume(dev, true);
}

static int igc_suspend(struct device *dev)
{
        return __igc_shutdown(to_pci_dev(dev), NULL, 0);
}

static int __maybe_unused igc_runtime_idle(struct device *dev)
{
        struct net_device *netdev = dev_get_drvdata(dev);
        struct igc_adapter *adapter = netdev_priv(netdev);

        if (!igc_has_link(adapter))
                pm_schedule_suspend(dev, MSEC_PER_SEC * 5);

        return -EBUSY;
}

static void igc_shutdown(struct pci_dev *pdev)
{
        bool wake;

        __igc_shutdown(pdev, &wake, 0);

        if (system_state == SYSTEM_POWER_OFF) {
                pci_wake_from_d3(pdev, wake);
                pci_set_power_state(pdev, PCI_D3hot);
        }
}
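
/* PCI error recovery (AER) callbacks.  The PCI core invokes them in order:
 * igc_io_error_detected() detaches the netdev and requests a slot reset,
 * igc_io_slot_reset() re-enables the device and re-initializes the hardware
 * after the bus reset, and igc_io_resume() reopens the interface once the
 * core reports that traffic may flow again.  They are hooked up through
 * igc_err_handler below.
 */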
/**
 * igc_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current PCI connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igc_adapter *adapter = netdev_priv(netdev);

        rtnl_lock();
        netif_device_detach(netdev);

        if (state == pci_channel_io_perm_failure) {
                rtnl_unlock();
                return PCI_ERS_RESULT_DISCONNECT;
        }

        if (netif_running(netdev))
                igc_down(adapter);
        pci_disable_device(pdev);
        rtnl_unlock();

        /* Request a slot reset. */
        return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * igc_io_slot_reset - called after the PCI bus has been reset
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold boot.  Implementation
 * resembles the first half of the __igc_resume routine.
 */
static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igc_adapter *adapter = netdev_priv(netdev);
        struct igc_hw *hw = &adapter->hw;
        pci_ers_result_t result;

        if (pci_enable_device_mem(pdev)) {
                netdev_err(netdev, "Could not re-enable PCI device after reset\n");
                result = PCI_ERS_RESULT_DISCONNECT;
        } else {
                pci_set_master(pdev);
                pci_restore_state(pdev);
                pci_save_state(pdev);

                pci_enable_wake(pdev, PCI_D3hot, 0);
                pci_enable_wake(pdev, PCI_D3cold, 0);

                /* In case of PCI error, adapter loses its HW address
                 * so we should re-assign it here.
                 */
                hw->hw_addr = adapter->io_addr;

                igc_reset(adapter);
                wr32(IGC_WUS, ~0);
                result = PCI_ERS_RESULT_RECOVERED;
        }

        return result;
}

/**
 * igc_io_resume - called when traffic can start to flow again
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation.  Implementation resembles the
 * second half of the __igc_resume routine.
 */
static void igc_io_resume(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igc_adapter *adapter = netdev_priv(netdev);

        rtnl_lock();
        if (netif_running(netdev)) {
                if (igc_open(netdev)) {
                        rtnl_unlock();
                        netdev_err(netdev, "igc_open failed after reset\n");
                        return;
                }
        }

        netif_device_attach(netdev);

        /* let the f/w know that the h/w is now under the control of the
         * driver.
         */
        igc_get_hw_control(adapter);
        rtnl_unlock();
}

static const struct pci_error_handlers igc_err_handler = {
        .error_detected = igc_io_error_detected,
        .slot_reset = igc_io_slot_reset,
        .resume = igc_io_resume,
};

static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume,
                          igc_runtime_suspend, igc_runtime_resume,
                          igc_runtime_idle);

static struct pci_driver igc_driver = {
        .name = igc_driver_name,
        .id_table = igc_pci_tbl,
        .probe = igc_probe,
        .remove = igc_remove,
        .driver.pm = pm_ptr(&igc_pm_ops),
        .shutdown = igc_shutdown,
        .err_handler = &igc_err_handler,
};

/**
 * igc_reinit_queues - reinitialize the interrupt scheme and queues
 * @adapter: pointer to adapter structure
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int igc_reinit_queues(struct igc_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        int err = 0;

        if (netif_running(netdev))
                igc_close(netdev);

        igc_reset_interrupt_capability(adapter);

        if (igc_init_interrupt_scheme(adapter, true)) {
                netdev_err(netdev, "Unable to allocate memory for queues\n");
                return -ENOMEM;
        }

        if (netif_running(netdev))
                err = igc_open(netdev);

        return err;
}

/**
 * igc_get_hw_dev - return the net_device associated with the hardware
 * @hw: pointer to hardware structure
 *
 * Used by the hardware layer to print debugging information.
 */
struct net_device *igc_get_hw_dev(struct igc_hw *hw)
{
        struct igc_adapter *adapter = hw->back;

        return adapter->netdev;
}
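
/* Per-ring enable/disable helpers.  They allow a single queue to be quiesced
 * (the hardware queue disabled and flushed, its buffers cleaned) and brought
 * back up again, e.g. when the XDP/AF_XDP setup code in igc_xdp.c attaches or
 * detaches a buffer pool, without resetting the whole adapter.
 */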
static void igc_disable_rx_ring_hw(struct igc_ring *ring)
{
        struct igc_hw *hw = &ring->q_vector->adapter->hw;
        u8 idx = ring->reg_idx;
        u32 rxdctl;

        rxdctl = rd32(IGC_RXDCTL(idx));
        rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
        rxdctl |= IGC_RXDCTL_SWFLUSH;
        wr32(IGC_RXDCTL(idx), rxdctl);
}

void igc_disable_rx_ring(struct igc_ring *ring)
{
        igc_disable_rx_ring_hw(ring);
        igc_clean_rx_ring(ring);
}

void igc_enable_rx_ring(struct igc_ring *ring)
{
        struct igc_adapter *adapter = ring->q_vector->adapter;

        igc_configure_rx_ring(adapter, ring);

        if (ring->xsk_pool)
                igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
        else
                igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
}

void igc_disable_tx_ring(struct igc_ring *ring)
{
        igc_disable_tx_ring_hw(ring);
        igc_clean_tx_ring(ring);
}

void igc_enable_tx_ring(struct igc_ring *ring)
{
        struct igc_adapter *adapter = ring->q_vector->adapter;

        igc_configure_tx_ring(adapter, ring);
}

/**
 * igc_init_module - Driver Registration Routine
 *
 * igc_init_module is the first routine called when the driver is
 * loaded.  All it does is register with the PCI subsystem.
 */
static int __init igc_init_module(void)
{
        int ret;

        pr_info("%s\n", igc_driver_string);
        pr_info("%s\n", igc_copyright);

        ret = pci_register_driver(&igc_driver);
        return ret;
}

module_init(igc_init_module);

/**
 * igc_exit_module - Driver Exit Cleanup Routine
 *
 * igc_exit_module is called just before the driver is removed
 * from memory.
 */
static void __exit igc_exit_module(void)
{
        pci_unregister_driver(&igc_driver);
}

module_exit(igc_exit_module);
/* igc_main.c */