1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2007 - 2018 Intel Corporation. */ 3 4 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 5 6 #include <linux/module.h> 7 #include <linux/types.h> 8 #include <linux/init.h> 9 #include <linux/bitops.h> 10 #include <linux/vmalloc.h> 11 #include <linux/pagemap.h> 12 #include <linux/netdevice.h> 13 #include <linux/ipv6.h> 14 #include <linux/slab.h> 15 #include <net/checksum.h> 16 #include <net/ip6_checksum.h> 17 #include <net/pkt_sched.h> 18 #include <net/pkt_cls.h> 19 #include <linux/net_tstamp.h> 20 #include <linux/mii.h> 21 #include <linux/ethtool.h> 22 #include <linux/if.h> 23 #include <linux/if_vlan.h> 24 #include <linux/pci.h> 25 #include <linux/delay.h> 26 #include <linux/interrupt.h> 27 #include <linux/ip.h> 28 #include <linux/tcp.h> 29 #include <linux/sctp.h> 30 #include <linux/if_ether.h> 31 #include <linux/prefetch.h> 32 #include <linux/bpf.h> 33 #include <linux/bpf_trace.h> 34 #include <linux/pm_runtime.h> 35 #include <linux/etherdevice.h> 36 #ifdef CONFIG_IGB_DCA 37 #include <linux/dca.h> 38 #endif 39 #include <linux/i2c.h> 40 #include "igb.h" 41 42 enum queue_mode { 43 QUEUE_MODE_STRICT_PRIORITY, 44 QUEUE_MODE_STREAM_RESERVATION, 45 }; 46 47 enum tx_queue_prio { 48 TX_QUEUE_PRIO_HIGH, 49 TX_QUEUE_PRIO_LOW, 50 }; 51 52 char igb_driver_name[] = "igb"; 53 static const char igb_driver_string[] = 54 "Intel(R) Gigabit Ethernet Network Driver"; 55 static const char igb_copyright[] = 56 "Copyright (c) 2007-2014 Intel Corporation."; 57 58 static const struct e1000_info *igb_info_tbl[] = { 59 [board_82575] = &e1000_82575_info, 60 }; 61 62 static const struct pci_device_id igb_pci_tbl[] = { 63 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, 64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, 65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, 66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 }, 67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 }, 68 { PCI_VDEVICE(INTEL, 
E1000_DEV_ID_I210_FIBER), board_82575 }, 69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 }, 70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 }, 71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 }, 72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 }, 73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, 74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, 75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, 76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, 77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, 78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, 79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }, 80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, 81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, 82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, 83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 }, 84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 }, 85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 }, 86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 }, 87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, 88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, 89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 }, 90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, 91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, 92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, 93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, 94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, 95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, 96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, 97 { PCI_VDEVICE(INTEL, 
E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, 98 /* required last entry */ 99 {0, } 100 }; 101 102 MODULE_DEVICE_TABLE(pci, igb_pci_tbl); 103 104 static int igb_setup_all_tx_resources(struct igb_adapter *); 105 static int igb_setup_all_rx_resources(struct igb_adapter *); 106 static void igb_free_all_tx_resources(struct igb_adapter *); 107 static void igb_free_all_rx_resources(struct igb_adapter *); 108 static void igb_setup_mrqc(struct igb_adapter *); 109 static void igb_init_queue_configuration(struct igb_adapter *adapter); 110 static int igb_sw_init(struct igb_adapter *); 111 int igb_open(struct net_device *); 112 int igb_close(struct net_device *); 113 static void igb_configure(struct igb_adapter *); 114 static void igb_configure_tx(struct igb_adapter *); 115 static void igb_configure_rx(struct igb_adapter *); 116 static void igb_clean_all_tx_rings(struct igb_adapter *); 117 static void igb_clean_all_rx_rings(struct igb_adapter *); 118 static void igb_set_rx_mode(struct net_device *); 119 static void igb_update_phy_info(struct timer_list *); 120 static void igb_watchdog(struct timer_list *); 121 static void igb_watchdog_task(struct work_struct *); 122 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); 123 static void igb_get_stats64(struct net_device *dev, 124 struct rtnl_link_stats64 *stats); 125 static int igb_change_mtu(struct net_device *, int); 126 static int igb_set_mac(struct net_device *, void *); 127 static void igb_set_uta(struct igb_adapter *adapter, bool set); 128 static irqreturn_t igb_intr(int irq, void *); 129 static irqreturn_t igb_intr_msi(int irq, void *); 130 static irqreturn_t igb_msix_other(int irq, void *); 131 static irqreturn_t igb_msix_ring(int irq, void *); 132 #ifdef CONFIG_IGB_DCA 133 static void igb_update_dca(struct igb_q_vector *); 134 static void igb_setup_dca(struct igb_adapter *); 135 #endif /* CONFIG_IGB_DCA */ 136 static int igb_poll(struct napi_struct *, int); 137 static bool 
igb_clean_tx_irq(struct igb_q_vector *, int); 138 static int igb_clean_rx_irq(struct igb_q_vector *, int); 139 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); 140 static void igb_tx_timeout(struct net_device *, unsigned int txqueue); 141 static void igb_reset_task(struct work_struct *); 142 static void igb_vlan_mode(struct net_device *netdev, 143 netdev_features_t features); 144 static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16); 145 static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16); 146 static void igb_restore_vlan(struct igb_adapter *); 147 static void igb_rar_set_index(struct igb_adapter *, u32); 148 static void igb_ping_all_vfs(struct igb_adapter *); 149 static void igb_msg_task(struct igb_adapter *); 150 static void igb_vmm_control(struct igb_adapter *); 151 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); 152 static void igb_flush_mac_table(struct igb_adapter *); 153 static int igb_available_rars(struct igb_adapter *, u8); 154 static void igb_set_default_mac_filter(struct igb_adapter *); 155 static int igb_uc_sync(struct net_device *, const unsigned char *); 156 static int igb_uc_unsync(struct net_device *, const unsigned char *); 157 static void igb_restore_vf_multicasts(struct igb_adapter *adapter); 158 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); 159 static int igb_ndo_set_vf_vlan(struct net_device *netdev, 160 int vf, u16 vlan, u8 qos, __be16 vlan_proto); 161 static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); 162 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, 163 bool setting); 164 static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, 165 bool setting); 166 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, 167 struct ifla_vf_info *ivi); 168 static void igb_check_vf_rate_limit(struct igb_adapter *); 169 static void igb_nfc_filter_exit(struct igb_adapter *adapter); 170 static void 
igb_nfc_filter_restore(struct igb_adapter *adapter); 171 172 #ifdef CONFIG_PCI_IOV 173 static int igb_vf_configure(struct igb_adapter *adapter, int vf); 174 static int igb_disable_sriov(struct pci_dev *dev, bool reinit); 175 #endif 176 177 #ifdef CONFIG_IGB_DCA 178 static int igb_notify_dca(struct notifier_block *, unsigned long, void *); 179 static struct notifier_block dca_notifier = { 180 .notifier_call = igb_notify_dca, 181 .next = NULL, 182 .priority = 0 183 }; 184 #endif 185 #ifdef CONFIG_PCI_IOV 186 static unsigned int max_vfs; 187 module_param(max_vfs, uint, 0444); 188 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function"); 189 #endif /* CONFIG_PCI_IOV */ 190 191 static pci_ers_result_t igb_io_error_detected(struct pci_dev *, 192 pci_channel_state_t); 193 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); 194 static void igb_io_resume(struct pci_dev *); 195 196 static const struct pci_error_handlers igb_err_handler = { 197 .error_detected = igb_io_error_detected, 198 .slot_reset = igb_io_slot_reset, 199 .resume = igb_io_resume, 200 }; 201 202 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); 203 204 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); 205 MODULE_LICENSE("GPL v2"); 206 207 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) 208 static int debug = -1; 209 module_param(debug, int, 0); 210 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 211 212 struct igb_reg_info { 213 u32 ofs; 214 char *name; 215 }; 216 217 static const struct igb_reg_info igb_reg_info_tbl[] = { 218 219 /* General Registers */ 220 {E1000_CTRL, "CTRL"}, 221 {E1000_STATUS, "STATUS"}, 222 {E1000_CTRL_EXT, "CTRL_EXT"}, 223 224 /* Interrupt Registers */ 225 {E1000_ICR, "ICR"}, 226 227 /* RX Registers */ 228 {E1000_RCTL, "RCTL"}, 229 {E1000_RDLEN(0), "RDLEN"}, 230 {E1000_RDH(0), "RDH"}, 231 {E1000_RDT(0), "RDT"}, 232 {E1000_RXDCTL(0), "RXDCTL"}, 233 {E1000_RDBAL(0), 
"RDBAL"}, 234 {E1000_RDBAH(0), "RDBAH"}, 235 236 /* TX Registers */ 237 {E1000_TCTL, "TCTL"}, 238 {E1000_TDBAL(0), "TDBAL"}, 239 {E1000_TDBAH(0), "TDBAH"}, 240 {E1000_TDLEN(0), "TDLEN"}, 241 {E1000_TDH(0), "TDH"}, 242 {E1000_TDT(0), "TDT"}, 243 {E1000_TXDCTL(0), "TXDCTL"}, 244 {E1000_TDFH, "TDFH"}, 245 {E1000_TDFT, "TDFT"}, 246 {E1000_TDFHS, "TDFHS"}, 247 {E1000_TDFPC, "TDFPC"}, 248 249 /* List Terminator */ 250 {} 251 }; 252 253 /* igb_regdump - register printout routine */ 254 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) 255 { 256 int n = 0; 257 char rname[16]; 258 u32 regs[8]; 259 260 switch (reginfo->ofs) { 261 case E1000_RDLEN(0): 262 for (n = 0; n < 4; n++) 263 regs[n] = rd32(E1000_RDLEN(n)); 264 break; 265 case E1000_RDH(0): 266 for (n = 0; n < 4; n++) 267 regs[n] = rd32(E1000_RDH(n)); 268 break; 269 case E1000_RDT(0): 270 for (n = 0; n < 4; n++) 271 regs[n] = rd32(E1000_RDT(n)); 272 break; 273 case E1000_RXDCTL(0): 274 for (n = 0; n < 4; n++) 275 regs[n] = rd32(E1000_RXDCTL(n)); 276 break; 277 case E1000_RDBAL(0): 278 for (n = 0; n < 4; n++) 279 regs[n] = rd32(E1000_RDBAL(n)); 280 break; 281 case E1000_RDBAH(0): 282 for (n = 0; n < 4; n++) 283 regs[n] = rd32(E1000_RDBAH(n)); 284 break; 285 case E1000_TDBAL(0): 286 for (n = 0; n < 4; n++) 287 regs[n] = rd32(E1000_TDBAL(n)); 288 break; 289 case E1000_TDBAH(0): 290 for (n = 0; n < 4; n++) 291 regs[n] = rd32(E1000_TDBAH(n)); 292 break; 293 case E1000_TDLEN(0): 294 for (n = 0; n < 4; n++) 295 regs[n] = rd32(E1000_TDLEN(n)); 296 break; 297 case E1000_TDH(0): 298 for (n = 0; n < 4; n++) 299 regs[n] = rd32(E1000_TDH(n)); 300 break; 301 case E1000_TDT(0): 302 for (n = 0; n < 4; n++) 303 regs[n] = rd32(E1000_TDT(n)); 304 break; 305 case E1000_TXDCTL(0): 306 for (n = 0; n < 4; n++) 307 regs[n] = rd32(E1000_TXDCTL(n)); 308 break; 309 default: 310 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs)); 311 return; 312 } 313 314 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]"); 315 
pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], 316 regs[2], regs[3]); 317 } 318 319 /* igb_dump - Print registers, Tx-rings and Rx-rings */ 320 static void igb_dump(struct igb_adapter *adapter) 321 { 322 struct net_device *netdev = adapter->netdev; 323 struct e1000_hw *hw = &adapter->hw; 324 struct igb_reg_info *reginfo; 325 struct igb_ring *tx_ring; 326 union e1000_adv_tx_desc *tx_desc; 327 struct my_u0 { __le64 a; __le64 b; } *u0; 328 struct igb_ring *rx_ring; 329 union e1000_adv_rx_desc *rx_desc; 330 u32 staterr; 331 u16 i, n; 332 333 if (!netif_msg_hw(adapter)) 334 return; 335 336 /* Print netdevice Info */ 337 if (netdev) { 338 dev_info(&adapter->pdev->dev, "Net device Info\n"); 339 pr_info("Device Name state trans_start\n"); 340 pr_info("%-15s %016lX %016lX\n", netdev->name, 341 netdev->state, dev_trans_start(netdev)); 342 } 343 344 /* Print Registers */ 345 dev_info(&adapter->pdev->dev, "Register Dump\n"); 346 pr_info(" Register Name Value\n"); 347 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl; 348 reginfo->name; reginfo++) { 349 igb_regdump(hw, reginfo); 350 } 351 352 /* Print TX Ring Summary */ 353 if (!netdev || !netif_running(netdev)) 354 goto exit; 355 356 dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); 357 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); 358 for (n = 0; n < adapter->num_tx_queues; n++) { 359 struct igb_tx_buffer *buffer_info; 360 tx_ring = adapter->tx_ring[n]; 361 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; 362 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", 363 n, tx_ring->next_to_use, tx_ring->next_to_clean, 364 (u64)dma_unmap_addr(buffer_info, dma), 365 dma_unmap_len(buffer_info, len), 366 buffer_info->next_to_watch, 367 (u64)buffer_info->time_stamp); 368 } 369 370 /* Print TX Rings */ 371 if (!netif_msg_tx_done(adapter)) 372 goto rx_ring_summary; 373 374 dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); 375 376 /* Transmit Descriptor Formats 377 * 378 * Advanced 
Transmit Descriptor 379 * +--------------------------------------------------------------+ 380 * 0 | Buffer Address [63:0] | 381 * +--------------------------------------------------------------+ 382 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN | 383 * +--------------------------------------------------------------+ 384 * 63 46 45 40 39 38 36 35 32 31 24 15 0 385 */ 386 387 for (n = 0; n < adapter->num_tx_queues; n++) { 388 tx_ring = adapter->tx_ring[n]; 389 pr_info("------------------------------------\n"); 390 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); 391 pr_info("------------------------------------\n"); 392 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] [bi->dma ] leng ntw timestamp bi->skb\n"); 393 394 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { 395 const char *next_desc; 396 struct igb_tx_buffer *buffer_info; 397 tx_desc = IGB_TX_DESC(tx_ring, i); 398 buffer_info = &tx_ring->tx_buffer_info[i]; 399 u0 = (struct my_u0 *)tx_desc; 400 if (i == tx_ring->next_to_use && 401 i == tx_ring->next_to_clean) 402 next_desc = " NTC/U"; 403 else if (i == tx_ring->next_to_use) 404 next_desc = " NTU"; 405 else if (i == tx_ring->next_to_clean) 406 next_desc = " NTC"; 407 else 408 next_desc = ""; 409 410 pr_info("T [0x%03X] %016llX %016llX %016llX %04X %p %016llX %p%s\n", 411 i, le64_to_cpu(u0->a), 412 le64_to_cpu(u0->b), 413 (u64)dma_unmap_addr(buffer_info, dma), 414 dma_unmap_len(buffer_info, len), 415 buffer_info->next_to_watch, 416 (u64)buffer_info->time_stamp, 417 buffer_info->skb, next_desc); 418 419 if (netif_msg_pktdata(adapter) && buffer_info->skb) 420 print_hex_dump(KERN_INFO, "", 421 DUMP_PREFIX_ADDRESS, 422 16, 1, buffer_info->skb->data, 423 dma_unmap_len(buffer_info, len), 424 true); 425 } 426 } 427 428 /* Print RX Rings Summary */ 429 rx_ring_summary: 430 dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); 431 pr_info("Queue [NTU] [NTC]\n"); 432 for (n = 0; n < adapter->num_rx_queues; n++) { 433 rx_ring = 
adapter->rx_ring[n]; 434 pr_info(" %5d %5X %5X\n", 435 n, rx_ring->next_to_use, rx_ring->next_to_clean); 436 } 437 438 /* Print RX Rings */ 439 if (!netif_msg_rx_status(adapter)) 440 goto exit; 441 442 dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); 443 444 /* Advanced Receive Descriptor (Read) Format 445 * 63 1 0 446 * +-----------------------------------------------------+ 447 * 0 | Packet Buffer Address [63:1] |A0/NSE| 448 * +----------------------------------------------+------+ 449 * 8 | Header Buffer Address [63:1] | DD | 450 * +-----------------------------------------------------+ 451 * 452 * 453 * Advanced Receive Descriptor (Write-Back) Format 454 * 455 * 63 48 47 32 31 30 21 20 17 16 4 3 0 456 * +------------------------------------------------------+ 457 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | 458 * | Checksum Ident | | | | Type | Type | 459 * +------------------------------------------------------+ 460 * 8 | VLAN Tag | Length | Extended Error | Extended Status | 461 * +------------------------------------------------------+ 462 * 63 48 47 32 31 20 19 0 463 */ 464 465 for (n = 0; n < adapter->num_rx_queues; n++) { 466 rx_ring = adapter->rx_ring[n]; 467 pr_info("------------------------------------\n"); 468 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); 469 pr_info("------------------------------------\n"); 470 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] [bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); 471 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n"); 472 473 for (i = 0; i < rx_ring->count; i++) { 474 const char *next_desc; 475 dma_addr_t dma = (dma_addr_t)0; 476 struct igb_rx_buffer *buffer_info = NULL; 477 rx_desc = IGB_RX_DESC(rx_ring, i); 478 u0 = (struct my_u0 *)rx_desc; 479 staterr = le32_to_cpu(rx_desc->wb.upper.status_error); 480 481 if (!rx_ring->xsk_pool) { 482 buffer_info = &rx_ring->rx_buffer_info[i]; 483 dma = buffer_info->dma; 484 } 485 486 if (i == 
rx_ring->next_to_use) 487 next_desc = " NTU"; 488 else if (i == rx_ring->next_to_clean) 489 next_desc = " NTC"; 490 else 491 next_desc = ""; 492 493 if (staterr & E1000_RXD_STAT_DD) { 494 /* Descriptor Done */ 495 pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n", 496 "RWB", i, 497 le64_to_cpu(u0->a), 498 le64_to_cpu(u0->b), 499 next_desc); 500 } else { 501 pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n", 502 "R ", i, 503 le64_to_cpu(u0->a), 504 le64_to_cpu(u0->b), 505 (u64)dma, 506 next_desc); 507 508 if (netif_msg_pktdata(adapter) && 509 buffer_info && dma && buffer_info->page) { 510 print_hex_dump(KERN_INFO, "", 511 DUMP_PREFIX_ADDRESS, 512 16, 1, 513 page_address(buffer_info->page) + 514 buffer_info->page_offset, 515 igb_rx_bufsz(rx_ring), true); 516 } 517 } 518 } 519 } 520 521 exit: 522 return; 523 } 524 525 /** 526 * igb_get_i2c_data - Reads the I2C SDA data bit 527 * @data: opaque pointer to adapter struct 528 * 529 * Returns the I2C data bit value 530 **/ 531 static int igb_get_i2c_data(void *data) 532 { 533 struct igb_adapter *adapter = (struct igb_adapter *)data; 534 struct e1000_hw *hw = &adapter->hw; 535 s32 i2cctl = rd32(E1000_I2CPARAMS); 536 537 return !!(i2cctl & E1000_I2C_DATA_IN); 538 } 539 540 /** 541 * igb_set_i2c_data - Sets the I2C data bit 542 * @data: pointer to hardware structure 543 * @state: I2C data value (0 or 1) to set 544 * 545 * Sets the I2C data bit 546 **/ 547 static void igb_set_i2c_data(void *data, int state) 548 { 549 struct igb_adapter *adapter = (struct igb_adapter *)data; 550 struct e1000_hw *hw = &adapter->hw; 551 s32 i2cctl = rd32(E1000_I2CPARAMS); 552 553 if (state) { 554 i2cctl |= E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; 555 } else { 556 i2cctl &= ~E1000_I2C_DATA_OE_N; 557 i2cctl &= ~E1000_I2C_DATA_OUT; 558 } 559 560 wr32(E1000_I2CPARAMS, i2cctl); 561 wrfl(); 562 } 563 564 /** 565 * igb_set_i2c_clk - Sets the I2C SCL clock 566 * @data: pointer to hardware structure 567 * @state: state to set clock 568 * 569 * 
Sets the I2C clock line to state 570 **/ 571 static void igb_set_i2c_clk(void *data, int state) 572 { 573 struct igb_adapter *adapter = (struct igb_adapter *)data; 574 struct e1000_hw *hw = &adapter->hw; 575 s32 i2cctl = rd32(E1000_I2CPARAMS); 576 577 if (state) { 578 i2cctl |= E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N; 579 } else { 580 i2cctl &= ~E1000_I2C_CLK_OUT; 581 i2cctl &= ~E1000_I2C_CLK_OE_N; 582 } 583 wr32(E1000_I2CPARAMS, i2cctl); 584 wrfl(); 585 } 586 587 /** 588 * igb_get_i2c_clk - Gets the I2C SCL clock state 589 * @data: pointer to hardware structure 590 * 591 * Gets the I2C clock state 592 **/ 593 static int igb_get_i2c_clk(void *data) 594 { 595 struct igb_adapter *adapter = (struct igb_adapter *)data; 596 struct e1000_hw *hw = &adapter->hw; 597 s32 i2cctl = rd32(E1000_I2CPARAMS); 598 599 return !!(i2cctl & E1000_I2C_CLK_IN); 600 } 601 602 static const struct i2c_algo_bit_data igb_i2c_algo = { 603 .setsda = igb_set_i2c_data, 604 .setscl = igb_set_i2c_clk, 605 .getsda = igb_get_i2c_data, 606 .getscl = igb_get_i2c_clk, 607 .udelay = 5, 608 .timeout = 20, 609 }; 610 611 /** 612 * igb_get_hw_dev - return device 613 * @hw: pointer to hardware structure 614 * 615 * used by hardware layer to print debugging information 616 **/ 617 struct net_device *igb_get_hw_dev(struct e1000_hw *hw) 618 { 619 struct igb_adapter *adapter = hw->back; 620 return adapter->netdev; 621 } 622 623 static struct pci_driver igb_driver; 624 625 /** 626 * igb_init_module - Driver Registration Routine 627 * 628 * igb_init_module is the first routine called when the driver is 629 * loaded. All it does is register with the PCI subsystem. 
630 **/ 631 static int __init igb_init_module(void) 632 { 633 int ret; 634 635 pr_info("%s\n", igb_driver_string); 636 pr_info("%s\n", igb_copyright); 637 638 #ifdef CONFIG_IGB_DCA 639 dca_register_notify(&dca_notifier); 640 #endif 641 ret = pci_register_driver(&igb_driver); 642 #ifdef CONFIG_IGB_DCA 643 if (ret) 644 dca_unregister_notify(&dca_notifier); 645 #endif 646 return ret; 647 } 648 649 module_init(igb_init_module); 650 651 /** 652 * igb_exit_module - Driver Exit Cleanup Routine 653 * 654 * igb_exit_module is called just before the driver is removed 655 * from memory. 656 **/ 657 static void __exit igb_exit_module(void) 658 { 659 #ifdef CONFIG_IGB_DCA 660 dca_unregister_notify(&dca_notifier); 661 #endif 662 pci_unregister_driver(&igb_driver); 663 } 664 665 module_exit(igb_exit_module); 666 667 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) 668 /** 669 * igb_cache_ring_register - Descriptor ring to register mapping 670 * @adapter: board private structure to initialize 671 * 672 * Once we know the feature-set enabled for the device, we'll cache 673 * the register offset the descriptor ring is assigned to. 674 **/ 675 static void igb_cache_ring_register(struct igb_adapter *adapter) 676 { 677 int i = 0, j = 0; 678 u32 rbase_offset = adapter->vfs_allocated_count; 679 680 switch (adapter->hw.mac.type) { 681 case e1000_82576: 682 /* The queues are allocated for virtualization such that VF 0 683 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. 
684 * In order to avoid collision we start at the first free queue 685 * and continue consuming queues in the same sequence 686 */ 687 if (adapter->vfs_allocated_count) { 688 for (; i < adapter->rss_queues; i++) 689 adapter->rx_ring[i]->reg_idx = rbase_offset + 690 Q_IDX_82576(i); 691 } 692 fallthrough; 693 case e1000_82575: 694 case e1000_82580: 695 case e1000_i350: 696 case e1000_i354: 697 case e1000_i210: 698 case e1000_i211: 699 default: 700 for (; i < adapter->num_rx_queues; i++) 701 adapter->rx_ring[i]->reg_idx = rbase_offset + i; 702 for (; j < adapter->num_tx_queues; j++) 703 adapter->tx_ring[j]->reg_idx = rbase_offset + j; 704 break; 705 } 706 } 707 708 u32 igb_rd32(struct e1000_hw *hw, u32 reg) 709 { 710 struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); 711 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 712 u32 value = 0; 713 714 if (E1000_REMOVED(hw_addr)) 715 return ~value; 716 717 value = readl(&hw_addr[reg]); 718 719 /* reads should not return all F's */ 720 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 721 struct net_device *netdev = igb->netdev; 722 hw->hw_addr = NULL; 723 netdev_err(netdev, "PCIe link lost\n"); 724 WARN(pci_device_is_present(igb->pdev), 725 "igb: Failed to read reg 0x%x!\n", reg); 726 } 727 728 return value; 729 } 730 731 /** 732 * igb_write_ivar - configure ivar for given MSI-X vector 733 * @hw: pointer to the HW structure 734 * @msix_vector: vector number we are allocating to a given ring 735 * @index: row index of IVAR register to write within IVAR table 736 * @offset: column offset of in IVAR, should be multiple of 8 737 * 738 * This function is intended to handle the writing of the IVAR register 739 * for adapters 82576 and newer. The IVAR table consists of 2 columns, 740 * each containing an cause allocation for an Rx and Tx ring, and a 741 * variable number of rows depending on the number of queues supported. 
742 **/ 743 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, 744 int index, int offset) 745 { 746 u32 ivar = array_rd32(E1000_IVAR0, index); 747 748 /* clear any bits that are currently set */ 749 ivar &= ~((u32)0xFF << offset); 750 751 /* write vector and valid bit */ 752 ivar |= (msix_vector | E1000_IVAR_VALID) << offset; 753 754 array_wr32(E1000_IVAR0, index, ivar); 755 } 756 757 #define IGB_N0_QUEUE -1 758 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) 759 { 760 struct igb_adapter *adapter = q_vector->adapter; 761 struct e1000_hw *hw = &adapter->hw; 762 int rx_queue = IGB_N0_QUEUE; 763 int tx_queue = IGB_N0_QUEUE; 764 u32 msixbm = 0; 765 766 if (q_vector->rx.ring) 767 rx_queue = q_vector->rx.ring->reg_idx; 768 if (q_vector->tx.ring) 769 tx_queue = q_vector->tx.ring->reg_idx; 770 771 switch (hw->mac.type) { 772 case e1000_82575: 773 /* The 82575 assigns vectors using a bitmask, which matches the 774 * bitmask for the EICR/EIMS/EIMC registers. To assign one 775 * or more queues to a vector, we write the appropriate bits 776 * into the MSIXBM register for that vector. 777 */ 778 if (rx_queue > IGB_N0_QUEUE) 779 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; 780 if (tx_queue > IGB_N0_QUEUE) 781 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; 782 if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0) 783 msixbm |= E1000_EIMS_OTHER; 784 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); 785 q_vector->eims_value = msixbm; 786 break; 787 case e1000_82576: 788 /* 82576 uses a table that essentially consists of 2 columns 789 * with 8 rows. The ordering is column-major so we use the 790 * lower 3 bits as the row index, and the 4th bit as the 791 * column offset. 
792 */ 793 if (rx_queue > IGB_N0_QUEUE) 794 igb_write_ivar(hw, msix_vector, 795 rx_queue & 0x7, 796 (rx_queue & 0x8) << 1); 797 if (tx_queue > IGB_N0_QUEUE) 798 igb_write_ivar(hw, msix_vector, 799 tx_queue & 0x7, 800 ((tx_queue & 0x8) << 1) + 8); 801 q_vector->eims_value = BIT(msix_vector); 802 break; 803 case e1000_82580: 804 case e1000_i350: 805 case e1000_i354: 806 case e1000_i210: 807 case e1000_i211: 808 /* On 82580 and newer adapters the scheme is similar to 82576 809 * however instead of ordering column-major we have things 810 * ordered row-major. So we traverse the table by using 811 * bit 0 as the column offset, and the remaining bits as the 812 * row index. 813 */ 814 if (rx_queue > IGB_N0_QUEUE) 815 igb_write_ivar(hw, msix_vector, 816 rx_queue >> 1, 817 (rx_queue & 0x1) << 4); 818 if (tx_queue > IGB_N0_QUEUE) 819 igb_write_ivar(hw, msix_vector, 820 tx_queue >> 1, 821 ((tx_queue & 0x1) << 4) + 8); 822 q_vector->eims_value = BIT(msix_vector); 823 break; 824 default: 825 BUG(); 826 break; 827 } 828 829 /* add q_vector eims value to global eims_enable_mask */ 830 adapter->eims_enable_mask |= q_vector->eims_value; 831 832 /* configure q_vector to set itr on first interrupt */ 833 q_vector->set_itr = 1; 834 } 835 836 /** 837 * igb_configure_msix - Configure MSI-X hardware 838 * @adapter: board private structure to initialize 839 * 840 * igb_configure_msix sets up the hardware to properly 841 * generate MSI-X interrupts. 842 **/ 843 static void igb_configure_msix(struct igb_adapter *adapter) 844 { 845 u32 tmp; 846 int i, vector = 0; 847 struct e1000_hw *hw = &adapter->hw; 848 849 adapter->eims_enable_mask = 0; 850 851 /* set vector for other causes, i.e. link changes */ 852 switch (hw->mac.type) { 853 case e1000_82575: 854 tmp = rd32(E1000_CTRL_EXT); 855 /* enable MSI-X PBA support*/ 856 tmp |= E1000_CTRL_EXT_PBA_CLR; 857 858 /* Auto-Mask interrupts upon ICR read. 
*/ 859 tmp |= E1000_CTRL_EXT_EIAME; 860 tmp |= E1000_CTRL_EXT_IRCA; 861 862 wr32(E1000_CTRL_EXT, tmp); 863 864 /* enable msix_other interrupt */ 865 array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER); 866 adapter->eims_other = E1000_EIMS_OTHER; 867 868 break; 869 870 case e1000_82576: 871 case e1000_82580: 872 case e1000_i350: 873 case e1000_i354: 874 case e1000_i210: 875 case e1000_i211: 876 /* Turn on MSI-X capability first, or our settings 877 * won't stick. And it will take days to debug. 878 */ 879 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | 880 E1000_GPIE_PBA | E1000_GPIE_EIAME | 881 E1000_GPIE_NSICR); 882 883 /* enable msix_other interrupt */ 884 adapter->eims_other = BIT(vector); 885 tmp = (vector++ | E1000_IVAR_VALID) << 8; 886 887 wr32(E1000_IVAR_MISC, tmp); 888 break; 889 default: 890 /* do nothing, since nothing else supports MSI-X */ 891 break; 892 } /* switch (hw->mac.type) */ 893 894 adapter->eims_enable_mask |= adapter->eims_other; 895 896 for (i = 0; i < adapter->num_q_vectors; i++) 897 igb_assign_vector(adapter->q_vector[i], vector++); 898 899 wrfl(); 900 } 901 902 /** 903 * igb_request_msix - Initialize MSI-X interrupts 904 * @adapter: board private structure to initialize 905 * 906 * igb_request_msix allocates MSI-X vectors and requests interrupts from the 907 * kernel. 
908 **/ 909 static int igb_request_msix(struct igb_adapter *adapter) 910 { 911 unsigned int num_q_vectors = adapter->num_q_vectors; 912 struct net_device *netdev = adapter->netdev; 913 int i, err = 0, vector = 0, free_vector = 0; 914 915 err = request_irq(adapter->msix_entries[vector].vector, 916 igb_msix_other, 0, netdev->name, adapter); 917 if (err) 918 goto err_out; 919 920 if (num_q_vectors > MAX_Q_VECTORS) { 921 num_q_vectors = MAX_Q_VECTORS; 922 dev_warn(&adapter->pdev->dev, 923 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 924 adapter->num_q_vectors, MAX_Q_VECTORS); 925 } 926 for (i = 0; i < num_q_vectors; i++) { 927 struct igb_q_vector *q_vector = adapter->q_vector[i]; 928 929 vector++; 930 931 q_vector->itr_register = adapter->io_addr + E1000_EITR(vector); 932 933 if (q_vector->rx.ring && q_vector->tx.ring) 934 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 935 q_vector->rx.ring->queue_index); 936 else if (q_vector->tx.ring) 937 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 938 q_vector->tx.ring->queue_index); 939 else if (q_vector->rx.ring) 940 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 941 q_vector->rx.ring->queue_index); 942 else 943 sprintf(q_vector->name, "%s-unused", netdev->name); 944 945 err = request_irq(adapter->msix_entries[vector].vector, 946 igb_msix_ring, 0, q_vector->name, 947 q_vector); 948 if (err) 949 goto err_free; 950 951 netif_napi_set_irq(&q_vector->napi, 952 adapter->msix_entries[vector].vector); 953 } 954 955 igb_configure_msix(adapter); 956 return 0; 957 958 err_free: 959 /* free already assigned IRQs */ 960 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 961 962 vector--; 963 for (i = 0; i < vector; i++) { 964 free_irq(adapter->msix_entries[free_vector++].vector, 965 adapter->q_vector[i]); 966 } 967 err_out: 968 return err; 969 } 970 971 /** 972 * igb_free_q_vector - Free memory allocated for specific interrupt vector 973 * @adapter: board private structure to initialize 974 
* @v_idx: Index of vector to be freed 975 * 976 * This function frees the memory allocated to the q_vector. 977 **/ 978 static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) 979 { 980 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 981 982 adapter->q_vector[v_idx] = NULL; 983 984 /* igb_get_stats64() might access the rings on this vector, 985 * we must wait a grace period before freeing it. 986 */ 987 if (q_vector) 988 kfree_rcu(q_vector, rcu); 989 } 990 991 /** 992 * igb_reset_q_vector - Reset config for interrupt vector 993 * @adapter: board private structure to initialize 994 * @v_idx: Index of vector to be reset 995 * 996 * If NAPI is enabled it will delete any references to the 997 * NAPI struct. This is preparation for igb_free_q_vector. 998 **/ 999 static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx) 1000 { 1001 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 1002 1003 /* Coming from igb_set_interrupt_capability, the vectors are not yet 1004 * allocated. So, q_vector is NULL so we should stop here. 1005 */ 1006 if (!q_vector) 1007 return; 1008 1009 if (q_vector->tx.ring) 1010 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 1011 1012 if (q_vector->rx.ring) 1013 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 1014 1015 netif_napi_del(&q_vector->napi); 1016 1017 } 1018 1019 static void igb_reset_interrupt_capability(struct igb_adapter *adapter) 1020 { 1021 int v_idx = adapter->num_q_vectors; 1022 1023 if (adapter->flags & IGB_FLAG_HAS_MSIX) 1024 pci_disable_msix(adapter->pdev); 1025 else if (adapter->flags & IGB_FLAG_HAS_MSI) 1026 pci_disable_msi(adapter->pdev); 1027 1028 while (v_idx--) 1029 igb_reset_q_vector(adapter, v_idx); 1030 } 1031 1032 /** 1033 * igb_free_q_vectors - Free memory allocated for interrupt vectors 1034 * @adapter: board private structure to initialize 1035 * 1036 * This function frees the memory allocated to the q_vectors. 
In addition if 1037 * NAPI is enabled it will delete any references to the NAPI struct prior 1038 * to freeing the q_vector. 1039 **/ 1040 static void igb_free_q_vectors(struct igb_adapter *adapter) 1041 { 1042 int v_idx = adapter->num_q_vectors; 1043 1044 adapter->num_tx_queues = 0; 1045 adapter->num_rx_queues = 0; 1046 adapter->num_q_vectors = 0; 1047 1048 while (v_idx--) { 1049 igb_reset_q_vector(adapter, v_idx); 1050 igb_free_q_vector(adapter, v_idx); 1051 } 1052 } 1053 1054 /** 1055 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts 1056 * @adapter: board private structure to initialize 1057 * 1058 * This function resets the device so that it has 0 Rx queues, Tx queues, and 1059 * MSI-X interrupts allocated. 1060 */ 1061 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) 1062 { 1063 igb_free_q_vectors(adapter); 1064 igb_reset_interrupt_capability(adapter); 1065 } 1066 1067 /** 1068 * igb_set_interrupt_capability - set MSI or MSI-X if supported 1069 * @adapter: board private structure to initialize 1070 * @msix: boolean value of MSIX capability 1071 * 1072 * Attempt to configure interrupts using the best available 1073 * capabilities of the hardware and kernel. 1074 **/ 1075 static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) 1076 { 1077 int err; 1078 int numvecs, i; 1079 1080 if (!msix) 1081 goto msi_only; 1082 adapter->flags |= IGB_FLAG_HAS_MSIX; 1083 1084 /* Number of supported queues. 
*/ 1085 adapter->num_rx_queues = adapter->rss_queues; 1086 if (adapter->vfs_allocated_count) 1087 adapter->num_tx_queues = 1; 1088 else 1089 adapter->num_tx_queues = adapter->rss_queues; 1090 1091 /* start with one vector for every Rx queue */ 1092 numvecs = adapter->num_rx_queues; 1093 1094 /* if Tx handler is separate add 1 for every Tx queue */ 1095 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) 1096 numvecs += adapter->num_tx_queues; 1097 1098 /* store the number of vectors reserved for queues */ 1099 adapter->num_q_vectors = numvecs; 1100 1101 /* add 1 vector for link status interrupts */ 1102 numvecs++; 1103 for (i = 0; i < numvecs; i++) 1104 adapter->msix_entries[i].entry = i; 1105 1106 err = pci_enable_msix_range(adapter->pdev, 1107 adapter->msix_entries, 1108 numvecs, 1109 numvecs); 1110 if (err > 0) 1111 return; 1112 1113 igb_reset_interrupt_capability(adapter); 1114 1115 /* If we can't do MSI-X, try MSI */ 1116 msi_only: 1117 adapter->flags &= ~IGB_FLAG_HAS_MSIX; 1118 #ifdef CONFIG_PCI_IOV 1119 /* disable SR-IOV for non MSI-X configurations */ 1120 if (adapter->vf_data) { 1121 struct e1000_hw *hw = &adapter->hw; 1122 /* disable iov and allow time for transactions to clear */ 1123 pci_disable_sriov(adapter->pdev); 1124 msleep(500); 1125 1126 kfree(adapter->vf_mac_list); 1127 adapter->vf_mac_list = NULL; 1128 kfree(adapter->vf_data); 1129 adapter->vf_data = NULL; 1130 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 1131 wrfl(); 1132 msleep(100); 1133 dev_info(&adapter->pdev->dev, "IOV Disabled\n"); 1134 } 1135 #endif 1136 adapter->vfs_allocated_count = 0; 1137 adapter->rss_queues = 1; 1138 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 1139 adapter->num_rx_queues = 1; 1140 adapter->num_tx_queues = 1; 1141 adapter->num_q_vectors = 1; 1142 if (!pci_enable_msi(adapter->pdev)) 1143 adapter->flags |= IGB_FLAG_HAS_MSI; 1144 } 1145 1146 static void igb_add_ring(struct igb_ring *ring, 1147 struct igb_ring_container *head) 1148 { 1149 head->ring = ring; 1150 head->count++; 
1151 } 1152 1153 /** 1154 * igb_alloc_q_vector - Allocate memory for a single interrupt vector 1155 * @adapter: board private structure to initialize 1156 * @v_count: q_vectors allocated on adapter, used for ring interleaving 1157 * @v_idx: index of vector in adapter struct 1158 * @txr_count: total number of Tx rings to allocate 1159 * @txr_idx: index of first Tx ring to allocate 1160 * @rxr_count: total number of Rx rings to allocate 1161 * @rxr_idx: index of first Rx ring to allocate 1162 * 1163 * We allocate one q_vector. If allocation fails we return -ENOMEM. 1164 **/ 1165 static int igb_alloc_q_vector(struct igb_adapter *adapter, 1166 int v_count, int v_idx, 1167 int txr_count, int txr_idx, 1168 int rxr_count, int rxr_idx) 1169 { 1170 struct igb_q_vector *q_vector; 1171 struct igb_ring *ring; 1172 int ring_count; 1173 size_t size; 1174 1175 /* igb only supports 1 Tx and/or 1 Rx queue per vector */ 1176 if (txr_count > 1 || rxr_count > 1) 1177 return -ENOMEM; 1178 1179 ring_count = txr_count + rxr_count; 1180 size = kmalloc_size_roundup(struct_size(q_vector, ring, ring_count)); 1181 1182 /* allocate q_vector and rings */ 1183 q_vector = adapter->q_vector[v_idx]; 1184 if (!q_vector) { 1185 q_vector = kzalloc(size, GFP_KERNEL); 1186 } else if (size > ksize(q_vector)) { 1187 struct igb_q_vector *new_q_vector; 1188 1189 new_q_vector = kzalloc(size, GFP_KERNEL); 1190 if (new_q_vector) 1191 kfree_rcu(q_vector, rcu); 1192 q_vector = new_q_vector; 1193 } else { 1194 memset(q_vector, 0, size); 1195 } 1196 if (!q_vector) 1197 return -ENOMEM; 1198 1199 /* initialize NAPI */ 1200 netif_napi_add_config(adapter->netdev, &q_vector->napi, igb_poll, 1201 v_idx); 1202 1203 /* tie q_vector and adapter together */ 1204 adapter->q_vector[v_idx] = q_vector; 1205 q_vector->adapter = adapter; 1206 1207 /* initialize work limits */ 1208 q_vector->tx.work_limit = adapter->tx_work_limit; 1209 1210 /* initialize ITR configuration */ 1211 q_vector->itr_register = adapter->io_addr + 
E1000_EITR(0); 1212 q_vector->itr_val = IGB_START_ITR; 1213 1214 /* initialize pointer to rings */ 1215 ring = q_vector->ring; 1216 1217 /* initialize ITR */ 1218 if (rxr_count) { 1219 /* rx or rx/tx vector */ 1220 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 1221 q_vector->itr_val = adapter->rx_itr_setting; 1222 } else { 1223 /* tx only vector */ 1224 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 1225 q_vector->itr_val = adapter->tx_itr_setting; 1226 } 1227 1228 if (txr_count) { 1229 /* assign generic ring traits */ 1230 ring->dev = &adapter->pdev->dev; 1231 ring->netdev = adapter->netdev; 1232 1233 /* configure backlink on ring */ 1234 ring->q_vector = q_vector; 1235 1236 /* update q_vector Tx values */ 1237 igb_add_ring(ring, &q_vector->tx); 1238 1239 /* For 82575, context index must be unique per ring. */ 1240 if (adapter->hw.mac.type == e1000_82575) 1241 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); 1242 1243 /* apply Tx specific ring traits */ 1244 ring->count = adapter->tx_ring_count; 1245 ring->queue_index = txr_idx; 1246 1247 ring->cbs_enable = false; 1248 ring->idleslope = 0; 1249 ring->sendslope = 0; 1250 ring->hicredit = 0; 1251 ring->locredit = 0; 1252 1253 u64_stats_init(&ring->tx_syncp); 1254 u64_stats_init(&ring->tx_syncp2); 1255 1256 /* assign ring to adapter */ 1257 adapter->tx_ring[txr_idx] = ring; 1258 1259 /* push pointer to next ring */ 1260 ring++; 1261 } 1262 1263 if (rxr_count) { 1264 /* assign generic ring traits */ 1265 ring->dev = &adapter->pdev->dev; 1266 ring->netdev = adapter->netdev; 1267 1268 /* configure backlink on ring */ 1269 ring->q_vector = q_vector; 1270 1271 /* update q_vector Rx values */ 1272 igb_add_ring(ring, &q_vector->rx); 1273 1274 /* set flag indicating ring supports SCTP checksum offload */ 1275 if (adapter->hw.mac.type >= e1000_82576) 1276 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); 1277 1278 /* On i350, i354, i210, and i211, loopback VLAN packets 1279 * have the tag 
byte-swapped. 1280 */ 1281 if (adapter->hw.mac.type >= e1000_i350) 1282 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); 1283 1284 /* apply Rx specific ring traits */ 1285 ring->count = adapter->rx_ring_count; 1286 ring->queue_index = rxr_idx; 1287 1288 u64_stats_init(&ring->rx_syncp); 1289 1290 /* assign ring to adapter */ 1291 adapter->rx_ring[rxr_idx] = ring; 1292 } 1293 1294 return 0; 1295 } 1296 1297 1298 /** 1299 * igb_alloc_q_vectors - Allocate memory for interrupt vectors 1300 * @adapter: board private structure to initialize 1301 * 1302 * We allocate one q_vector per queue interrupt. If allocation fails we 1303 * return -ENOMEM. 1304 **/ 1305 static int igb_alloc_q_vectors(struct igb_adapter *adapter) 1306 { 1307 int q_vectors = adapter->num_q_vectors; 1308 int rxr_remaining = adapter->num_rx_queues; 1309 int txr_remaining = adapter->num_tx_queues; 1310 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 1311 int err; 1312 1313 if (q_vectors >= (rxr_remaining + txr_remaining)) { 1314 for (; rxr_remaining; v_idx++) { 1315 err = igb_alloc_q_vector(adapter, q_vectors, v_idx, 1316 0, 0, 1, rxr_idx); 1317 1318 if (err) 1319 goto err_out; 1320 1321 /* update counts and index */ 1322 rxr_remaining--; 1323 rxr_idx++; 1324 } 1325 } 1326 1327 for (; v_idx < q_vectors; v_idx++) { 1328 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 1329 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 1330 1331 err = igb_alloc_q_vector(adapter, q_vectors, v_idx, 1332 tqpv, txr_idx, rqpv, rxr_idx); 1333 1334 if (err) 1335 goto err_out; 1336 1337 /* update counts and index */ 1338 rxr_remaining -= rqpv; 1339 txr_remaining -= tqpv; 1340 rxr_idx++; 1341 txr_idx++; 1342 } 1343 1344 return 0; 1345 1346 err_out: 1347 adapter->num_tx_queues = 0; 1348 adapter->num_rx_queues = 0; 1349 adapter->num_q_vectors = 0; 1350 1351 while (v_idx--) 1352 igb_free_q_vector(adapter, v_idx); 1353 1354 return -ENOMEM; 1355 } 1356 1357 /** 1358 * igb_init_interrupt_scheme - initialize 
interrupts, allocate queues/vectors 1359 * @adapter: board private structure to initialize 1360 * @msix: boolean value of MSIX capability 1361 * 1362 * This function initializes the interrupts and allocates all of the queues. 1363 **/ 1364 static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) 1365 { 1366 struct pci_dev *pdev = adapter->pdev; 1367 int err; 1368 1369 igb_set_interrupt_capability(adapter, msix); 1370 1371 err = igb_alloc_q_vectors(adapter); 1372 if (err) { 1373 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); 1374 goto err_alloc_q_vectors; 1375 } 1376 1377 igb_cache_ring_register(adapter); 1378 1379 return 0; 1380 1381 err_alloc_q_vectors: 1382 igb_reset_interrupt_capability(adapter); 1383 return err; 1384 } 1385 1386 /** 1387 * igb_request_irq - initialize interrupts 1388 * @adapter: board private structure to initialize 1389 * 1390 * Attempts to configure interrupts using the best available 1391 * capabilities of the hardware and kernel. 
1392 **/ 1393 static int igb_request_irq(struct igb_adapter *adapter) 1394 { 1395 struct net_device *netdev = adapter->netdev; 1396 struct pci_dev *pdev = adapter->pdev; 1397 int err = 0; 1398 1399 if (adapter->flags & IGB_FLAG_HAS_MSIX) { 1400 err = igb_request_msix(adapter); 1401 if (!err) 1402 goto request_done; 1403 /* fall back to MSI */ 1404 igb_free_all_tx_resources(adapter); 1405 igb_free_all_rx_resources(adapter); 1406 1407 igb_clear_interrupt_scheme(adapter); 1408 err = igb_init_interrupt_scheme(adapter, false); 1409 if (err) 1410 goto request_done; 1411 1412 igb_setup_all_tx_resources(adapter); 1413 igb_setup_all_rx_resources(adapter); 1414 igb_configure(adapter); 1415 } 1416 1417 igb_assign_vector(adapter->q_vector[0], 0); 1418 1419 if (adapter->flags & IGB_FLAG_HAS_MSI) { 1420 err = request_irq(pdev->irq, igb_intr_msi, 0, 1421 netdev->name, adapter); 1422 if (!err) 1423 goto request_done; 1424 1425 /* fall back to legacy interrupts */ 1426 igb_reset_interrupt_capability(adapter); 1427 adapter->flags &= ~IGB_FLAG_HAS_MSI; 1428 } 1429 1430 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED, 1431 netdev->name, adapter); 1432 1433 if (err) 1434 dev_err(&pdev->dev, "Error %d getting interrupt\n", 1435 err); 1436 1437 request_done: 1438 return err; 1439 } 1440 1441 static void igb_free_irq(struct igb_adapter *adapter) 1442 { 1443 if (adapter->flags & IGB_FLAG_HAS_MSIX) { 1444 int vector = 0, i; 1445 1446 free_irq(adapter->msix_entries[vector++].vector, adapter); 1447 1448 for (i = 0; i < adapter->num_q_vectors; i++) 1449 free_irq(adapter->msix_entries[vector++].vector, 1450 adapter->q_vector[i]); 1451 } else { 1452 free_irq(adapter->pdev->irq, adapter); 1453 } 1454 } 1455 1456 /** 1457 * igb_irq_disable - Mask off interrupt generation on the NIC 1458 * @adapter: board private structure 1459 **/ 1460 static void igb_irq_disable(struct igb_adapter *adapter) 1461 { 1462 struct e1000_hw *hw = &adapter->hw; 1463 1464 /* we need to be careful when disabling 
interrupts. The VFs are also 1465 * mapped into these registers and so clearing the bits can cause 1466 * issues on the VF drivers so we only need to clear what we set 1467 */ 1468 if (adapter->flags & IGB_FLAG_HAS_MSIX) { 1469 u32 regval = rd32(E1000_EIAM); 1470 1471 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); 1472 wr32(E1000_EIMC, adapter->eims_enable_mask); 1473 regval = rd32(E1000_EIAC); 1474 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask); 1475 } 1476 1477 wr32(E1000_IAM, 0); 1478 wr32(E1000_IMC, ~0); 1479 wrfl(); 1480 if (adapter->flags & IGB_FLAG_HAS_MSIX) { 1481 int i; 1482 1483 for (i = 0; i < adapter->num_q_vectors; i++) 1484 synchronize_irq(adapter->msix_entries[i].vector); 1485 } else { 1486 synchronize_irq(adapter->pdev->irq); 1487 } 1488 } 1489 1490 /** 1491 * igb_irq_enable - Enable default interrupt generation settings 1492 * @adapter: board private structure 1493 **/ 1494 static void igb_irq_enable(struct igb_adapter *adapter) 1495 { 1496 struct e1000_hw *hw = &adapter->hw; 1497 1498 if (adapter->flags & IGB_FLAG_HAS_MSIX) { 1499 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; 1500 u32 regval = rd32(E1000_EIAC); 1501 1502 wr32(E1000_EIAC, regval | adapter->eims_enable_mask); 1503 regval = rd32(E1000_EIAM); 1504 wr32(E1000_EIAM, regval | adapter->eims_enable_mask); 1505 wr32(E1000_EIMS, adapter->eims_enable_mask); 1506 if (adapter->vfs_allocated_count) { 1507 wr32(E1000_MBVFIMR, 0xFF); 1508 ims |= E1000_IMS_VMMB; 1509 } 1510 wr32(E1000_IMS, ims); 1511 } else { 1512 wr32(E1000_IMS, IMS_ENABLE_MASK | 1513 E1000_IMS_DRSTA); 1514 wr32(E1000_IAM, IMS_ENABLE_MASK | 1515 E1000_IMS_DRSTA); 1516 } 1517 } 1518 1519 static void igb_update_mng_vlan(struct igb_adapter *adapter) 1520 { 1521 struct e1000_hw *hw = &adapter->hw; 1522 u16 pf_id = adapter->vfs_allocated_count; 1523 u16 vid = adapter->hw.mng_cookie.vlan_id; 1524 u16 old_vid = adapter->mng_vlan_id; 1525 1526 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) 
{ 1527 /* add VID to filter table */ 1528 igb_vfta_set(hw, vid, pf_id, true, true); 1529 adapter->mng_vlan_id = vid; 1530 } else { 1531 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; 1532 } 1533 1534 if (old_vid != IGB_MNG_VLAN_NONE && vid != old_vid && 1535 !test_bit(old_vid, adapter->active_vlans)) { 1536 /* remove VID from filter table */ 1537 igb_vfta_set(hw, vid, pf_id, false, true); 1538 } 1539 } 1540 1541 /** 1542 * igb_release_hw_control - release control of the h/w to f/w 1543 * @adapter: address of board private structure 1544 * 1545 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 1546 * For ASF and Pass Through versions of f/w this means that the 1547 * driver is no longer loaded. 1548 **/ 1549 static void igb_release_hw_control(struct igb_adapter *adapter) 1550 { 1551 struct e1000_hw *hw = &adapter->hw; 1552 u32 ctrl_ext; 1553 1554 /* Let firmware take over control of h/w */ 1555 ctrl_ext = rd32(E1000_CTRL_EXT); 1556 wr32(E1000_CTRL_EXT, 1557 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 1558 } 1559 1560 /** 1561 * igb_get_hw_control - get control of the h/w from f/w 1562 * @adapter: address of board private structure 1563 * 1564 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 1565 * For ASF and Pass Through versions of f/w this means that 1566 * the driver is loaded. 
1567 **/ 1568 static void igb_get_hw_control(struct igb_adapter *adapter) 1569 { 1570 struct e1000_hw *hw = &adapter->hw; 1571 u32 ctrl_ext; 1572 1573 /* Let firmware know the driver has taken over */ 1574 ctrl_ext = rd32(E1000_CTRL_EXT); 1575 wr32(E1000_CTRL_EXT, 1576 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 1577 } 1578 1579 static void enable_fqtss(struct igb_adapter *adapter, bool enable) 1580 { 1581 struct net_device *netdev = adapter->netdev; 1582 struct e1000_hw *hw = &adapter->hw; 1583 1584 WARN_ON(hw->mac.type != e1000_i210); 1585 1586 if (enable) 1587 adapter->flags |= IGB_FLAG_FQTSS; 1588 else 1589 adapter->flags &= ~IGB_FLAG_FQTSS; 1590 1591 if (netif_running(netdev)) 1592 schedule_work(&adapter->reset_task); 1593 } 1594 1595 static bool is_fqtss_enabled(struct igb_adapter *adapter) 1596 { 1597 return (adapter->flags & IGB_FLAG_FQTSS) ? true : false; 1598 } 1599 1600 static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue, 1601 enum tx_queue_prio prio) 1602 { 1603 u32 val; 1604 1605 WARN_ON(hw->mac.type != e1000_i210); 1606 WARN_ON(queue < 0 || queue > 4); 1607 1608 val = rd32(E1000_I210_TXDCTL(queue)); 1609 1610 if (prio == TX_QUEUE_PRIO_HIGH) 1611 val |= E1000_TXDCTL_PRIORITY; 1612 else 1613 val &= ~E1000_TXDCTL_PRIORITY; 1614 1615 wr32(E1000_I210_TXDCTL(queue), val); 1616 } 1617 1618 static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode) 1619 { 1620 u32 val; 1621 1622 WARN_ON(hw->mac.type != e1000_i210); 1623 WARN_ON(queue < 0 || queue > 1); 1624 1625 val = rd32(E1000_I210_TQAVCC(queue)); 1626 1627 if (mode == QUEUE_MODE_STREAM_RESERVATION) 1628 val |= E1000_TQAVCC_QUEUEMODE; 1629 else 1630 val &= ~E1000_TQAVCC_QUEUEMODE; 1631 1632 wr32(E1000_I210_TQAVCC(queue), val); 1633 } 1634 1635 static bool is_any_cbs_enabled(struct igb_adapter *adapter) 1636 { 1637 int i; 1638 1639 for (i = 0; i < adapter->num_tx_queues; i++) { 1640 if (adapter->tx_ring[i]->cbs_enable) 1641 return true; 1642 } 1643 1644 return false; 1645 } 1646 
1647 static bool is_any_txtime_enabled(struct igb_adapter *adapter) 1648 { 1649 int i; 1650 1651 for (i = 0; i < adapter->num_tx_queues; i++) { 1652 if (adapter->tx_ring[i]->launchtime_enable) 1653 return true; 1654 } 1655 1656 return false; 1657 } 1658 1659 /** 1660 * igb_config_tx_modes - Configure "Qav Tx mode" features on igb 1661 * @adapter: pointer to adapter struct 1662 * @queue: queue number 1663 * 1664 * Configure CBS and Launchtime for a given hardware queue. 1665 * Parameters are retrieved from the correct Tx ring, so 1666 * igb_save_cbs_params() and igb_save_txtime_params() should be used 1667 * for setting those correctly prior to this function being called. 1668 **/ 1669 static void igb_config_tx_modes(struct igb_adapter *adapter, int queue) 1670 { 1671 struct net_device *netdev = adapter->netdev; 1672 struct e1000_hw *hw = &adapter->hw; 1673 struct igb_ring *ring; 1674 u32 tqavcc, tqavctrl; 1675 u16 value; 1676 1677 WARN_ON(hw->mac.type != e1000_i210); 1678 WARN_ON(queue < 0 || queue > 1); 1679 ring = adapter->tx_ring[queue]; 1680 1681 /* If any of the Qav features is enabled, configure queues as SR and 1682 * with HIGH PRIO. If none is, then configure them with LOW PRIO and 1683 * as SP. 1684 */ 1685 if (ring->cbs_enable || ring->launchtime_enable) { 1686 set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH); 1687 set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION); 1688 } else { 1689 set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW); 1690 set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY); 1691 } 1692 1693 /* If CBS is enabled, set DataTranARB and config its parameters. */ 1694 if (ring->cbs_enable || queue == 0) { 1695 /* i210 does not allow the queue 0 to be in the Strict 1696 * Priority mode while the Qav mode is enabled, so, 1697 * instead of disabling strict priority mode, we give 1698 * queue 0 the maximum of credits possible. 
1699 * 1700 * See section 8.12.19 of the i210 datasheet, "Note: 1701 * Queue0 QueueMode must be set to 1b when 1702 * TransmitMode is set to Qav." 1703 */ 1704 if (queue == 0 && !ring->cbs_enable) { 1705 /* max "linkspeed" idleslope in kbps */ 1706 ring->idleslope = 1000000; 1707 ring->hicredit = ETH_FRAME_LEN; 1708 } 1709 1710 /* Always set data transfer arbitration to credit-based 1711 * shaper algorithm on TQAVCTRL if CBS is enabled for any of 1712 * the queues. 1713 */ 1714 tqavctrl = rd32(E1000_I210_TQAVCTRL); 1715 tqavctrl |= E1000_TQAVCTRL_DATATRANARB; 1716 wr32(E1000_I210_TQAVCTRL, tqavctrl); 1717 1718 /* According to i210 datasheet section 7.2.7.7, we should set 1719 * the 'idleSlope' field from TQAVCC register following the 1720 * equation: 1721 * 1722 * For 100 Mbps link speed: 1723 * 1724 * value = BW * 0x7735 * 0.2 (E1) 1725 * 1726 * For 1000Mbps link speed: 1727 * 1728 * value = BW * 0x7735 * 2 (E2) 1729 * 1730 * E1 and E2 can be merged into one equation as shown below. 1731 * Note that 'link-speed' is in Mbps. 1732 * 1733 * value = BW * 0x7735 * 2 * link-speed 1734 * -------------- (E3) 1735 * 1000 1736 * 1737 * 'BW' is the percentage bandwidth out of full link speed 1738 * which can be found with the following equation. Note that 1739 * idleSlope here is the parameter from this function which 1740 * is in kbps. 1741 * 1742 * BW = idleSlope 1743 * ----------------- (E4) 1744 * link-speed * 1000 1745 * 1746 * That said, we can come up with a generic equation to 1747 * calculate the value we should set it TQAVCC register by 1748 * replacing 'BW' in E3 by E4. The resulting equation is: 1749 * 1750 * value = idleSlope * 0x7735 * 2 * link-speed 1751 * ----------------- -------------- (E5) 1752 * link-speed * 1000 1000 1753 * 1754 * 'link-speed' is present in both sides of the fraction so 1755 * it is canceled out. 
The final equation is the following: 1756 * 1757 * value = idleSlope * 61034 1758 * ----------------- (E6) 1759 * 1000000 1760 * 1761 * NOTE: For i210, given the above, we can see that idleslope 1762 * is represented in 16.38431 kbps units by the value at 1763 * the TQAVCC register (1Gbps / 61034), which reduces 1764 * the granularity for idleslope increments. 1765 * For instance, if you want to configure a 2576kbps 1766 * idleslope, the value to be written on the register 1767 * would have to be 157.23. If rounded down, you end 1768 * up with less bandwidth available than originally 1769 * required (~2572 kbps). If rounded up, you end up 1770 * with a higher bandwidth (~2589 kbps). Below the 1771 * approach we take is to always round up the 1772 * calculated value, so the resulting bandwidth might 1773 * be slightly higher for some configurations. 1774 */ 1775 value = DIV_ROUND_UP_ULL(ring->idleslope * 61034ULL, 1000000); 1776 1777 tqavcc = rd32(E1000_I210_TQAVCC(queue)); 1778 tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; 1779 tqavcc |= value; 1780 wr32(E1000_I210_TQAVCC(queue), tqavcc); 1781 1782 wr32(E1000_I210_TQAVHC(queue), 1783 0x80000000 + ring->hicredit * 0x7735); 1784 } else { 1785 1786 /* Set idleSlope to zero. */ 1787 tqavcc = rd32(E1000_I210_TQAVCC(queue)); 1788 tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; 1789 wr32(E1000_I210_TQAVCC(queue), tqavcc); 1790 1791 /* Set hiCredit to zero. */ 1792 wr32(E1000_I210_TQAVHC(queue), 0); 1793 1794 /* If CBS is not enabled for any queues anymore, then return to 1795 * the default state of Data Transmission Arbitration on 1796 * TQAVCTRL. 1797 */ 1798 if (!is_any_cbs_enabled(adapter)) { 1799 tqavctrl = rd32(E1000_I210_TQAVCTRL); 1800 tqavctrl &= ~E1000_TQAVCTRL_DATATRANARB; 1801 wr32(E1000_I210_TQAVCTRL, tqavctrl); 1802 } 1803 } 1804 1805 /* If LaunchTime is enabled, set DataTranTIM. 
*/ 1806 if (ring->launchtime_enable) { 1807 /* Always set DataTranTIM on TQAVCTRL if LaunchTime is enabled 1808 * for any of the SR queues, and configure fetchtime delta. 1809 * XXX NOTE: 1810 * - LaunchTime will be enabled for all SR queues. 1811 * - A fixed offset can be added relative to the launch 1812 * time of all packets if configured at reg LAUNCH_OS0. 1813 * We are keeping it as 0 for now (default value). 1814 */ 1815 tqavctrl = rd32(E1000_I210_TQAVCTRL); 1816 tqavctrl |= E1000_TQAVCTRL_DATATRANTIM | 1817 E1000_TQAVCTRL_FETCHTIME_DELTA; 1818 wr32(E1000_I210_TQAVCTRL, tqavctrl); 1819 } else { 1820 /* If Launchtime is not enabled for any SR queues anymore, 1821 * then clear DataTranTIM on TQAVCTRL and clear fetchtime delta, 1822 * effectively disabling Launchtime. 1823 */ 1824 if (!is_any_txtime_enabled(adapter)) { 1825 tqavctrl = rd32(E1000_I210_TQAVCTRL); 1826 tqavctrl &= ~E1000_TQAVCTRL_DATATRANTIM; 1827 tqavctrl &= ~E1000_TQAVCTRL_FETCHTIME_DELTA; 1828 wr32(E1000_I210_TQAVCTRL, tqavctrl); 1829 } 1830 } 1831 1832 /* XXX: In i210 controller the sendSlope and loCredit parameters from 1833 * CBS are not configurable by software so we don't do any 'controller 1834 * configuration' in respect to these parameters. 1835 */ 1836 1837 netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n", 1838 ring->cbs_enable ? "enabled" : "disabled", 1839 ring->launchtime_enable ? 
"enabled" : "disabled", 1840 queue, 1841 ring->idleslope, ring->sendslope, 1842 ring->hicredit, ring->locredit); 1843 } 1844 1845 static int igb_save_txtime_params(struct igb_adapter *adapter, int queue, 1846 bool enable) 1847 { 1848 struct igb_ring *ring; 1849 1850 if (queue < 0 || queue > adapter->num_tx_queues) 1851 return -EINVAL; 1852 1853 ring = adapter->tx_ring[queue]; 1854 ring->launchtime_enable = enable; 1855 1856 return 0; 1857 } 1858 1859 static int igb_save_cbs_params(struct igb_adapter *adapter, int queue, 1860 bool enable, int idleslope, int sendslope, 1861 int hicredit, int locredit) 1862 { 1863 struct igb_ring *ring; 1864 1865 if (queue < 0 || queue > adapter->num_tx_queues) 1866 return -EINVAL; 1867 1868 ring = adapter->tx_ring[queue]; 1869 1870 ring->cbs_enable = enable; 1871 ring->idleslope = idleslope; 1872 ring->sendslope = sendslope; 1873 ring->hicredit = hicredit; 1874 ring->locredit = locredit; 1875 1876 return 0; 1877 } 1878 1879 /** 1880 * igb_setup_tx_mode - Switch to/from Qav Tx mode when applicable 1881 * @adapter: pointer to adapter struct 1882 * 1883 * Configure TQAVCTRL register switching the controller's Tx mode 1884 * if FQTSS mode is enabled or disabled. Additionally, will issue 1885 * a call to igb_config_tx_modes() per queue so any previously saved 1886 * Tx parameters are applied. 1887 **/ 1888 static void igb_setup_tx_mode(struct igb_adapter *adapter) 1889 { 1890 struct net_device *netdev = adapter->netdev; 1891 struct e1000_hw *hw = &adapter->hw; 1892 u32 val; 1893 1894 /* Only i210 controller supports changing the transmission mode. */ 1895 if (hw->mac.type != e1000_i210) 1896 return; 1897 1898 if (is_fqtss_enabled(adapter)) { 1899 int i, max_queue; 1900 1901 /* Configure TQAVCTRL register: set transmit mode to 'Qav', 1902 * set data fetch arbitration to 'round robin', set SP_WAIT_SR 1903 * so SP queues wait for SR ones. 
1904 */ 1905 val = rd32(E1000_I210_TQAVCTRL); 1906 val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_SP_WAIT_SR; 1907 val &= ~E1000_TQAVCTRL_DATAFETCHARB; 1908 wr32(E1000_I210_TQAVCTRL, val); 1909 1910 /* Configure Tx and Rx packet buffers sizes as described in 1911 * i210 datasheet section 7.2.7.7. 1912 */ 1913 val = rd32(E1000_TXPBS); 1914 val &= ~I210_TXPBSIZE_MASK; 1915 val |= I210_TXPBSIZE_PB0_6KB | I210_TXPBSIZE_PB1_6KB | 1916 I210_TXPBSIZE_PB2_6KB | I210_TXPBSIZE_PB3_6KB; 1917 wr32(E1000_TXPBS, val); 1918 1919 val = rd32(E1000_RXPBS); 1920 val &= ~I210_RXPBSIZE_MASK; 1921 val |= I210_RXPBSIZE_PB_30KB; 1922 wr32(E1000_RXPBS, val); 1923 1924 /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ 1925 * register should not exceed the buffer size programmed in 1926 * TXPBS. The smallest buffer size programmed in TXPBS is 4kB 1927 * so according to the datasheet we should set MAX_TPKT_SIZE to 1928 * 4kB / 64. 1929 * 1930 * However, when we do so, no frame from queue 2 and 3 are 1931 * transmitted. It seems the MAX_TPKT_SIZE should not be great 1932 * or _equal_ to the buffer size programmed in TXPBS. For this 1933 * reason, we set MAX_ TPKT_SIZE to (4kB - 1) / 64. 1934 */ 1935 val = (4096 - 1) / 64; 1936 wr32(E1000_I210_DTXMXPKTSZ, val); 1937 1938 /* Since FQTSS mode is enabled, apply any CBS configuration 1939 * previously set. If no previous CBS configuration has been 1940 * done, then the initial configuration is applied, which means 1941 * CBS is disabled. 1942 */ 1943 max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ? 
1944 adapter->num_tx_queues : I210_SR_QUEUES_NUM; 1945 1946 for (i = 0; i < max_queue; i++) { 1947 igb_config_tx_modes(adapter, i); 1948 } 1949 } else { 1950 wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); 1951 wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); 1952 wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT); 1953 1954 val = rd32(E1000_I210_TQAVCTRL); 1955 /* According to Section 8.12.21, the other flags we've set when 1956 * enabling FQTSS are not relevant when disabling FQTSS so we 1957 * don't set they here. 1958 */ 1959 val &= ~E1000_TQAVCTRL_XMIT_MODE; 1960 wr32(E1000_I210_TQAVCTRL, val); 1961 } 1962 1963 netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ? 1964 "enabled" : "disabled"); 1965 } 1966 1967 /** 1968 * igb_configure - configure the hardware for RX and TX 1969 * @adapter: private board structure 1970 **/ 1971 static void igb_configure(struct igb_adapter *adapter) 1972 { 1973 struct net_device *netdev = adapter->netdev; 1974 int i; 1975 1976 igb_get_hw_control(adapter); 1977 igb_set_rx_mode(netdev); 1978 igb_setup_tx_mode(adapter); 1979 1980 igb_restore_vlan(adapter); 1981 1982 igb_setup_tctl(adapter); 1983 igb_setup_mrqc(adapter); 1984 igb_setup_rctl(adapter); 1985 1986 igb_nfc_filter_restore(adapter); 1987 igb_configure_tx(adapter); 1988 igb_configure_rx(adapter); 1989 1990 igb_rx_fifo_flush_82575(&adapter->hw); 1991 1992 /* call igb_desc_unused which always leaves 1993 * at least 1 descriptor unused to make sure 1994 * next_to_use != next_to_clean 1995 */ 1996 for (i = 0; i < adapter->num_rx_queues; i++) { 1997 struct igb_ring *ring = adapter->rx_ring[i]; 1998 if (ring->xsk_pool) 1999 igb_alloc_rx_buffers_zc(ring, ring->xsk_pool, 2000 igb_desc_unused(ring)); 2001 else 2002 igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); 2003 } 2004 } 2005 2006 /** 2007 * igb_power_up_link - Power up the phy/serdes link 2008 * @adapter: address of board private structure 2009 **/ 2010 void igb_power_up_link(struct igb_adapter *adapter) 2011 { 2012 
igb_reset_phy(&adapter->hw); 2013 2014 if (adapter->hw.phy.media_type == e1000_media_type_copper) 2015 igb_power_up_phy_copper(&adapter->hw); 2016 else 2017 igb_power_up_serdes_link_82575(&adapter->hw); 2018 2019 igb_setup_link(&adapter->hw); 2020 } 2021 2022 /** 2023 * igb_power_down_link - Power down the phy/serdes link 2024 * @adapter: address of board private structure 2025 */ 2026 static void igb_power_down_link(struct igb_adapter *adapter) 2027 { 2028 if (adapter->hw.phy.media_type == e1000_media_type_copper) 2029 igb_power_down_phy_copper_82575(&adapter->hw); 2030 else 2031 igb_shutdown_serdes_link_82575(&adapter->hw); 2032 } 2033 2034 /** 2035 * igb_check_swap_media - Detect and switch function for Media Auto Sense 2036 * @adapter: address of the board private structure 2037 **/ 2038 static void igb_check_swap_media(struct igb_adapter *adapter) 2039 { 2040 struct e1000_hw *hw = &adapter->hw; 2041 u32 ctrl_ext, connsw; 2042 bool swap_now = false; 2043 2044 ctrl_ext = rd32(E1000_CTRL_EXT); 2045 connsw = rd32(E1000_CONNSW); 2046 2047 /* need to live swap if current media is copper and we have fiber/serdes 2048 * to go to. 
2049 */ 2050 2051 if ((hw->phy.media_type == e1000_media_type_copper) && 2052 (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { 2053 swap_now = true; 2054 } else if ((hw->phy.media_type != e1000_media_type_copper) && 2055 !(connsw & E1000_CONNSW_SERDESD)) { 2056 /* copper signal takes time to appear */ 2057 if (adapter->copper_tries < 4) { 2058 adapter->copper_tries++; 2059 connsw |= E1000_CONNSW_AUTOSENSE_CONF; 2060 wr32(E1000_CONNSW, connsw); 2061 return; 2062 } else { 2063 adapter->copper_tries = 0; 2064 if ((connsw & E1000_CONNSW_PHYSD) && 2065 (!(connsw & E1000_CONNSW_PHY_PDN))) { 2066 swap_now = true; 2067 connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; 2068 wr32(E1000_CONNSW, connsw); 2069 } 2070 } 2071 } 2072 2073 if (!swap_now) 2074 return; 2075 2076 switch (hw->phy.media_type) { 2077 case e1000_media_type_copper: 2078 netdev_info(adapter->netdev, 2079 "MAS: changing media to fiber/serdes\n"); 2080 ctrl_ext |= 2081 E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; 2082 adapter->flags |= IGB_FLAG_MEDIA_RESET; 2083 adapter->copper_tries = 0; 2084 break; 2085 case e1000_media_type_internal_serdes: 2086 case e1000_media_type_fiber: 2087 netdev_info(adapter->netdev, 2088 "MAS: changing media to copper\n"); 2089 ctrl_ext &= 2090 ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; 2091 adapter->flags |= IGB_FLAG_MEDIA_RESET; 2092 break; 2093 default: 2094 /* shouldn't get here during regular operation */ 2095 netdev_err(adapter->netdev, 2096 "AMS: Invalid media type found, returning\n"); 2097 break; 2098 } 2099 wr32(E1000_CTRL_EXT, ctrl_ext); 2100 } 2101 2102 void igb_set_queue_napi(struct igb_adapter *adapter, int vector, 2103 struct napi_struct *napi) 2104 { 2105 struct igb_q_vector *q_vector = adapter->q_vector[vector]; 2106 2107 if (q_vector->rx.ring) 2108 netif_queue_set_napi(adapter->netdev, 2109 q_vector->rx.ring->queue_index, 2110 NETDEV_QUEUE_TYPE_RX, napi); 2111 2112 if (q_vector->tx.ring) 2113 netif_queue_set_napi(adapter->netdev, 2114 q_vector->tx.ring->queue_index, 2115 
NETDEV_QUEUE_TYPE_TX, napi); 2116 } 2117 2118 /** 2119 * igb_up - Open the interface and prepare it to handle traffic 2120 * @adapter: board private structure 2121 **/ 2122 int igb_up(struct igb_adapter *adapter) 2123 { 2124 struct e1000_hw *hw = &adapter->hw; 2125 struct napi_struct *napi; 2126 int i; 2127 2128 /* hardware has been reset, we need to reload some things */ 2129 igb_configure(adapter); 2130 2131 clear_bit(__IGB_DOWN, &adapter->state); 2132 2133 for (i = 0; i < adapter->num_q_vectors; i++) { 2134 napi = &adapter->q_vector[i]->napi; 2135 napi_enable(napi); 2136 igb_set_queue_napi(adapter, i, napi); 2137 } 2138 2139 if (adapter->flags & IGB_FLAG_HAS_MSIX) 2140 igb_configure_msix(adapter); 2141 else 2142 igb_assign_vector(adapter->q_vector[0], 0); 2143 2144 /* Clear any pending interrupts. */ 2145 rd32(E1000_TSICR); 2146 rd32(E1000_ICR); 2147 igb_irq_enable(adapter); 2148 2149 /* notify VFs that reset has been completed */ 2150 if (adapter->vfs_allocated_count) { 2151 u32 reg_data = rd32(E1000_CTRL_EXT); 2152 2153 reg_data |= E1000_CTRL_EXT_PFRSTD; 2154 wr32(E1000_CTRL_EXT, reg_data); 2155 } 2156 2157 netif_tx_start_all_queues(adapter->netdev); 2158 2159 /* start the watchdog. 
*/ 2160 hw->mac.get_link_status = 1; 2161 schedule_work(&adapter->watchdog_task); 2162 2163 if ((adapter->flags & IGB_FLAG_EEE) && 2164 (!hw->dev_spec._82575.eee_disable)) 2165 adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; 2166 2167 return 0; 2168 } 2169 2170 void igb_down(struct igb_adapter *adapter) 2171 { 2172 struct net_device *netdev = adapter->netdev; 2173 struct e1000_hw *hw = &adapter->hw; 2174 u32 tctl, rctl; 2175 int i; 2176 2177 /* signal that we're down so the interrupt handler does not 2178 * reschedule our watchdog timer 2179 */ 2180 set_bit(__IGB_DOWN, &adapter->state); 2181 2182 /* disable receives in the hardware */ 2183 rctl = rd32(E1000_RCTL); 2184 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); 2185 /* flush and sleep below */ 2186 2187 igb_nfc_filter_exit(adapter); 2188 2189 netif_carrier_off(netdev); 2190 netif_tx_stop_all_queues(netdev); 2191 2192 /* disable transmits in the hardware */ 2193 tctl = rd32(E1000_TCTL); 2194 tctl &= ~E1000_TCTL_EN; 2195 wr32(E1000_TCTL, tctl); 2196 /* flush both disables and wait for them to finish */ 2197 wrfl(); 2198 usleep_range(10000, 11000); 2199 2200 igb_irq_disable(adapter); 2201 2202 adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; 2203 2204 for (i = 0; i < adapter->num_q_vectors; i++) { 2205 if (adapter->q_vector[i]) { 2206 napi_disable(&adapter->q_vector[i]->napi); 2207 igb_set_queue_napi(adapter, i, NULL); 2208 } 2209 } 2210 2211 timer_delete_sync(&adapter->watchdog_timer); 2212 timer_delete_sync(&adapter->phy_info_timer); 2213 2214 /* record the stats before reset*/ 2215 spin_lock(&adapter->stats64_lock); 2216 igb_update_stats(adapter); 2217 spin_unlock(&adapter->stats64_lock); 2218 2219 adapter->link_speed = 0; 2220 adapter->link_duplex = 0; 2221 2222 if (!pci_channel_offline(adapter->pdev)) 2223 igb_reset(adapter); 2224 2225 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 2226 adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; 2227 2228 igb_clean_all_tx_rings(adapter); 2229 
igb_clean_all_rx_rings(adapter); 2230 #ifdef CONFIG_IGB_DCA 2231 2232 /* since we reset the hardware DCA settings were cleared */ 2233 igb_setup_dca(adapter); 2234 #endif 2235 } 2236 2237 void igb_reinit_locked(struct igb_adapter *adapter) 2238 { 2239 while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) 2240 usleep_range(1000, 2000); 2241 igb_down(adapter); 2242 igb_up(adapter); 2243 clear_bit(__IGB_RESETTING, &adapter->state); 2244 } 2245 2246 /** igb_enable_mas - Media Autosense re-enable after swap 2247 * 2248 * @adapter: adapter struct 2249 **/ 2250 static void igb_enable_mas(struct igb_adapter *adapter) 2251 { 2252 struct e1000_hw *hw = &adapter->hw; 2253 u32 connsw = rd32(E1000_CONNSW); 2254 2255 /* configure for SerDes media detect */ 2256 if ((hw->phy.media_type == e1000_media_type_copper) && 2257 (!(connsw & E1000_CONNSW_SERDESD))) { 2258 connsw |= E1000_CONNSW_ENRGSRC; 2259 connsw |= E1000_CONNSW_AUTOSENSE_EN; 2260 wr32(E1000_CONNSW, connsw); 2261 wrfl(); 2262 } 2263 } 2264 2265 #ifdef CONFIG_IGB_HWMON 2266 /** 2267 * igb_set_i2c_bb - Init I2C interface 2268 * @hw: pointer to hardware structure 2269 **/ 2270 static void igb_set_i2c_bb(struct e1000_hw *hw) 2271 { 2272 u32 ctrl_ext; 2273 s32 i2cctl; 2274 2275 ctrl_ext = rd32(E1000_CTRL_EXT); 2276 ctrl_ext |= E1000_CTRL_I2C_ENA; 2277 wr32(E1000_CTRL_EXT, ctrl_ext); 2278 wrfl(); 2279 2280 i2cctl = rd32(E1000_I2CPARAMS); 2281 i2cctl |= E1000_I2CBB_EN 2282 | E1000_I2C_CLK_OE_N 2283 | E1000_I2C_DATA_OE_N; 2284 wr32(E1000_I2CPARAMS, i2cctl); 2285 wrfl(); 2286 } 2287 #endif 2288 2289 void igb_reset(struct igb_adapter *adapter) 2290 { 2291 struct pci_dev *pdev = adapter->pdev; 2292 struct e1000_hw *hw = &adapter->hw; 2293 struct e1000_mac_info *mac = &hw->mac; 2294 struct e1000_fc_info *fc = &hw->fc; 2295 u32 pba, hwm; 2296 2297 /* Repartition Pba for greater than 9k mtu 2298 * To take effect CTRL.RST is required. 
2299 */ 2300 switch (mac->type) { 2301 case e1000_i350: 2302 case e1000_i354: 2303 case e1000_82580: 2304 pba = rd32(E1000_RXPBS); 2305 pba = igb_rxpbs_adjust_82580(pba); 2306 break; 2307 case e1000_82576: 2308 pba = rd32(E1000_RXPBS); 2309 pba &= E1000_RXPBS_SIZE_MASK_82576; 2310 break; 2311 case e1000_82575: 2312 case e1000_i210: 2313 case e1000_i211: 2314 default: 2315 pba = E1000_PBA_34K; 2316 break; 2317 } 2318 2319 if (mac->type == e1000_82575) { 2320 u32 min_rx_space, min_tx_space, needed_tx_space; 2321 2322 /* write Rx PBA so that hardware can report correct Tx PBA */ 2323 wr32(E1000_PBA, pba); 2324 2325 /* To maintain wire speed transmits, the Tx FIFO should be 2326 * large enough to accommodate two full transmit packets, 2327 * rounded up to the next 1KB and expressed in KB. Likewise, 2328 * the Rx FIFO should be large enough to accommodate at least 2329 * one full receive packet and is similarly rounded up and 2330 * expressed in KB. 2331 */ 2332 min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024); 2333 2334 /* The Tx FIFO also stores 16 bytes of information about the Tx 2335 * but don't include Ethernet FCS because hardware appends it. 2336 * We only need to round down to the nearest 512 byte block 2337 * count since the value we care about is 2 frames, not 1. 2338 */ 2339 min_tx_space = adapter->max_frame_size; 2340 min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN; 2341 min_tx_space = DIV_ROUND_UP(min_tx_space, 512); 2342 2343 /* upper 16 bits has Tx packet buffer allocation size in KB */ 2344 needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16); 2345 2346 /* If current Tx allocation is less than the min Tx FIFO size, 2347 * and the min Tx FIFO size is less than the current Rx FIFO 2348 * allocation, take space away from current Rx allocation. 
2349 */ 2350 if (needed_tx_space < pba) { 2351 pba -= needed_tx_space; 2352 2353 /* if short on Rx space, Rx wins and must trump Tx 2354 * adjustment 2355 */ 2356 if (pba < min_rx_space) 2357 pba = min_rx_space; 2358 } 2359 2360 /* adjust PBA for jumbo frames */ 2361 wr32(E1000_PBA, pba); 2362 } 2363 2364 /* flow control settings 2365 * The high water mark must be low enough to fit one full frame 2366 * after transmitting the pause frame. As such we must have enough 2367 * space to allow for us to complete our current transmit and then 2368 * receive the frame that is in progress from the link partner. 2369 * Set it to: 2370 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 2371 */ 2372 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 2373 2374 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 2375 fc->low_water = fc->high_water - 16; 2376 fc->pause_time = 0xFFFF; 2377 fc->send_xon = 1; 2378 fc->current_mode = fc->requested_mode; 2379 2380 /* disable receive for all VFs and wait one second */ 2381 if (adapter->vfs_allocated_count) { 2382 int i; 2383 2384 for (i = 0 ; i < adapter->vfs_allocated_count; i++) 2385 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; 2386 2387 /* ping all the active vfs to let them know we are going down */ 2388 igb_ping_all_vfs(adapter); 2389 2390 /* disable transmits and receives */ 2391 wr32(E1000_VFRE, 0); 2392 wr32(E1000_VFTE, 0); 2393 } 2394 2395 /* Allow time for pending master requests to run */ 2396 hw->mac.ops.reset_hw(hw); 2397 wr32(E1000_WUC, 0); 2398 2399 if (adapter->flags & IGB_FLAG_MEDIA_RESET) { 2400 /* need to resetup here after media swap */ 2401 adapter->ei.get_invariants(hw); 2402 adapter->flags &= ~IGB_FLAG_MEDIA_RESET; 2403 } 2404 if ((mac->type == e1000_82575 || mac->type == e1000_i350) && 2405 (adapter->flags & IGB_FLAG_MAS_ENABLE)) { 2406 igb_enable_mas(adapter); 2407 } 2408 if (hw->mac.ops.init_hw(hw)) 2409 dev_err(&pdev->dev, "Hardware Error\n"); 2410 2411 /* 
RAR registers were cleared during init_hw, clear mac table */ 2412 igb_flush_mac_table(adapter); 2413 __dev_uc_unsync(adapter->netdev, NULL); 2414 2415 /* Recover default RAR entry */ 2416 igb_set_default_mac_filter(adapter); 2417 2418 /* Flow control settings reset on hardware reset, so guarantee flow 2419 * control is off when forcing speed. 2420 */ 2421 if (!hw->mac.autoneg) 2422 igb_force_mac_fc(hw); 2423 2424 igb_init_dmac(adapter, pba); 2425 #ifdef CONFIG_IGB_HWMON 2426 /* Re-initialize the thermal sensor on i350 devices. */ 2427 if (!test_bit(__IGB_DOWN, &adapter->state)) { 2428 if (mac->type == e1000_i350 && hw->bus.func == 0) { 2429 /* If present, re-initialize the external thermal sensor 2430 * interface. 2431 */ 2432 if (adapter->ets) 2433 igb_set_i2c_bb(hw); 2434 mac->ops.init_thermal_sensor_thresh(hw); 2435 } 2436 } 2437 #endif 2438 /* Re-establish EEE setting */ 2439 if (hw->phy.media_type == e1000_media_type_copper) { 2440 switch (mac->type) { 2441 case e1000_i350: 2442 case e1000_i210: 2443 case e1000_i211: 2444 igb_set_eee_i350(hw, true, true); 2445 break; 2446 case e1000_i354: 2447 igb_set_eee_i354(hw, true, true); 2448 break; 2449 default: 2450 break; 2451 } 2452 } 2453 if (!netif_running(adapter->netdev)) 2454 igb_power_down_link(adapter); 2455 2456 igb_update_mng_vlan(adapter); 2457 2458 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ 2459 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); 2460 2461 /* Re-enable PTP, where applicable. */ 2462 if (adapter->ptp_flags & IGB_PTP_ENABLED) 2463 igb_ptp_reset(adapter); 2464 2465 igb_get_phy_info(hw); 2466 } 2467 2468 static netdev_features_t igb_fix_features(struct net_device *netdev, 2469 netdev_features_t features) 2470 { 2471 /* Since there is no support for separate Rx/Tx vlan accel 2472 * enable/disable make sure Tx flag is always in same state as Rx. 
2473 */ 2474 if (features & NETIF_F_HW_VLAN_CTAG_RX) 2475 features |= NETIF_F_HW_VLAN_CTAG_TX; 2476 else 2477 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 2478 2479 return features; 2480 } 2481 2482 static int igb_set_features(struct net_device *netdev, 2483 netdev_features_t features) 2484 { 2485 netdev_features_t changed = netdev->features ^ features; 2486 struct igb_adapter *adapter = netdev_priv(netdev); 2487 2488 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 2489 igb_vlan_mode(netdev, features); 2490 2491 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 2492 return 0; 2493 2494 if (!(features & NETIF_F_NTUPLE)) { 2495 struct hlist_node *node2; 2496 struct igb_nfc_filter *rule; 2497 2498 spin_lock(&adapter->nfc_lock); 2499 hlist_for_each_entry_safe(rule, node2, 2500 &adapter->nfc_filter_list, nfc_node) { 2501 igb_erase_filter(adapter, rule); 2502 hlist_del(&rule->nfc_node); 2503 kfree(rule); 2504 } 2505 spin_unlock(&adapter->nfc_lock); 2506 adapter->nfc_filter_count = 0; 2507 } 2508 2509 netdev->features = features; 2510 2511 if (netif_running(netdev)) 2512 igb_reinit_locked(adapter); 2513 else 2514 igb_reset(adapter); 2515 2516 return 1; 2517 } 2518 2519 static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 2520 struct net_device *dev, 2521 const unsigned char *addr, u16 vid, 2522 u16 flags, bool *notified, 2523 struct netlink_ext_ack *extack) 2524 { 2525 /* guarantee we can provide a unique filter for the unicast address */ 2526 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { 2527 struct igb_adapter *adapter = netdev_priv(dev); 2528 int vfn = adapter->vfs_allocated_count; 2529 2530 if (netdev_uc_count(dev) >= igb_available_rars(adapter, vfn)) 2531 return -ENOMEM; 2532 } 2533 2534 return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags); 2535 } 2536 2537 #define IGB_MAX_MAC_HDR_LEN 127 2538 #define IGB_MAX_NETWORK_HDR_LEN 511 2539 2540 static netdev_features_t 2541 igb_features_check(struct sk_buff *skb, struct net_device *dev, 2542 
netdev_features_t features) 2543 { 2544 unsigned int network_hdr_len, mac_hdr_len; 2545 2546 /* Make certain the headers can be described by a context descriptor */ 2547 mac_hdr_len = skb_network_offset(skb); 2548 if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) 2549 return features & ~(NETIF_F_HW_CSUM | 2550 NETIF_F_SCTP_CRC | 2551 NETIF_F_GSO_UDP_L4 | 2552 NETIF_F_HW_VLAN_CTAG_TX | 2553 NETIF_F_TSO | 2554 NETIF_F_TSO6); 2555 2556 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 2557 if (unlikely(network_hdr_len > IGB_MAX_NETWORK_HDR_LEN)) 2558 return features & ~(NETIF_F_HW_CSUM | 2559 NETIF_F_SCTP_CRC | 2560 NETIF_F_GSO_UDP_L4 | 2561 NETIF_F_TSO | 2562 NETIF_F_TSO6); 2563 2564 /* We can only support IPV4 TSO in tunnels if we can mangle the 2565 * inner IP ID field, so strip TSO if MANGLEID is not supported. 2566 */ 2567 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 2568 features &= ~NETIF_F_TSO; 2569 2570 return features; 2571 } 2572 2573 static void igb_offload_apply(struct igb_adapter *adapter, s32 queue) 2574 { 2575 if (!is_fqtss_enabled(adapter)) { 2576 enable_fqtss(adapter, true); 2577 return; 2578 } 2579 2580 igb_config_tx_modes(adapter, queue); 2581 2582 if (!is_any_cbs_enabled(adapter) && !is_any_txtime_enabled(adapter)) 2583 enable_fqtss(adapter, false); 2584 } 2585 2586 static int igb_offload_cbs(struct igb_adapter *adapter, 2587 struct tc_cbs_qopt_offload *qopt) 2588 { 2589 struct e1000_hw *hw = &adapter->hw; 2590 int err; 2591 2592 /* CBS offloading is only supported by i210 controller. */ 2593 if (hw->mac.type != e1000_i210) 2594 return -EOPNOTSUPP; 2595 2596 /* CBS offloading is only supported by queue 0 and queue 1. 
*/ 2597 if (qopt->queue < 0 || qopt->queue > 1) 2598 return -EINVAL; 2599 2600 err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable, 2601 qopt->idleslope, qopt->sendslope, 2602 qopt->hicredit, qopt->locredit); 2603 if (err) 2604 return err; 2605 2606 igb_offload_apply(adapter, qopt->queue); 2607 2608 return 0; 2609 } 2610 2611 #define ETHER_TYPE_FULL_MASK ((__force __be16)~0) 2612 #define VLAN_PRIO_FULL_MASK (0x07) 2613 2614 static int igb_parse_cls_flower(struct igb_adapter *adapter, 2615 struct flow_cls_offload *f, 2616 int traffic_class, 2617 struct igb_nfc_filter *input) 2618 { 2619 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2620 struct flow_dissector *dissector = rule->match.dissector; 2621 struct netlink_ext_ack *extack = f->common.extack; 2622 2623 if (dissector->used_keys & 2624 ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | 2625 BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | 2626 BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 2627 BIT_ULL(FLOW_DISSECTOR_KEY_VLAN))) { 2628 NL_SET_ERR_MSG_MOD(extack, 2629 "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported"); 2630 return -EOPNOTSUPP; 2631 } 2632 2633 if (flow_rule_match_has_control_flags(rule, extack)) 2634 return -EOPNOTSUPP; 2635 2636 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 2637 struct flow_match_eth_addrs match; 2638 2639 flow_rule_match_eth_addrs(rule, &match); 2640 if (!is_zero_ether_addr(match.mask->dst)) { 2641 if (!is_broadcast_ether_addr(match.mask->dst)) { 2642 NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address"); 2643 return -EINVAL; 2644 } 2645 2646 input->filter.match_flags |= 2647 IGB_FILTER_FLAG_DST_MAC_ADDR; 2648 ether_addr_copy(input->filter.dst_addr, match.key->dst); 2649 } 2650 2651 if (!is_zero_ether_addr(match.mask->src)) { 2652 if (!is_broadcast_ether_addr(match.mask->src)) { 2653 NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address"); 2654 return -EINVAL; 2655 } 2656 2657 
input->filter.match_flags |= 2658 IGB_FILTER_FLAG_SRC_MAC_ADDR; 2659 ether_addr_copy(input->filter.src_addr, match.key->src); 2660 } 2661 } 2662 2663 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2664 struct flow_match_basic match; 2665 2666 flow_rule_match_basic(rule, &match); 2667 if (match.mask->n_proto) { 2668 if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) { 2669 NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter"); 2670 return -EINVAL; 2671 } 2672 2673 input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE; 2674 input->filter.etype = match.key->n_proto; 2675 } 2676 } 2677 2678 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { 2679 struct flow_match_vlan match; 2680 2681 flow_rule_match_vlan(rule, &match); 2682 if (match.mask->vlan_priority) { 2683 if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) { 2684 NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority"); 2685 return -EINVAL; 2686 } 2687 2688 input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; 2689 input->filter.vlan_tci = 2690 (__force __be16)match.key->vlan_priority; 2691 } 2692 } 2693 2694 input->action = traffic_class; 2695 input->cookie = f->cookie; 2696 2697 return 0; 2698 } 2699 2700 static int igb_configure_clsflower(struct igb_adapter *adapter, 2701 struct flow_cls_offload *cls_flower) 2702 { 2703 struct netlink_ext_ack *extack = cls_flower->common.extack; 2704 struct igb_nfc_filter *filter, *f; 2705 int err, tc; 2706 2707 tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); 2708 if (tc < 0) { 2709 NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class"); 2710 return -EINVAL; 2711 } 2712 2713 filter = kzalloc_obj(*filter); 2714 if (!filter) 2715 return -ENOMEM; 2716 2717 err = igb_parse_cls_flower(adapter, cls_flower, tc, filter); 2718 if (err < 0) 2719 goto err_parse; 2720 2721 spin_lock(&adapter->nfc_lock); 2722 2723 hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) { 2724 if (!memcmp(&f->filter, 
&filter->filter, sizeof(f->filter))) { 2725 err = -EEXIST; 2726 NL_SET_ERR_MSG_MOD(extack, 2727 "This filter is already set in ethtool"); 2728 goto err_locked; 2729 } 2730 } 2731 2732 hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) { 2733 if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { 2734 err = -EEXIST; 2735 NL_SET_ERR_MSG_MOD(extack, 2736 "This filter is already set in cls_flower"); 2737 goto err_locked; 2738 } 2739 } 2740 2741 err = igb_add_filter(adapter, filter); 2742 if (err < 0) { 2743 NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter"); 2744 goto err_locked; 2745 } 2746 2747 hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list); 2748 2749 spin_unlock(&adapter->nfc_lock); 2750 2751 return 0; 2752 2753 err_locked: 2754 spin_unlock(&adapter->nfc_lock); 2755 2756 err_parse: 2757 kfree(filter); 2758 2759 return err; 2760 } 2761 2762 static int igb_delete_clsflower(struct igb_adapter *adapter, 2763 struct flow_cls_offload *cls_flower) 2764 { 2765 struct igb_nfc_filter *filter; 2766 int err; 2767 2768 spin_lock(&adapter->nfc_lock); 2769 2770 hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node) 2771 if (filter->cookie == cls_flower->cookie) 2772 break; 2773 2774 if (!filter) { 2775 err = -ENOENT; 2776 goto out; 2777 } 2778 2779 err = igb_erase_filter(adapter, filter); 2780 if (err < 0) 2781 goto out; 2782 2783 hlist_del(&filter->nfc_node); 2784 kfree(filter); 2785 2786 out: 2787 spin_unlock(&adapter->nfc_lock); 2788 2789 return err; 2790 } 2791 2792 static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, 2793 struct flow_cls_offload *cls_flower) 2794 { 2795 switch (cls_flower->command) { 2796 case FLOW_CLS_REPLACE: 2797 return igb_configure_clsflower(adapter, cls_flower); 2798 case FLOW_CLS_DESTROY: 2799 return igb_delete_clsflower(adapter, cls_flower); 2800 case FLOW_CLS_STATS: 2801 return -EOPNOTSUPP; 2802 default: 2803 return -EOPNOTSUPP; 2804 } 2805 } 2806 2807 static int 
igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, 2808 void *cb_priv) 2809 { 2810 struct igb_adapter *adapter = cb_priv; 2811 2812 if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data)) 2813 return -EOPNOTSUPP; 2814 2815 switch (type) { 2816 case TC_SETUP_CLSFLOWER: 2817 return igb_setup_tc_cls_flower(adapter, type_data); 2818 2819 default: 2820 return -EOPNOTSUPP; 2821 } 2822 } 2823 2824 static int igb_offload_txtime(struct igb_adapter *adapter, 2825 struct tc_etf_qopt_offload *qopt) 2826 { 2827 struct e1000_hw *hw = &adapter->hw; 2828 int err; 2829 2830 /* Launchtime offloading is only supported by i210 controller. */ 2831 if (hw->mac.type != e1000_i210) 2832 return -EOPNOTSUPP; 2833 2834 /* Launchtime offloading is only supported by queues 0 and 1. */ 2835 if (qopt->queue < 0 || qopt->queue > 1) 2836 return -EINVAL; 2837 2838 err = igb_save_txtime_params(adapter, qopt->queue, qopt->enable); 2839 if (err) 2840 return err; 2841 2842 igb_offload_apply(adapter, qopt->queue); 2843 2844 return 0; 2845 } 2846 2847 static int igb_tc_query_caps(struct igb_adapter *adapter, 2848 struct tc_query_caps_base *base) 2849 { 2850 switch (base->type) { 2851 case TC_SETUP_QDISC_TAPRIO: { 2852 struct tc_taprio_caps *caps = base->caps; 2853 2854 caps->broken_mqprio = true; 2855 2856 return 0; 2857 } 2858 default: 2859 return -EOPNOTSUPP; 2860 } 2861 } 2862 2863 static LIST_HEAD(igb_block_cb_list); 2864 2865 static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, 2866 void *type_data) 2867 { 2868 struct igb_adapter *adapter = netdev_priv(dev); 2869 2870 switch (type) { 2871 case TC_QUERY_CAPS: 2872 return igb_tc_query_caps(adapter, type_data); 2873 case TC_SETUP_QDISC_CBS: 2874 return igb_offload_cbs(adapter, type_data); 2875 case TC_SETUP_BLOCK: 2876 return flow_block_cb_setup_simple(type_data, 2877 &igb_block_cb_list, 2878 igb_setup_tc_block_cb, 2879 adapter, adapter, true); 2880 2881 case TC_SETUP_QDISC_ETF: 2882 return 
igb_offload_txtime(adapter, type_data); 2883 2884 default: 2885 return -EOPNOTSUPP; 2886 } 2887 } 2888 2889 static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) 2890 { 2891 int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; 2892 struct igb_adapter *adapter = netdev_priv(dev); 2893 struct bpf_prog *prog = bpf->prog, *old_prog; 2894 bool running = netif_running(dev); 2895 bool need_reset; 2896 2897 /* verify igb ring attributes are sufficient for XDP */ 2898 for (i = 0; i < adapter->num_rx_queues; i++) { 2899 struct igb_ring *ring = adapter->rx_ring[i]; 2900 2901 if (frame_size > igb_rx_bufsz(ring)) { 2902 NL_SET_ERR_MSG_MOD(bpf->extack, 2903 "The RX buffer size is too small for the frame size"); 2904 netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n", 2905 igb_rx_bufsz(ring), frame_size); 2906 return -EINVAL; 2907 } 2908 } 2909 2910 old_prog = xchg(&adapter->xdp_prog, prog); 2911 need_reset = (!!prog != !!old_prog); 2912 2913 /* device is up and bpf is added/removed, must setup the RX queues */ 2914 if (need_reset && running) { 2915 igb_close(dev); 2916 } else { 2917 for (i = 0; i < adapter->num_rx_queues; i++) 2918 (void)xchg(&adapter->rx_ring[i]->xdp_prog, 2919 adapter->xdp_prog); 2920 } 2921 2922 if (old_prog) 2923 bpf_prog_put(old_prog); 2924 2925 /* bpf is just replaced, RXQ and MTU are already setup */ 2926 if (!need_reset) { 2927 return 0; 2928 } else { 2929 if (prog) 2930 xdp_features_set_redirect_target(dev, true); 2931 else 2932 xdp_features_clear_redirect_target(dev); 2933 } 2934 2935 if (running) 2936 igb_open(dev); 2937 2938 return 0; 2939 } 2940 2941 static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) 2942 { 2943 struct igb_adapter *adapter = netdev_priv(dev); 2944 2945 switch (xdp->command) { 2946 case XDP_SETUP_PROG: 2947 return igb_xdp_setup(dev, xdp); 2948 case XDP_SETUP_XSK_POOL: 2949 return igb_xsk_pool_setup(adapter, xdp->xsk.pool, 2950 xdp->xsk.queue_id); 2951 default: 2952 return 
-EINVAL; 2953 } 2954 } 2955 2956 int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) 2957 { 2958 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2959 int cpu = smp_processor_id(); 2960 struct igb_ring *tx_ring; 2961 struct netdev_queue *nq; 2962 u32 ret; 2963 2964 if (unlikely(!xdpf)) 2965 return IGB_XDP_CONSUMED; 2966 2967 /* During program transitions its possible adapter->xdp_prog is assigned 2968 * but ring has not been configured yet. In this case simply abort xmit. 2969 */ 2970 tx_ring = igb_xdp_is_enabled(adapter) ? 2971 igb_xdp_tx_queue_mapping(adapter) : NULL; 2972 if (unlikely(!tx_ring)) 2973 return IGB_XDP_CONSUMED; 2974 2975 nq = txring_txq(tx_ring); 2976 __netif_tx_lock(nq, cpu); 2977 /* Avoid transmit queue timeout since we share it with the slow path */ 2978 txq_trans_cond_update(nq); 2979 ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); 2980 __netif_tx_unlock(nq); 2981 2982 return ret; 2983 } 2984 2985 static int igb_xdp_xmit(struct net_device *dev, int n, 2986 struct xdp_frame **frames, u32 flags) 2987 { 2988 struct igb_adapter *adapter = netdev_priv(dev); 2989 int cpu = smp_processor_id(); 2990 struct igb_ring *tx_ring; 2991 struct netdev_queue *nq; 2992 int nxmit = 0; 2993 int i; 2994 2995 if (unlikely(test_bit(__IGB_DOWN, &adapter->state))) 2996 return -ENETDOWN; 2997 2998 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 2999 return -EINVAL; 3000 3001 /* During program transitions its possible adapter->xdp_prog is assigned 3002 * but ring has not been configured yet. In this case simply abort xmit. 3003 */ 3004 tx_ring = igb_xdp_is_enabled(adapter) ? 
3005 igb_xdp_tx_queue_mapping(adapter) : NULL; 3006 if (unlikely(!tx_ring)) 3007 return -ENXIO; 3008 3009 if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))) 3010 return -ENXIO; 3011 3012 nq = txring_txq(tx_ring); 3013 __netif_tx_lock(nq, cpu); 3014 3015 /* Avoid transmit queue timeout since we share it with the slow path */ 3016 txq_trans_cond_update(nq); 3017 3018 for (i = 0; i < n; i++) { 3019 struct xdp_frame *xdpf = frames[i]; 3020 int err; 3021 3022 err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); 3023 if (err != IGB_XDP_TX) 3024 break; 3025 nxmit++; 3026 } 3027 3028 if (unlikely(flags & XDP_XMIT_FLUSH)) 3029 igb_xdp_ring_update_tail(tx_ring); 3030 3031 __netif_tx_unlock(nq); 3032 3033 return nxmit; 3034 } 3035 3036 static const struct net_device_ops igb_netdev_ops = { 3037 .ndo_open = igb_open, 3038 .ndo_stop = igb_close, 3039 .ndo_start_xmit = igb_xmit_frame, 3040 .ndo_get_stats64 = igb_get_stats64, 3041 .ndo_set_rx_mode = igb_set_rx_mode, 3042 .ndo_set_mac_address = igb_set_mac, 3043 .ndo_change_mtu = igb_change_mtu, 3044 .ndo_eth_ioctl = igb_ioctl, 3045 .ndo_tx_timeout = igb_tx_timeout, 3046 .ndo_validate_addr = eth_validate_addr, 3047 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, 3048 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, 3049 .ndo_set_vf_mac = igb_ndo_set_vf_mac, 3050 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, 3051 .ndo_set_vf_rate = igb_ndo_set_vf_bw, 3052 .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, 3053 .ndo_set_vf_trust = igb_ndo_set_vf_trust, 3054 .ndo_get_vf_config = igb_ndo_get_vf_config, 3055 .ndo_fix_features = igb_fix_features, 3056 .ndo_set_features = igb_set_features, 3057 .ndo_fdb_add = igb_ndo_fdb_add, 3058 .ndo_features_check = igb_features_check, 3059 .ndo_setup_tc = igb_setup_tc, 3060 .ndo_bpf = igb_xdp, 3061 .ndo_xdp_xmit = igb_xdp_xmit, 3062 .ndo_xsk_wakeup = igb_xsk_wakeup, 3063 .ndo_hwtstamp_get = igb_ptp_hwtstamp_get, 3064 .ndo_hwtstamp_set = igb_ptp_hwtstamp_set, 3065 }; 3066 3067 /** 3068 * 
igb_set_fw_version - Configure version string for ethtool 3069 * @adapter: adapter struct 3070 **/ 3071 void igb_set_fw_version(struct igb_adapter *adapter) 3072 { 3073 struct e1000_hw *hw = &adapter->hw; 3074 struct e1000_fw_version fw; 3075 3076 igb_get_fw_version(hw, &fw); 3077 3078 switch (hw->mac.type) { 3079 case e1000_i210: 3080 case e1000_i211: 3081 if (!(igb_get_flash_presence_i210(hw))) { 3082 snprintf(adapter->fw_version, 3083 sizeof(adapter->fw_version), 3084 "%2d.%2d-%d", 3085 fw.invm_major, fw.invm_minor, 3086 fw.invm_img_type); 3087 break; 3088 } 3089 fallthrough; 3090 default: 3091 /* if option rom is valid, display its version too */ 3092 if (fw.or_valid) { 3093 snprintf(adapter->fw_version, 3094 sizeof(adapter->fw_version), 3095 "%d.%d, 0x%08x, %d.%d.%d", 3096 fw.eep_major, fw.eep_minor, fw.etrack_id, 3097 fw.or_major, fw.or_build, fw.or_patch); 3098 /* no option rom */ 3099 } else if (fw.etrack_id != 0X0000) { 3100 snprintf(adapter->fw_version, 3101 sizeof(adapter->fw_version), 3102 "%d.%d, 0x%08x", 3103 fw.eep_major, fw.eep_minor, fw.etrack_id); 3104 } else { 3105 snprintf(adapter->fw_version, 3106 sizeof(adapter->fw_version), 3107 "%d.%d.%d", 3108 fw.eep_major, fw.eep_minor, fw.eep_build); 3109 } 3110 break; 3111 } 3112 } 3113 3114 /** 3115 * igb_init_mas - init Media Autosense feature if enabled in the NVM 3116 * 3117 * @adapter: adapter struct 3118 **/ 3119 static void igb_init_mas(struct igb_adapter *adapter) 3120 { 3121 struct e1000_hw *hw = &adapter->hw; 3122 u16 eeprom_data; 3123 3124 hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data); 3125 switch (hw->bus.func) { 3126 case E1000_FUNC_0: 3127 if (eeprom_data & IGB_MAS_ENABLE_0) { 3128 adapter->flags |= IGB_FLAG_MAS_ENABLE; 3129 netdev_info(adapter->netdev, 3130 "MAS: Enabling Media Autosense for port %d\n", 3131 hw->bus.func); 3132 } 3133 break; 3134 case E1000_FUNC_1: 3135 if (eeprom_data & IGB_MAS_ENABLE_1) { 3136 adapter->flags |= IGB_FLAG_MAS_ENABLE; 3137 netdev_info(adapter->netdev, 
3138 "MAS: Enabling Media Autosense for port %d\n", 3139 hw->bus.func); 3140 } 3141 break; 3142 case E1000_FUNC_2: 3143 if (eeprom_data & IGB_MAS_ENABLE_2) { 3144 adapter->flags |= IGB_FLAG_MAS_ENABLE; 3145 netdev_info(adapter->netdev, 3146 "MAS: Enabling Media Autosense for port %d\n", 3147 hw->bus.func); 3148 } 3149 break; 3150 case E1000_FUNC_3: 3151 if (eeprom_data & IGB_MAS_ENABLE_3) { 3152 adapter->flags |= IGB_FLAG_MAS_ENABLE; 3153 netdev_info(adapter->netdev, 3154 "MAS: Enabling Media Autosense for port %d\n", 3155 hw->bus.func); 3156 } 3157 break; 3158 default: 3159 /* Shouldn't get here */ 3160 netdev_err(adapter->netdev, 3161 "MAS: Invalid port configuration, returning\n"); 3162 break; 3163 } 3164 } 3165 3166 /** 3167 * igb_init_i2c - Init I2C interface 3168 * @adapter: pointer to adapter structure 3169 **/ 3170 static s32 igb_init_i2c(struct igb_adapter *adapter) 3171 { 3172 s32 status = 0; 3173 3174 /* I2C interface supported on i350 devices */ 3175 if (adapter->hw.mac.type != e1000_i350) 3176 return 0; 3177 3178 /* Initialize the i2c bus which is controlled by the registers. 3179 * This bus will use the i2c_algo_bit structure that implements 3180 * the protocol through toggling of the 4 bits in the register. 3181 */ 3182 adapter->i2c_adap.owner = THIS_MODULE; 3183 adapter->i2c_algo = igb_i2c_algo; 3184 adapter->i2c_algo.data = adapter; 3185 adapter->i2c_adap.algo_data = &adapter->i2c_algo; 3186 adapter->i2c_adap.dev.parent = &adapter->pdev->dev; 3187 strscpy(adapter->i2c_adap.name, "igb BB", 3188 sizeof(adapter->i2c_adap.name)); 3189 status = i2c_bit_add_bus(&adapter->i2c_adap); 3190 return status; 3191 } 3192 3193 /** 3194 * igb_probe - Device Initialization Routine 3195 * @pdev: PCI device information struct 3196 * @ent: entry in igb_pci_tbl 3197 * 3198 * Returns 0 on success, negative on failure 3199 * 3200 * igb_probe initializes an adapter identified by a pci_dev structure. 
3201 * The OS initialization, configuring of the adapter private structure, 3202 * and a hardware reset occur. 3203 **/ 3204 static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 3205 { 3206 struct net_device *netdev; 3207 struct igb_adapter *adapter; 3208 struct e1000_hw *hw; 3209 u16 eeprom_data = 0; 3210 s32 ret_val; 3211 static int global_quad_port_a; /* global quad port a indication */ 3212 const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; 3213 u8 part_str[E1000_PBANUM_LENGTH]; 3214 int err; 3215 3216 /* Catch broken hardware that put the wrong VF device ID in 3217 * the PCIe SR-IOV capability. 3218 */ 3219 if (pdev->is_virtfn) { 3220 WARN(1, KERN_ERR "%s (%x:%x) should not be a VF!\n", 3221 pci_name(pdev), pdev->vendor, pdev->device); 3222 return -EINVAL; 3223 } 3224 3225 err = pci_enable_device_mem(pdev); 3226 if (err) 3227 return err; 3228 3229 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 3230 if (err) { 3231 dev_err(&pdev->dev, 3232 "No usable DMA configuration, aborting\n"); 3233 goto err_dma; 3234 } 3235 3236 err = pci_request_mem_regions(pdev, igb_driver_name); 3237 if (err) 3238 goto err_pci_reg; 3239 3240 pci_set_master(pdev); 3241 pci_save_state(pdev); 3242 3243 err = -ENOMEM; 3244 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), 3245 IGB_MAX_TX_QUEUES); 3246 if (!netdev) 3247 goto err_alloc_etherdev; 3248 3249 SET_NETDEV_DEV(netdev, &pdev->dev); 3250 3251 pci_set_drvdata(pdev, netdev); 3252 adapter = netdev_priv(netdev); 3253 adapter->netdev = netdev; 3254 adapter->pdev = pdev; 3255 hw = &adapter->hw; 3256 hw->back = adapter; 3257 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 3258 3259 err = -EIO; 3260 adapter->io_addr = pci_iomap(pdev, 0, 0); 3261 if (!adapter->io_addr) 3262 goto err_ioremap; 3263 /* hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */ 3264 hw->hw_addr = adapter->io_addr; 3265 3266 netdev->netdev_ops = &igb_netdev_ops; 3267 
igb_set_ethtool_ops(netdev); 3268 netdev->watchdog_timeo = 5 * HZ; 3269 3270 strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); 3271 3272 netdev->mem_start = pci_resource_start(pdev, 0); 3273 netdev->mem_end = pci_resource_end(pdev, 0); 3274 3275 /* PCI config space info */ 3276 hw->vendor_id = pdev->vendor; 3277 hw->device_id = pdev->device; 3278 hw->revision_id = pdev->revision; 3279 hw->subsystem_vendor_id = pdev->subsystem_vendor; 3280 hw->subsystem_device_id = pdev->subsystem_device; 3281 3282 /* Copy the default MAC, PHY and NVM function pointers */ 3283 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 3284 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 3285 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); 3286 /* Initialize skew-specific constants */ 3287 err = ei->get_invariants(hw); 3288 if (err) 3289 goto err_sw_init; 3290 3291 /* setup the private structure */ 3292 err = igb_sw_init(adapter); 3293 if (err) 3294 goto err_sw_init; 3295 3296 igb_get_bus_info_pcie(hw); 3297 3298 hw->phy.autoneg_wait_to_complete = false; 3299 3300 /* Copper options */ 3301 if (hw->phy.media_type == e1000_media_type_copper) { 3302 hw->phy.mdix = AUTO_ALL_MODES; 3303 hw->phy.disable_polarity_correction = false; 3304 hw->phy.ms_type = e1000_ms_hw_default; 3305 } 3306 3307 if (igb_check_reset_block(hw)) 3308 dev_info(&pdev->dev, 3309 "PHY reset is blocked due to SOL/IDER session.\n"); 3310 3311 /* features is initialized to 0 in allocation, it might have bits 3312 * set by igb_sw_init so we should use an or instead of an 3313 * assignment. 
3314 */ 3315 netdev->features |= NETIF_F_SG | 3316 NETIF_F_TSO | 3317 NETIF_F_TSO6 | 3318 NETIF_F_RXHASH | 3319 NETIF_F_RXCSUM | 3320 NETIF_F_HW_CSUM; 3321 3322 if (hw->mac.type >= e1000_82576) 3323 netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4; 3324 3325 if (hw->mac.type >= e1000_i350) 3326 netdev->features |= NETIF_F_HW_TC; 3327 3328 #define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 3329 NETIF_F_GSO_GRE_CSUM | \ 3330 NETIF_F_GSO_IPXIP4 | \ 3331 NETIF_F_GSO_IPXIP6 | \ 3332 NETIF_F_GSO_UDP_TUNNEL | \ 3333 NETIF_F_GSO_UDP_TUNNEL_CSUM) 3334 3335 netdev->gso_partial_features = IGB_GSO_PARTIAL_FEATURES; 3336 netdev->features |= NETIF_F_GSO_PARTIAL | IGB_GSO_PARTIAL_FEATURES; 3337 3338 /* copy netdev features into list of user selectable features */ 3339 netdev->hw_features |= netdev->features | 3340 NETIF_F_HW_VLAN_CTAG_RX | 3341 NETIF_F_HW_VLAN_CTAG_TX | 3342 NETIF_F_RXALL; 3343 3344 if (hw->mac.type >= e1000_i350) 3345 netdev->hw_features |= NETIF_F_NTUPLE; 3346 3347 netdev->features |= NETIF_F_HIGHDMA; 3348 3349 netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; 3350 netdev->mpls_features |= NETIF_F_HW_CSUM; 3351 netdev->hw_enc_features |= netdev->vlan_features; 3352 3353 /* set this bit last since it cannot be part of vlan_features */ 3354 netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | 3355 NETIF_F_HW_VLAN_CTAG_RX | 3356 NETIF_F_HW_VLAN_CTAG_TX; 3357 3358 netdev->priv_flags |= IFF_SUPP_NOFCS; 3359 3360 netdev->priv_flags |= IFF_UNICAST_FLT; 3361 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 3362 NETDEV_XDP_ACT_XSK_ZEROCOPY; 3363 3364 /* MTU range: 68 - 9216 */ 3365 netdev->min_mtu = ETH_MIN_MTU; 3366 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 3367 3368 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); 3369 3370 /* before reading the NVM, reset the controller to put the device in a 3371 * known good starting state 3372 */ 3373 hw->mac.ops.reset_hw(hw); 3374 3375 /* make sure the NVM is good , i211/i210 
parts can have special NVM 3376 * that doesn't contain a checksum 3377 */ 3378 switch (hw->mac.type) { 3379 case e1000_i210: 3380 case e1000_i211: 3381 if (igb_get_flash_presence_i210(hw)) { 3382 if (hw->nvm.ops.validate(hw) < 0) { 3383 dev_err(&pdev->dev, 3384 "The NVM Checksum Is Not Valid\n"); 3385 err = -EIO; 3386 goto err_eeprom; 3387 } 3388 } 3389 break; 3390 default: 3391 if (hw->nvm.ops.validate(hw) < 0) { 3392 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 3393 err = -EIO; 3394 goto err_eeprom; 3395 } 3396 break; 3397 } 3398 3399 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 3400 /* copy the MAC address out of the NVM */ 3401 if (hw->mac.ops.read_mac_addr(hw)) 3402 dev_err(&pdev->dev, "NVM Read Error\n"); 3403 } 3404 3405 eth_hw_addr_set(netdev, hw->mac.addr); 3406 3407 if (!is_valid_ether_addr(netdev->dev_addr)) { 3408 dev_err(&pdev->dev, "Invalid MAC Address\n"); 3409 err = -EIO; 3410 goto err_eeprom; 3411 } 3412 3413 igb_set_default_mac_filter(adapter); 3414 3415 /* get firmware version for ethtool -i */ 3416 igb_set_fw_version(adapter); 3417 3418 /* configure RXPBSIZE and TXPBSIZE */ 3419 if (hw->mac.type == e1000_i210) { 3420 wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); 3421 wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); 3422 } 3423 3424 timer_setup(&adapter->watchdog_timer, igb_watchdog, 0); 3425 timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0); 3426 3427 INIT_WORK(&adapter->reset_task, igb_reset_task); 3428 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); 3429 3430 /* Initialize link properties that are user-changeable */ 3431 adapter->fc_autoneg = true; 3432 hw->mac.autoneg = true; 3433 hw->phy.autoneg_advertised = 0x2f; 3434 3435 hw->fc.requested_mode = e1000_fc_default; 3436 hw->fc.current_mode = e1000_fc_default; 3437 3438 igb_validate_mdi_setting(hw); 3439 3440 /* By default, support wake on port A */ 3441 if (hw->bus.func == 0) 3442 adapter->flags |= IGB_FLAG_WOL_SUPPORTED; 3443 3444 /* Check the NVM for 
wake support on non-port A ports */ 3445 if (hw->mac.type >= e1000_82580) 3446 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + 3447 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, 3448 &eeprom_data); 3449 else if (hw->bus.func == 1) 3450 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 3451 3452 if (eeprom_data & IGB_EEPROM_APME) 3453 adapter->flags |= IGB_FLAG_WOL_SUPPORTED; 3454 3455 /* now that we have the eeprom settings, apply the special cases where 3456 * the eeprom may be wrong or the board simply won't support wake on 3457 * lan on a particular port 3458 */ 3459 switch (pdev->device) { 3460 case E1000_DEV_ID_82575GB_QUAD_COPPER: 3461 adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; 3462 break; 3463 case E1000_DEV_ID_82575EB_FIBER_SERDES: 3464 case E1000_DEV_ID_82576_FIBER: 3465 case E1000_DEV_ID_82576_SERDES: 3466 /* Wake events only supported on port A for dual fiber 3467 * regardless of eeprom setting 3468 */ 3469 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) 3470 adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; 3471 break; 3472 case E1000_DEV_ID_82576_QUAD_COPPER: 3473 case E1000_DEV_ID_82576_QUAD_COPPER_ET2: 3474 /* if quad port adapter, disable WoL on all but port A */ 3475 if (global_quad_port_a != 0) 3476 adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; 3477 else 3478 adapter->flags |= IGB_FLAG_QUAD_PORT_A; 3479 /* Reset for multiple quad port adapters */ 3480 if (++global_quad_port_a == 4) 3481 global_quad_port_a = 0; 3482 break; 3483 default: 3484 /* If the device can't wake, don't set software support */ 3485 if (!device_can_wakeup(&adapter->pdev->dev)) 3486 adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; 3487 } 3488 3489 /* initialize the wol settings based on the eeprom settings */ 3490 if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) 3491 adapter->wol |= E1000_WUFC_MAG; 3492 3493 /* Some vendors want WoL disabled by default, but still supported */ 3494 if ((hw->mac.type == e1000_i350) && 3495 (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { 3496 
adapter->flags |= IGB_FLAG_WOL_SUPPORTED; 3497 adapter->wol = 0; 3498 } 3499 3500 /* Some vendors want the ability to Use the EEPROM setting as 3501 * enable/disable only, and not for capability 3502 */ 3503 if (((hw->mac.type == e1000_i350) || 3504 (hw->mac.type == e1000_i354)) && 3505 (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) { 3506 adapter->flags |= IGB_FLAG_WOL_SUPPORTED; 3507 adapter->wol = 0; 3508 } 3509 if (hw->mac.type == e1000_i350) { 3510 if (((pdev->subsystem_device == 0x5001) || 3511 (pdev->subsystem_device == 0x5002)) && 3512 (hw->bus.func == 0)) { 3513 adapter->flags |= IGB_FLAG_WOL_SUPPORTED; 3514 adapter->wol = 0; 3515 } 3516 if (pdev->subsystem_device == 0x1F52) 3517 adapter->flags |= IGB_FLAG_WOL_SUPPORTED; 3518 } 3519 3520 device_set_wakeup_enable(&adapter->pdev->dev, 3521 adapter->flags & IGB_FLAG_WOL_SUPPORTED); 3522 3523 /* reset the hardware with the new settings */ 3524 igb_reset(adapter); 3525 3526 /* Init the I2C interface */ 3527 err = igb_init_i2c(adapter); 3528 if (err) { 3529 dev_err(&pdev->dev, "failed to init i2c interface\n"); 3530 goto err_eeprom; 3531 } 3532 3533 /* let the f/w know that the h/w is now under the control of the 3534 * driver. 3535 */ 3536 igb_get_hw_control(adapter); 3537 3538 strcpy(netdev->name, "eth%d"); 3539 err = register_netdev(netdev); 3540 if (err) 3541 goto err_register; 3542 3543 /* carrier off reporting is important to ethtool even BEFORE open */ 3544 netif_carrier_off(netdev); 3545 3546 #ifdef CONFIG_IGB_DCA 3547 if (dca_add_requester(&pdev->dev) == 0) { 3548 adapter->flags |= IGB_FLAG_DCA_ENABLED; 3549 dev_info(&pdev->dev, "DCA enabled\n"); 3550 igb_setup_dca(adapter); 3551 } 3552 3553 #endif 3554 #ifdef CONFIG_IGB_HWMON 3555 /* Initialize the thermal sensor on i350 devices. */ 3556 if (hw->mac.type == e1000_i350 && hw->bus.func == 0) { 3557 u16 ets_word; 3558 3559 /* Read the NVM to determine if this i350 device supports an 3560 * external thermal sensor. 
3561 */ 3562 hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word); 3563 if (ets_word != 0x0000 && ets_word != 0xFFFF) 3564 adapter->ets = true; 3565 else 3566 adapter->ets = false; 3567 /* Only enable I2C bit banging if an external thermal 3568 * sensor is supported. 3569 */ 3570 if (adapter->ets) 3571 igb_set_i2c_bb(hw); 3572 hw->mac.ops.init_thermal_sensor_thresh(hw); 3573 if (igb_sysfs_init(adapter)) 3574 dev_err(&pdev->dev, 3575 "failed to allocate sysfs resources\n"); 3576 } else { 3577 adapter->ets = false; 3578 } 3579 #endif 3580 /* Check if Media Autosense is enabled */ 3581 adapter->ei = *ei; 3582 if (hw->dev_spec._82575.mas_capable) 3583 igb_init_mas(adapter); 3584 3585 /* do hw tstamp init after resetting */ 3586 igb_ptp_init(adapter); 3587 3588 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); 3589 /* print bus type/speed/width info, not applicable to i354 */ 3590 if (hw->mac.type != e1000_i354) { 3591 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", 3592 netdev->name, 3593 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : 3594 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : 3595 "unknown"), 3596 ((hw->bus.width == e1000_bus_width_pcie_x4) ? 3597 "Width x4" : 3598 (hw->bus.width == e1000_bus_width_pcie_x2) ? 3599 "Width x2" : 3600 (hw->bus.width == e1000_bus_width_pcie_x1) ? 3601 "Width x1" : "unknown"), netdev->dev_addr); 3602 } 3603 3604 if ((hw->mac.type == e1000_82576 && 3605 rd32(E1000_EECD) & E1000_EECD_PRES) || 3606 (hw->mac.type >= e1000_i210 || 3607 igb_get_flash_presence_i210(hw))) { 3608 ret_val = igb_read_part_string(hw, part_str, 3609 E1000_PBANUM_LENGTH); 3610 } else { 3611 ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND; 3612 } 3613 3614 if (ret_val) 3615 strcpy(part_str, "Unknown"); 3616 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); 3617 dev_info(&pdev->dev, 3618 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", 3619 (adapter->flags & IGB_FLAG_HAS_MSIX) ? 
"MSI-X" : 3620 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", 3621 adapter->num_rx_queues, adapter->num_tx_queues); 3622 if (hw->phy.media_type == e1000_media_type_copper) { 3623 switch (hw->mac.type) { 3624 case e1000_i350: 3625 case e1000_i210: 3626 case e1000_i211: 3627 /* Enable EEE for internal copper PHY devices */ 3628 err = igb_set_eee_i350(hw, true, true); 3629 if ((!err) && 3630 (!hw->dev_spec._82575.eee_disable)) { 3631 adapter->eee_advert = 3632 MDIO_EEE_100TX | MDIO_EEE_1000T; 3633 adapter->flags |= IGB_FLAG_EEE; 3634 } 3635 break; 3636 case e1000_i354: 3637 if ((rd32(E1000_CTRL_EXT) & 3638 E1000_CTRL_EXT_LINK_MODE_SGMII)) { 3639 err = igb_set_eee_i354(hw, true, true); 3640 if ((!err) && 3641 (!hw->dev_spec._82575.eee_disable)) { 3642 adapter->eee_advert = 3643 MDIO_EEE_100TX | MDIO_EEE_1000T; 3644 adapter->flags |= IGB_FLAG_EEE; 3645 } 3646 } 3647 break; 3648 default: 3649 break; 3650 } 3651 } 3652 3653 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 3654 3655 pm_runtime_put_noidle(&pdev->dev); 3656 return 0; 3657 3658 err_register: 3659 igb_release_hw_control(adapter); 3660 memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); 3661 err_eeprom: 3662 if (!igb_check_reset_block(hw)) 3663 igb_reset_phy(hw); 3664 3665 if (hw->flash_address) 3666 iounmap(hw->flash_address); 3667 err_sw_init: 3668 kfree(adapter->mac_table); 3669 kfree(adapter->shadow_vfta); 3670 igb_clear_interrupt_scheme(adapter); 3671 #ifdef CONFIG_PCI_IOV 3672 igb_disable_sriov(pdev, false); 3673 #endif 3674 pci_iounmap(pdev, adapter->io_addr); 3675 err_ioremap: 3676 free_netdev(netdev); 3677 err_alloc_etherdev: 3678 pci_release_mem_regions(pdev); 3679 err_pci_reg: 3680 err_dma: 3681 pci_disable_device(pdev); 3682 return err; 3683 } 3684 3685 #ifdef CONFIG_PCI_IOV 3686 static int igb_sriov_reinit(struct pci_dev *dev) 3687 { 3688 struct net_device *netdev = pci_get_drvdata(dev); 3689 struct igb_adapter *adapter = netdev_priv(netdev); 3690 struct pci_dev 
*pdev = adapter->pdev; 3691 3692 rtnl_lock(); 3693 3694 if (netif_running(netdev)) 3695 igb_close(netdev); 3696 else 3697 igb_reset(adapter); 3698 3699 igb_clear_interrupt_scheme(adapter); 3700 3701 igb_init_queue_configuration(adapter); 3702 3703 if (igb_init_interrupt_scheme(adapter, true)) { 3704 rtnl_unlock(); 3705 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 3706 return -ENOMEM; 3707 } 3708 3709 if (netif_running(netdev)) 3710 igb_open(netdev); 3711 3712 rtnl_unlock(); 3713 3714 return 0; 3715 } 3716 3717 static int igb_disable_sriov(struct pci_dev *pdev, bool reinit) 3718 { 3719 struct net_device *netdev = pci_get_drvdata(pdev); 3720 struct igb_adapter *adapter = netdev_priv(netdev); 3721 struct e1000_hw *hw = &adapter->hw; 3722 unsigned long flags; 3723 3724 /* reclaim resources allocated to VFs */ 3725 if (adapter->vf_data) { 3726 /* disable iov and allow time for transactions to clear */ 3727 if (pci_vfs_assigned(pdev)) { 3728 dev_warn(&pdev->dev, 3729 "Cannot deallocate SR-IOV virtual functions while they are assigned - VFs will not be deallocated\n"); 3730 return -EPERM; 3731 } else { 3732 pci_disable_sriov(pdev); 3733 msleep(500); 3734 } 3735 spin_lock_irqsave(&adapter->vfs_lock, flags); 3736 kfree(adapter->vf_mac_list); 3737 adapter->vf_mac_list = NULL; 3738 kfree(adapter->vf_data); 3739 adapter->vf_data = NULL; 3740 adapter->vfs_allocated_count = 0; 3741 spin_unlock_irqrestore(&adapter->vfs_lock, flags); 3742 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 3743 wrfl(); 3744 msleep(100); 3745 dev_info(&pdev->dev, "IOV Disabled\n"); 3746 3747 /* Re-enable DMA Coalescing flag since IOV is turned off */ 3748 adapter->flags |= IGB_FLAG_DMAC; 3749 } 3750 3751 return reinit ? 
igb_sriov_reinit(pdev) : 0; 3752 } 3753 3754 static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit) 3755 { 3756 struct net_device *netdev = pci_get_drvdata(pdev); 3757 struct igb_adapter *adapter = netdev_priv(netdev); 3758 int old_vfs = pci_num_vf(pdev); 3759 struct vf_mac_filter *mac_list; 3760 int err = 0; 3761 int num_vf_mac_filters, i; 3762 3763 if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) { 3764 err = -EPERM; 3765 goto out; 3766 } 3767 if (!num_vfs) 3768 goto out; 3769 3770 if (old_vfs) { 3771 dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n", 3772 old_vfs, max_vfs); 3773 adapter->vfs_allocated_count = old_vfs; 3774 } else 3775 adapter->vfs_allocated_count = num_vfs; 3776 3777 adapter->vf_data = kzalloc_objs(struct vf_data_storage, 3778 adapter->vfs_allocated_count); 3779 3780 /* if allocation failed then we do not support SR-IOV */ 3781 if (!adapter->vf_data) { 3782 adapter->vfs_allocated_count = 0; 3783 err = -ENOMEM; 3784 goto out; 3785 } 3786 3787 /* Due to the limited number of RAR entries calculate potential 3788 * number of MAC filters available for the VFs. Reserve entries 3789 * for PF default MAC, PF MAC filters and at least one RAR entry 3790 * for each VF for VF MAC. 3791 */ 3792 num_vf_mac_filters = adapter->hw.mac.rar_entry_count - 3793 (1 + IGB_PF_MAC_FILTERS_RESERVED + 3794 adapter->vfs_allocated_count); 3795 3796 adapter->vf_mac_list = kzalloc_objs(struct vf_mac_filter, 3797 num_vf_mac_filters); 3798 3799 mac_list = adapter->vf_mac_list; 3800 INIT_LIST_HEAD(&adapter->vf_macs.l); 3801 3802 if (adapter->vf_mac_list) { 3803 /* Initialize list of VF MAC filters */ 3804 for (i = 0; i < num_vf_mac_filters; i++) { 3805 mac_list->vf = -1; 3806 mac_list->free = true; 3807 list_add(&mac_list->l, &adapter->vf_macs.l); 3808 mac_list++; 3809 } 3810 } else { 3811 /* If we could not allocate memory for the VF MAC filters 3812 * we can continue without this feature but warn user. 
3813 */ 3814 dev_err(&pdev->dev, 3815 "Unable to allocate memory for VF MAC filter list\n"); 3816 } 3817 3818 dev_info(&pdev->dev, "%d VFs allocated\n", 3819 adapter->vfs_allocated_count); 3820 for (i = 0; i < adapter->vfs_allocated_count; i++) 3821 igb_vf_configure(adapter, i); 3822 3823 /* DMA Coalescing is not supported in IOV mode. */ 3824 adapter->flags &= ~IGB_FLAG_DMAC; 3825 3826 if (reinit) { 3827 err = igb_sriov_reinit(pdev); 3828 if (err) 3829 goto err_out; 3830 } 3831 3832 /* only call pci_enable_sriov() if no VFs are allocated already */ 3833 if (!old_vfs) { 3834 err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); 3835 if (err) 3836 goto err_out; 3837 } 3838 3839 goto out; 3840 3841 err_out: 3842 kfree(adapter->vf_mac_list); 3843 adapter->vf_mac_list = NULL; 3844 kfree(adapter->vf_data); 3845 adapter->vf_data = NULL; 3846 adapter->vfs_allocated_count = 0; 3847 out: 3848 return err; 3849 } 3850 3851 #endif 3852 /** 3853 * igb_remove_i2c - Cleanup I2C interface 3854 * @adapter: pointer to adapter structure 3855 **/ 3856 static void igb_remove_i2c(struct igb_adapter *adapter) 3857 { 3858 /* free the adapter bus structure */ 3859 i2c_del_adapter(&adapter->i2c_adap); 3860 } 3861 3862 /** 3863 * igb_remove - Device Removal Routine 3864 * @pdev: PCI device information struct 3865 * 3866 * igb_remove is called by the PCI subsystem to alert the driver 3867 * that it should release a PCI device. The could be caused by a 3868 * Hot-Plug event, or because the driver is going to be removed from 3869 * memory. 
3870 **/ 3871 static void igb_remove(struct pci_dev *pdev) 3872 { 3873 struct net_device *netdev = pci_get_drvdata(pdev); 3874 struct igb_adapter *adapter = netdev_priv(netdev); 3875 struct e1000_hw *hw = &adapter->hw; 3876 3877 pm_runtime_get_noresume(&pdev->dev); 3878 #ifdef CONFIG_IGB_HWMON 3879 igb_sysfs_exit(adapter); 3880 #endif 3881 igb_remove_i2c(adapter); 3882 igb_ptp_stop(adapter); 3883 /* The watchdog timer may be rescheduled, so explicitly 3884 * disable watchdog from being rescheduled. 3885 */ 3886 set_bit(__IGB_DOWN, &adapter->state); 3887 timer_delete_sync(&adapter->watchdog_timer); 3888 timer_delete_sync(&adapter->phy_info_timer); 3889 3890 cancel_work_sync(&adapter->reset_task); 3891 cancel_work_sync(&adapter->watchdog_task); 3892 3893 #ifdef CONFIG_IGB_DCA 3894 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 3895 dev_info(&pdev->dev, "DCA disabled\n"); 3896 dca_remove_requester(&pdev->dev); 3897 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 3898 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 3899 } 3900 #endif 3901 3902 /* Release control of h/w to f/w. If f/w is AMT enabled, this 3903 * would have already happened in close and is redundant. 3904 */ 3905 igb_release_hw_control(adapter); 3906 3907 #ifdef CONFIG_PCI_IOV 3908 igb_disable_sriov(pdev, false); 3909 #endif 3910 3911 unregister_netdev(netdev); 3912 3913 igb_clear_interrupt_scheme(adapter); 3914 3915 pci_iounmap(pdev, adapter->io_addr); 3916 if (hw->flash_address) 3917 iounmap(hw->flash_address); 3918 pci_release_mem_regions(pdev); 3919 3920 kfree(adapter->mac_table); 3921 kfree(adapter->shadow_vfta); 3922 free_netdev(netdev); 3923 3924 pci_disable_device(pdev); 3925 } 3926 3927 /** 3928 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space 3929 * @adapter: board private structure to initialize 3930 * 3931 * This function initializes the vf specific data storage and then attempts to 3932 * allocate the VFs. 
The reason for ordering it this way is because it is much 3933 * more expensive time wise to disable SR-IOV than it is to allocate and free 3934 * the memory for the VFs. 3935 **/ 3936 static void igb_probe_vfs(struct igb_adapter *adapter) 3937 { 3938 #ifdef CONFIG_PCI_IOV 3939 struct pci_dev *pdev = adapter->pdev; 3940 struct e1000_hw *hw = &adapter->hw; 3941 3942 /* Virtualization features not supported on i210 and 82580 family. */ 3943 if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) || 3944 (hw->mac.type == e1000_82580)) 3945 return; 3946 3947 /* Of the below we really only want the effect of getting 3948 * IGB_FLAG_HAS_MSIX set (if available), without which 3949 * igb_enable_sriov() has no effect. 3950 */ 3951 igb_set_interrupt_capability(adapter, true); 3952 igb_reset_interrupt_capability(adapter); 3953 3954 pci_sriov_set_totalvfs(pdev, 7); 3955 igb_enable_sriov(pdev, max_vfs, false); 3956 3957 #endif /* CONFIG_PCI_IOV */ 3958 } 3959 3960 unsigned int igb_get_max_rss_queues(struct igb_adapter *adapter) 3961 { 3962 struct e1000_hw *hw = &adapter->hw; 3963 unsigned int max_rss_queues; 3964 3965 /* Determine the maximum number of RSS queues supported. 
*/ 3966 switch (hw->mac.type) { 3967 case e1000_i211: 3968 max_rss_queues = IGB_MAX_RX_QUEUES_I211; 3969 break; 3970 case e1000_82575: 3971 case e1000_i210: 3972 max_rss_queues = IGB_MAX_RX_QUEUES_82575; 3973 break; 3974 case e1000_i350: 3975 /* I350 cannot do RSS and SR-IOV at the same time */ 3976 if (!!adapter->vfs_allocated_count) { 3977 max_rss_queues = 1; 3978 break; 3979 } 3980 fallthrough; 3981 case e1000_82576: 3982 if (!!adapter->vfs_allocated_count) { 3983 max_rss_queues = 2; 3984 break; 3985 } 3986 fallthrough; 3987 case e1000_82580: 3988 case e1000_i354: 3989 default: 3990 max_rss_queues = IGB_MAX_RX_QUEUES; 3991 break; 3992 } 3993 3994 return max_rss_queues; 3995 } 3996 3997 static void igb_init_queue_configuration(struct igb_adapter *adapter) 3998 { 3999 u32 max_rss_queues; 4000 4001 max_rss_queues = igb_get_max_rss_queues(adapter); 4002 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 4003 4004 igb_set_flag_queue_pairs(adapter, max_rss_queues); 4005 } 4006 4007 void igb_set_flag_queue_pairs(struct igb_adapter *adapter, 4008 const u32 max_rss_queues) 4009 { 4010 struct e1000_hw *hw = &adapter->hw; 4011 4012 /* Determine if we need to pair queues. */ 4013 switch (hw->mac.type) { 4014 case e1000_82575: 4015 case e1000_i211: 4016 /* Device supports enough interrupts without queue pairing. */ 4017 break; 4018 case e1000_82576: 4019 case e1000_82580: 4020 case e1000_i350: 4021 case e1000_i354: 4022 case e1000_i210: 4023 default: 4024 /* If rss_queues > half of max_rss_queues, pair the queues in 4025 * order to conserve interrupts due to limited supply. 
4026 */ 4027 if (adapter->rss_queues > (max_rss_queues / 2)) 4028 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 4029 else 4030 adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS; 4031 break; 4032 } 4033 } 4034 4035 /** 4036 * igb_sw_init - Initialize general software structures (struct igb_adapter) 4037 * @adapter: board private structure to initialize 4038 * 4039 * igb_sw_init initializes the Adapter private data structure. 4040 * Fields are initialized based on PCI device information and 4041 * OS network device settings (MTU size). 4042 **/ 4043 static int igb_sw_init(struct igb_adapter *adapter) 4044 { 4045 struct e1000_hw *hw = &adapter->hw; 4046 struct net_device *netdev = adapter->netdev; 4047 struct pci_dev *pdev = adapter->pdev; 4048 4049 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 4050 4051 /* set default ring sizes */ 4052 adapter->tx_ring_count = IGB_DEFAULT_TXD; 4053 adapter->rx_ring_count = IGB_DEFAULT_RXD; 4054 4055 /* set default ITR values */ 4056 adapter->rx_itr_setting = IGB_DEFAULT_ITR; 4057 adapter->tx_itr_setting = IGB_DEFAULT_ITR; 4058 4059 /* set default work limits */ 4060 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; 4061 4062 adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD; 4063 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 4064 4065 spin_lock_init(&adapter->nfc_lock); 4066 spin_lock_init(&adapter->stats64_lock); 4067 4068 /* init spinlock to avoid concurrency of VF resources */ 4069 spin_lock_init(&adapter->vfs_lock); 4070 #ifdef CONFIG_PCI_IOV 4071 switch (hw->mac.type) { 4072 case e1000_82576: 4073 case e1000_i350: 4074 if (max_vfs > 7) { 4075 dev_warn(&pdev->dev, 4076 "Maximum of 7 VFs per PF, using max\n"); 4077 max_vfs = adapter->vfs_allocated_count = 7; 4078 } else 4079 adapter->vfs_allocated_count = max_vfs; 4080 if (adapter->vfs_allocated_count) 4081 dev_warn(&pdev->dev, 4082 "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n"); 4083 break; 4084 default: 4085 
break; 4086 } 4087 #endif /* CONFIG_PCI_IOV */ 4088 4089 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 4090 adapter->flags |= IGB_FLAG_HAS_MSIX; 4091 4092 adapter->mac_table = kzalloc_objs(struct igb_mac_addr, 4093 hw->mac.rar_entry_count); 4094 if (!adapter->mac_table) 4095 return -ENOMEM; 4096 4097 igb_probe_vfs(adapter); 4098 4099 igb_init_queue_configuration(adapter); 4100 4101 /* Setup and initialize a copy of the hw vlan table array */ 4102 adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), 4103 GFP_KERNEL); 4104 if (!adapter->shadow_vfta) 4105 return -ENOMEM; 4106 4107 /* This call may decrease the number of queues */ 4108 if (igb_init_interrupt_scheme(adapter, true)) { 4109 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 4110 return -ENOMEM; 4111 } 4112 4113 /* Explicitly disable IRQ since the NIC can be in any state. */ 4114 igb_irq_disable(adapter); 4115 4116 if (hw->mac.type >= e1000_i350) 4117 adapter->flags &= ~IGB_FLAG_DMAC; 4118 4119 set_bit(__IGB_DOWN, &adapter->state); 4120 return 0; 4121 } 4122 4123 /** 4124 * __igb_open - Called when a network interface is made active 4125 * @netdev: network interface device structure 4126 * @resuming: indicates whether we are in a resume call 4127 * 4128 * Returns 0 on success, negative value on failure 4129 * 4130 * The open entry point is called when a network interface is made 4131 * active by the system (IFF_UP). At this point all resources needed 4132 * for transmit and receive operations are allocated, the interrupt 4133 * handler is registered with the OS, the watchdog timer is started, 4134 * and the stack is notified that the interface is ready. 
4135 **/ 4136 static int __igb_open(struct net_device *netdev, bool resuming) 4137 { 4138 struct igb_adapter *adapter = netdev_priv(netdev); 4139 struct pci_dev *pdev = adapter->pdev; 4140 struct e1000_hw *hw = &adapter->hw; 4141 struct napi_struct *napi; 4142 int err; 4143 int i; 4144 4145 /* disallow open during test */ 4146 if (test_bit(__IGB_TESTING, &adapter->state)) { 4147 WARN_ON(resuming); 4148 return -EBUSY; 4149 } 4150 4151 if (!resuming) 4152 pm_runtime_get_sync(&pdev->dev); 4153 4154 netif_carrier_off(netdev); 4155 4156 /* allocate transmit descriptors */ 4157 err = igb_setup_all_tx_resources(adapter); 4158 if (err) 4159 goto err_setup_tx; 4160 4161 /* allocate receive descriptors */ 4162 err = igb_setup_all_rx_resources(adapter); 4163 if (err) 4164 goto err_setup_rx; 4165 4166 igb_power_up_link(adapter); 4167 4168 /* before we allocate an interrupt, we must be ready to handle it. 4169 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt 4170 * as soon as we call pci_request_irq, so we have to setup our 4171 * clean_rx handler before we do so. 4172 */ 4173 igb_configure(adapter); 4174 4175 err = igb_request_irq(adapter); 4176 if (err) 4177 goto err_req_irq; 4178 4179 /* Notify the stack of the actual queue counts. */ 4180 err = netif_set_real_num_tx_queues(adapter->netdev, 4181 adapter->num_tx_queues); 4182 if (err) 4183 goto err_set_queues; 4184 4185 err = netif_set_real_num_rx_queues(adapter->netdev, 4186 adapter->num_rx_queues); 4187 if (err) 4188 goto err_set_queues; 4189 4190 /* From here on the code is the same as igb_up() */ 4191 clear_bit(__IGB_DOWN, &adapter->state); 4192 4193 for (i = 0; i < adapter->num_q_vectors; i++) { 4194 napi = &adapter->q_vector[i]->napi; 4195 napi_enable(napi); 4196 igb_set_queue_napi(adapter, i, napi); 4197 } 4198 4199 /* Clear any pending interrupts. 
*/ 4200 rd32(E1000_TSICR); 4201 rd32(E1000_ICR); 4202 4203 igb_irq_enable(adapter); 4204 4205 /* notify VFs that reset has been completed */ 4206 if (adapter->vfs_allocated_count) { 4207 u32 reg_data = rd32(E1000_CTRL_EXT); 4208 4209 reg_data |= E1000_CTRL_EXT_PFRSTD; 4210 wr32(E1000_CTRL_EXT, reg_data); 4211 } 4212 4213 netif_tx_start_all_queues(netdev); 4214 4215 if (!resuming) 4216 pm_runtime_put(&pdev->dev); 4217 4218 /* start the watchdog. */ 4219 hw->mac.get_link_status = 1; 4220 schedule_work(&adapter->watchdog_task); 4221 4222 return 0; 4223 4224 err_set_queues: 4225 igb_free_irq(adapter); 4226 err_req_irq: 4227 igb_release_hw_control(adapter); 4228 igb_power_down_link(adapter); 4229 igb_free_all_rx_resources(adapter); 4230 err_setup_rx: 4231 igb_free_all_tx_resources(adapter); 4232 err_setup_tx: 4233 igb_reset(adapter); 4234 if (!resuming) 4235 pm_runtime_put(&pdev->dev); 4236 4237 return err; 4238 } 4239 4240 int igb_open(struct net_device *netdev) 4241 { 4242 return __igb_open(netdev, false); 4243 } 4244 4245 /** 4246 * __igb_close - Disables a network interface 4247 * @netdev: network interface device structure 4248 * @suspending: indicates we are in a suspend call 4249 * 4250 * Returns 0, this is not allowed to fail 4251 * 4252 * The close entry point is called when an interface is de-activated 4253 * by the OS. The hardware is still under the driver's control, but 4254 * needs to be disabled. A global MAC reset is issued to stop the 4255 * hardware, and all transmit and receive resources are freed. 
4256 **/ 4257 static int __igb_close(struct net_device *netdev, bool suspending) 4258 { 4259 struct igb_adapter *adapter = netdev_priv(netdev); 4260 struct pci_dev *pdev = adapter->pdev; 4261 4262 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); 4263 4264 if (!suspending) 4265 pm_runtime_get_sync(&pdev->dev); 4266 4267 igb_down(adapter); 4268 igb_free_irq(adapter); 4269 4270 igb_free_all_tx_resources(adapter); 4271 igb_free_all_rx_resources(adapter); 4272 4273 if (!suspending) 4274 pm_runtime_put_sync(&pdev->dev); 4275 return 0; 4276 } 4277 4278 int igb_close(struct net_device *netdev) 4279 { 4280 if (netif_device_present(netdev) || netdev->dismantle) 4281 return __igb_close(netdev, false); 4282 return 0; 4283 } 4284 4285 /** 4286 * igb_setup_tx_resources - allocate Tx resources (Descriptors) 4287 * @tx_ring: tx descriptor ring (for a specific queue) to setup 4288 * 4289 * Return 0 on success, negative on failure 4290 **/ 4291 int igb_setup_tx_resources(struct igb_ring *tx_ring) 4292 { 4293 struct device *dev = tx_ring->dev; 4294 int size; 4295 4296 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 4297 4298 tx_ring->tx_buffer_info = vmalloc(size); 4299 if (!tx_ring->tx_buffer_info) 4300 goto err; 4301 4302 /* round up to nearest 4K */ 4303 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); 4304 tx_ring->size = ALIGN(tx_ring->size, 4096); 4305 4306 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 4307 &tx_ring->dma, GFP_KERNEL); 4308 if (!tx_ring->desc) 4309 goto err; 4310 4311 tx_ring->next_to_use = 0; 4312 tx_ring->next_to_clean = 0; 4313 4314 return 0; 4315 4316 err: 4317 vfree(tx_ring->tx_buffer_info); 4318 tx_ring->tx_buffer_info = NULL; 4319 dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); 4320 return -ENOMEM; 4321 } 4322 4323 /** 4324 * igb_setup_all_tx_resources - wrapper to allocate Tx resources 4325 * (Descriptors) for all queues 4326 * @adapter: board private structure 4327 * 4328 * Return 0 on success, 
negative on failure 4329 **/ 4330 static int igb_setup_all_tx_resources(struct igb_adapter *adapter) 4331 { 4332 struct pci_dev *pdev = adapter->pdev; 4333 int i, err = 0; 4334 4335 for (i = 0; i < adapter->num_tx_queues; i++) { 4336 err = igb_setup_tx_resources(adapter->tx_ring[i]); 4337 if (err) { 4338 dev_err(&pdev->dev, 4339 "Allocation for Tx Queue %u failed\n", i); 4340 for (i--; i >= 0; i--) 4341 igb_free_tx_resources(adapter->tx_ring[i]); 4342 break; 4343 } 4344 } 4345 4346 return err; 4347 } 4348 4349 /** 4350 * igb_setup_tctl - configure the transmit control registers 4351 * @adapter: Board private structure 4352 **/ 4353 void igb_setup_tctl(struct igb_adapter *adapter) 4354 { 4355 struct e1000_hw *hw = &adapter->hw; 4356 u32 tctl; 4357 4358 /* disable queue 0 which is enabled by default on 82575 and 82576 */ 4359 wr32(E1000_TXDCTL(0), 0); 4360 4361 /* Program the Transmit Control Register */ 4362 tctl = rd32(E1000_TCTL); 4363 tctl &= ~E1000_TCTL_CT; 4364 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | 4365 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); 4366 4367 igb_config_collision_dist(hw); 4368 4369 /* Enable transmits */ 4370 tctl |= E1000_TCTL_EN; 4371 4372 wr32(E1000_TCTL, tctl); 4373 } 4374 4375 /** 4376 * igb_configure_tx_ring - Configure transmit ring after Reset 4377 * @adapter: board private structure 4378 * @ring: tx ring to configure 4379 * 4380 * Configure a transmit ring after a reset. 
4381 **/ 4382 void igb_configure_tx_ring(struct igb_adapter *adapter, 4383 struct igb_ring *ring) 4384 { 4385 struct e1000_hw *hw = &adapter->hw; 4386 u32 txdctl = 0; 4387 u64 tdba = ring->dma; 4388 int reg_idx = ring->reg_idx; 4389 4390 WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring)); 4391 4392 wr32(E1000_TDLEN(reg_idx), 4393 ring->count * sizeof(union e1000_adv_tx_desc)); 4394 wr32(E1000_TDBAL(reg_idx), 4395 tdba & 0x00000000ffffffffULL); 4396 wr32(E1000_TDBAH(reg_idx), tdba >> 32); 4397 4398 ring->tail = adapter->io_addr + E1000_TDT(reg_idx); 4399 wr32(E1000_TDH(reg_idx), 0); 4400 writel(0, ring->tail); 4401 4402 txdctl |= IGB_TX_PTHRESH; 4403 txdctl |= IGB_TX_HTHRESH << 8; 4404 txdctl |= IGB_TX_WTHRESH << 16; 4405 4406 /* reinitialize tx_buffer_info */ 4407 memset(ring->tx_buffer_info, 0, 4408 sizeof(struct igb_tx_buffer) * ring->count); 4409 4410 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 4411 wr32(E1000_TXDCTL(reg_idx), txdctl); 4412 } 4413 4414 /** 4415 * igb_configure_tx - Configure transmit Unit after Reset 4416 * @adapter: board private structure 4417 * 4418 * Configure the Tx unit of the MAC after a reset. 
4419 **/ 4420 static void igb_configure_tx(struct igb_adapter *adapter) 4421 { 4422 struct e1000_hw *hw = &adapter->hw; 4423 int i; 4424 4425 /* disable the queues */ 4426 for (i = 0; i < adapter->num_tx_queues; i++) 4427 wr32(E1000_TXDCTL(adapter->tx_ring[i]->reg_idx), 0); 4428 4429 wrfl(); 4430 usleep_range(10000, 20000); 4431 4432 for (i = 0; i < adapter->num_tx_queues; i++) 4433 igb_configure_tx_ring(adapter, adapter->tx_ring[i]); 4434 } 4435 4436 /** 4437 * igb_setup_rx_resources - allocate Rx resources (Descriptors) 4438 * @rx_ring: Rx descriptor ring (for a specific queue) to setup 4439 * 4440 * Returns 0 on success, negative on failure 4441 **/ 4442 int igb_setup_rx_resources(struct igb_ring *rx_ring) 4443 { 4444 struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); 4445 struct device *dev = rx_ring->dev; 4446 int size, res; 4447 4448 /* XDP RX-queue info */ 4449 if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 4450 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 4451 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, 4452 rx_ring->queue_index, 0); 4453 if (res < 0) { 4454 dev_err(dev, "Failed to register xdp_rxq index %u\n", 4455 rx_ring->queue_index); 4456 return res; 4457 } 4458 4459 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 4460 4461 rx_ring->rx_buffer_info = vmalloc(size); 4462 if (!rx_ring->rx_buffer_info) 4463 goto err; 4464 4465 /* Round up to nearest 4K */ 4466 rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); 4467 rx_ring->size = ALIGN(rx_ring->size, 4096); 4468 4469 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 4470 &rx_ring->dma, GFP_KERNEL); 4471 if (!rx_ring->desc) 4472 goto err; 4473 4474 rx_ring->next_to_alloc = 0; 4475 rx_ring->next_to_clean = 0; 4476 rx_ring->next_to_use = 0; 4477 4478 rx_ring->xdp_prog = adapter->xdp_prog; 4479 4480 return 0; 4481 4482 err: 4483 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 4484 vfree(rx_ring->rx_buffer_info); 4485 rx_ring->rx_buffer_info = NULL; 4486 dev_err(dev, "Unable to 
allocate memory for the Rx descriptor ring\n"); 4487 return -ENOMEM; 4488 } 4489 4490 /** 4491 * igb_setup_all_rx_resources - wrapper to allocate Rx resources 4492 * (Descriptors) for all queues 4493 * @adapter: board private structure 4494 * 4495 * Return 0 on success, negative on failure 4496 **/ 4497 static int igb_setup_all_rx_resources(struct igb_adapter *adapter) 4498 { 4499 struct pci_dev *pdev = adapter->pdev; 4500 int i, err = 0; 4501 4502 for (i = 0; i < adapter->num_rx_queues; i++) { 4503 err = igb_setup_rx_resources(adapter->rx_ring[i]); 4504 if (err) { 4505 dev_err(&pdev->dev, 4506 "Allocation for Rx Queue %u failed\n", i); 4507 for (i--; i >= 0; i--) 4508 igb_free_rx_resources(adapter->rx_ring[i]); 4509 break; 4510 } 4511 } 4512 4513 return err; 4514 } 4515 4516 /** 4517 * igb_setup_mrqc - configure the multiple receive queue control registers 4518 * @adapter: Board private structure 4519 **/ 4520 static void igb_setup_mrqc(struct igb_adapter *adapter) 4521 { 4522 struct e1000_hw *hw = &adapter->hw; 4523 u32 mrqc, rxcsum; 4524 u32 j, num_rx_queues; 4525 u32 rss_key[10]; 4526 4527 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 4528 for (j = 0; j < 10; j++) 4529 wr32(E1000_RSSRK(j), rss_key[j]); 4530 4531 num_rx_queues = adapter->rss_queues; 4532 4533 switch (hw->mac.type) { 4534 case e1000_82576: 4535 /* 82576 supports 2 RSS queues for SR-IOV */ 4536 if (adapter->vfs_allocated_count) 4537 num_rx_queues = 2; 4538 break; 4539 default: 4540 break; 4541 } 4542 4543 if (adapter->rss_indir_tbl_init != num_rx_queues) { 4544 for (j = 0; j < IGB_RETA_SIZE; j++) 4545 adapter->rss_indir_tbl[j] = 4546 (j * num_rx_queues) / IGB_RETA_SIZE; 4547 adapter->rss_indir_tbl_init = num_rx_queues; 4548 } 4549 igb_write_rss_indir_tbl(adapter); 4550 4551 /* Disable raw packet checksumming so that RSS hash is placed in 4552 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum 4553 * offloads as they are enabled by default 4554 */ 4555 rxcsum = rd32(E1000_RXCSUM); 4556 rxcsum |= E1000_RXCSUM_PCSD; 4557 4558 if (adapter->hw.mac.type >= e1000_82576) 4559 /* Enable Receive Checksum Offload for SCTP */ 4560 rxcsum |= E1000_RXCSUM_CRCOFL; 4561 4562 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 4563 wr32(E1000_RXCSUM, rxcsum); 4564 4565 /* Generate RSS hash based on packet types, TCP/UDP 4566 * port numbers and/or IPv4/v6 src and dst addresses 4567 */ 4568 mrqc = E1000_MRQC_RSS_FIELD_IPV4 | 4569 E1000_MRQC_RSS_FIELD_IPV4_TCP | 4570 E1000_MRQC_RSS_FIELD_IPV6 | 4571 E1000_MRQC_RSS_FIELD_IPV6_TCP | 4572 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; 4573 4574 if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) 4575 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; 4576 if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) 4577 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; 4578 4579 /* If VMDq is enabled then we set the appropriate mode for that, else 4580 * we default to RSS so that an RSS hash is calculated per packet even 4581 * if we are only using one queue 4582 */ 4583 if (adapter->vfs_allocated_count) { 4584 if (hw->mac.type > e1000_82575) { 4585 /* Set the default pool for the PF's first queue */ 4586 u32 vtctl = rd32(E1000_VT_CTL); 4587 4588 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | 4589 E1000_VT_CTL_DISABLE_DEF_POOL); 4590 vtctl |= adapter->vfs_allocated_count << 4591 E1000_VT_CTL_DEFAULT_POOL_SHIFT; 4592 wr32(E1000_VT_CTL, vtctl); 4593 } 4594 if (adapter->rss_queues > 1) 4595 mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ; 4596 else 4597 mrqc |= E1000_MRQC_ENABLE_VMDQ; 4598 } else { 4599 mrqc |= E1000_MRQC_ENABLE_RSS_MQ; 4600 } 4601 igb_vmm_control(adapter); 4602 4603 wr32(E1000_MRQC, mrqc); 4604 } 4605 4606 /** 4607 * igb_setup_rctl - configure the receive control registers 4608 * @adapter: Board private structure 4609 **/ 4610 void igb_setup_rctl(struct igb_adapter *adapter) 4611 { 4612 struct e1000_hw *hw = &adapter->hw; 4613 
u32 rctl; 4614 4615 rctl = rd32(E1000_RCTL); 4616 4617 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 4618 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); 4619 4620 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | 4621 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 4622 4623 /* enable stripping of CRC. It's unlikely this will break BMC 4624 * redirection as it did with e1000. Newer features require 4625 * that the HW strips the CRC. 4626 */ 4627 rctl |= E1000_RCTL_SECRC; 4628 4629 /* disable store bad packets and clear size bits. */ 4630 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); 4631 4632 /* enable LPE to allow for reception of jumbo frames */ 4633 rctl |= E1000_RCTL_LPE; 4634 4635 /* disable queue 0 to prevent tail write w/o re-config */ 4636 wr32(E1000_RXDCTL(0), 0); 4637 4638 /* Attention!!! For SR-IOV PF driver operations you must enable 4639 * queue drop for all VF and PF queues to prevent head of line blocking 4640 * if an un-trusted VF does not provide descriptors to hardware. 4641 */ 4642 if (adapter->vfs_allocated_count) { 4643 /* set all queue drop enable bits */ 4644 wr32(E1000_QDE, ALL_QUEUES); 4645 } 4646 4647 /* This is useful for sniffing bad packets. */ 4648 if (adapter->netdev->features & NETIF_F_RXALL) { 4649 /* UPE and MPE will be handled by normal PROMISC logic 4650 * in e1000e_set_rx_mode 4651 */ 4652 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ 4653 E1000_RCTL_BAM | /* RX All Bcast Pkts */ 4654 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ 4655 4656 rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */ 4657 E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ 4658 /* Do not mess with E1000_CTRL_VME, it affects transmit as well, 4659 * and that breaks VLANs. 
4660 */ 4661 } 4662 4663 wr32(E1000_RCTL, rctl); 4664 } 4665 4666 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, 4667 int vfn) 4668 { 4669 struct e1000_hw *hw = &adapter->hw; 4670 u32 vmolr; 4671 4672 if (size > MAX_JUMBO_FRAME_SIZE) 4673 size = MAX_JUMBO_FRAME_SIZE; 4674 4675 vmolr = rd32(E1000_VMOLR(vfn)); 4676 vmolr &= ~E1000_VMOLR_RLPML_MASK; 4677 vmolr |= size | E1000_VMOLR_LPE; 4678 wr32(E1000_VMOLR(vfn), vmolr); 4679 4680 return 0; 4681 } 4682 4683 static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, 4684 int vfn, bool enable) 4685 { 4686 struct e1000_hw *hw = &adapter->hw; 4687 u32 val, reg; 4688 4689 if (hw->mac.type < e1000_82576) 4690 return; 4691 4692 if (hw->mac.type == e1000_i350) 4693 reg = E1000_DVMOLR(vfn); 4694 else 4695 reg = E1000_VMOLR(vfn); 4696 4697 val = rd32(reg); 4698 if (enable) 4699 val |= E1000_VMOLR_STRVLAN; 4700 else 4701 val &= ~(E1000_VMOLR_STRVLAN); 4702 wr32(reg, val); 4703 } 4704 4705 static inline void igb_set_vmolr(struct igb_adapter *adapter, 4706 int vfn, bool aupe) 4707 { 4708 struct e1000_hw *hw = &adapter->hw; 4709 u32 vmolr; 4710 4711 /* This register exists only on 82576 and newer so if we are older then 4712 * we should exit and do nothing 4713 */ 4714 if (hw->mac.type < e1000_82576) 4715 return; 4716 4717 vmolr = rd32(E1000_VMOLR(vfn)); 4718 if (aupe) 4719 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ 4720 else 4721 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ 4722 4723 /* clear all bits that might not be set */ 4724 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE); 4725 4726 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) 4727 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ 4728 /* for VMDq only allow the VFs and pool 0 to accept broadcast and 4729 * multicast packets 4730 */ 4731 if (vfn <= adapter->vfs_allocated_count) 4732 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ 4733 4734 wr32(E1000_VMOLR(vfn), vmolr); 4735 } 4736 4737 /** 4738 
*  igb_setup_srrctl - configure the split and replication receive control
 *                      registers
 *  @adapter: Board private structure
 *  @ring: receive ring to be configured
 **/
void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	int reg_idx = ring->reg_idx;
	u32 buf_size;
	u32 srrctl;

	/* packet buffer size: taken from the XSK pool when one is attached,
	 * otherwise one of the two fixed sizes
	 */
	if (ring->xsk_pool)
		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
	else if (ring_uses_large_buffer(ring))
		buf_size = IGB_RXBUFFER_3072;
	else
		buf_size = IGB_RXBUFFER_2048;

	srrctl = (IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT) |
		 (buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) |
		 E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

	if (hw->mac.type >= e1000_82580)
		srrctl |= E1000_SRRCTL_TIMESTAMP;

	/* Only set Drop Enable if VFs allocated, or we are supporting multiple
	 * queues and rx flow control is disabled
	 */
	if (adapter->vfs_allocated_count ||
	    (!(hw->fc.current_mode & e1000_fc_rx_pause) &&
	     adapter->num_rx_queues > 1))
		srrctl |= E1000_SRRCTL_DROP_EN;

	wr32(E1000_SRRCTL(reg_idx), srrctl);
}

/**
 *  igb_configure_rx_ring - Configure a receive ring after Reset
 *  @adapter: board private structure
 *  @ring: receive ring to be configured
 *
 *  Configure the Rx unit of the MAC after a reset.
4779 **/ 4780 void igb_configure_rx_ring(struct igb_adapter *adapter, 4781 struct igb_ring *ring) 4782 { 4783 struct e1000_hw *hw = &adapter->hw; 4784 union e1000_adv_rx_desc *rx_desc; 4785 u64 rdba = ring->dma; 4786 int reg_idx = ring->reg_idx; 4787 u32 rxdctl = 0; 4788 4789 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 4790 WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring)); 4791 if (ring->xsk_pool) { 4792 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 4793 MEM_TYPE_XSK_BUFF_POOL, 4794 NULL)); 4795 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 4796 } else { 4797 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 4798 MEM_TYPE_PAGE_SHARED, 4799 NULL)); 4800 } 4801 4802 /* disable the queue */ 4803 wr32(E1000_RXDCTL(reg_idx), 0); 4804 4805 /* Set DMA base address registers */ 4806 wr32(E1000_RDBAL(reg_idx), 4807 rdba & 0x00000000ffffffffULL); 4808 wr32(E1000_RDBAH(reg_idx), rdba >> 32); 4809 wr32(E1000_RDLEN(reg_idx), 4810 ring->count * sizeof(union e1000_adv_rx_desc)); 4811 4812 /* initialize head and tail */ 4813 ring->tail = adapter->io_addr + E1000_RDT(reg_idx); 4814 wr32(E1000_RDH(reg_idx), 0); 4815 writel(0, ring->tail); 4816 4817 /* set descriptor configuration */ 4818 igb_setup_srrctl(adapter, ring); 4819 4820 /* set filtering for VMDQ pools */ 4821 igb_set_vmolr(adapter, reg_idx & 0x7, true); 4822 4823 rxdctl |= IGB_RX_PTHRESH; 4824 rxdctl |= IGB_RX_HTHRESH << 8; 4825 rxdctl |= IGB_RX_WTHRESH << 16; 4826 4827 if (ring->xsk_pool) 4828 memset(ring->rx_buffer_info_zc, 0, 4829 sizeof(*ring->rx_buffer_info_zc) * ring->count); 4830 else 4831 memset(ring->rx_buffer_info, 0, 4832 sizeof(*ring->rx_buffer_info) * ring->count); 4833 4834 /* initialize Rx descriptor 0 */ 4835 rx_desc = IGB_RX_DESC(ring, 0); 4836 rx_desc->wb.upper.length = 0; 4837 4838 /* enable receive descriptor fetching */ 4839 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 4840 wr32(E1000_RXDCTL(reg_idx), rxdctl); 4841 } 4842 4843 static void igb_set_rx_buffer_len(struct igb_adapter 
*adapter, 4844 struct igb_ring *rx_ring) 4845 { 4846 #if (PAGE_SIZE < 8192) 4847 struct e1000_hw *hw = &adapter->hw; 4848 #endif 4849 4850 /* set build_skb and buffer size flags */ 4851 clear_ring_build_skb_enabled(rx_ring); 4852 clear_ring_uses_large_buffer(rx_ring); 4853 4854 if (adapter->flags & IGB_FLAG_RX_LEGACY) 4855 return; 4856 4857 set_ring_build_skb_enabled(rx_ring); 4858 4859 #if (PAGE_SIZE < 8192) 4860 if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || 4861 IGB_2K_TOO_SMALL_WITH_PADDING || 4862 rd32(E1000_RCTL) & E1000_RCTL_SBP) 4863 set_ring_uses_large_buffer(rx_ring); 4864 #endif 4865 } 4866 4867 /** 4868 * igb_configure_rx - Configure receive Unit after Reset 4869 * @adapter: board private structure 4870 * 4871 * Configure the Rx unit of the MAC after a reset. 4872 **/ 4873 static void igb_configure_rx(struct igb_adapter *adapter) 4874 { 4875 int i; 4876 4877 /* set the correct pool for the PF default MAC address in entry 0 */ 4878 igb_set_default_mac_filter(adapter); 4879 4880 /* Setup the HW Rx Head and Tail Descriptor Pointers and 4881 * the Base and Length of the Rx Descriptor Ring 4882 */ 4883 for (i = 0; i < adapter->num_rx_queues; i++) { 4884 struct igb_ring *rx_ring = adapter->rx_ring[i]; 4885 4886 igb_set_rx_buffer_len(adapter, rx_ring); 4887 igb_configure_rx_ring(adapter, rx_ring); 4888 } 4889 } 4890 4891 /** 4892 * igb_free_tx_resources - Free Tx Resources per Queue 4893 * @tx_ring: Tx descriptor ring for a specific queue 4894 * 4895 * Free all transmit software resources 4896 **/ 4897 void igb_free_tx_resources(struct igb_ring *tx_ring) 4898 { 4899 igb_clean_tx_ring(tx_ring); 4900 4901 vfree(tx_ring->tx_buffer_info); 4902 tx_ring->tx_buffer_info = NULL; 4903 4904 /* if not set, then don't free */ 4905 if (!tx_ring->desc) 4906 return; 4907 4908 dma_free_coherent(tx_ring->dev, tx_ring->size, 4909 tx_ring->desc, tx_ring->dma); 4910 4911 tx_ring->desc = NULL; 4912 } 4913 4914 /** 4915 * igb_free_all_tx_resources - Free Tx Resources for 
All Queues 4916 * @adapter: board private structure 4917 * 4918 * Free all transmit software resources 4919 **/ 4920 static void igb_free_all_tx_resources(struct igb_adapter *adapter) 4921 { 4922 int i; 4923 4924 for (i = 0; i < adapter->num_tx_queues; i++) 4925 if (adapter->tx_ring[i]) 4926 igb_free_tx_resources(adapter->tx_ring[i]); 4927 } 4928 4929 /** 4930 * igb_clean_tx_ring - Free Tx Buffers 4931 * @tx_ring: ring to be cleaned 4932 **/ 4933 void igb_clean_tx_ring(struct igb_ring *tx_ring) 4934 { 4935 u16 i = tx_ring->next_to_clean; 4936 struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 4937 u32 xsk_frames = 0; 4938 4939 while (i != tx_ring->next_to_use) { 4940 union e1000_adv_tx_desc *eop_desc, *tx_desc; 4941 4942 /* Free all the Tx ring sk_buffs or xdp frames */ 4943 if (tx_buffer->type == IGB_TYPE_SKB) { 4944 dev_kfree_skb_any(tx_buffer->skb); 4945 } else if (tx_buffer->type == IGB_TYPE_XDP) { 4946 xdp_return_frame(tx_buffer->xdpf); 4947 } else if (tx_buffer->type == IGB_TYPE_XSK) { 4948 xsk_frames++; 4949 goto skip_for_xsk; 4950 } 4951 4952 /* unmap skb header data */ 4953 dma_unmap_single(tx_ring->dev, 4954 dma_unmap_addr(tx_buffer, dma), 4955 dma_unmap_len(tx_buffer, len), 4956 DMA_TO_DEVICE); 4957 4958 /* check for eop_desc to determine the end of the packet */ 4959 eop_desc = tx_buffer->next_to_watch; 4960 tx_desc = IGB_TX_DESC(tx_ring, i); 4961 4962 /* unmap remaining buffers */ 4963 while (tx_desc != eop_desc) { 4964 tx_buffer++; 4965 tx_desc++; 4966 i++; 4967 if (unlikely(i == tx_ring->count)) { 4968 i = 0; 4969 tx_buffer = tx_ring->tx_buffer_info; 4970 tx_desc = IGB_TX_DESC(tx_ring, 0); 4971 } 4972 4973 /* unmap any remaining paged data */ 4974 if (dma_unmap_len(tx_buffer, len)) 4975 dma_unmap_page(tx_ring->dev, 4976 dma_unmap_addr(tx_buffer, dma), 4977 dma_unmap_len(tx_buffer, len), 4978 DMA_TO_DEVICE); 4979 } 4980 4981 skip_for_xsk: 4982 tx_buffer->next_to_watch = NULL; 4983 4984 /* move us one more past the eop_desc for start of 
next pkt */ 4985 tx_buffer++; 4986 i++; 4987 if (unlikely(i == tx_ring->count)) { 4988 i = 0; 4989 tx_buffer = tx_ring->tx_buffer_info; 4990 } 4991 } 4992 4993 /* reset BQL for queue */ 4994 netdev_tx_reset_queue(txring_txq(tx_ring)); 4995 4996 if (tx_ring->xsk_pool && xsk_frames) 4997 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 4998 4999 /* reset next_to_use and next_to_clean */ 5000 tx_ring->next_to_use = 0; 5001 tx_ring->next_to_clean = 0; 5002 } 5003 5004 /** 5005 * igb_clean_all_tx_rings - Free Tx Buffers for all queues 5006 * @adapter: board private structure 5007 **/ 5008 static void igb_clean_all_tx_rings(struct igb_adapter *adapter) 5009 { 5010 int i; 5011 5012 for (i = 0; i < adapter->num_tx_queues; i++) 5013 if (adapter->tx_ring[i]) 5014 igb_clean_tx_ring(adapter->tx_ring[i]); 5015 } 5016 5017 /** 5018 * igb_free_rx_resources - Free Rx Resources 5019 * @rx_ring: ring to clean the resources from 5020 * 5021 * Free all receive software resources 5022 **/ 5023 void igb_free_rx_resources(struct igb_ring *rx_ring) 5024 { 5025 igb_clean_rx_ring(rx_ring); 5026 5027 rx_ring->xdp_prog = NULL; 5028 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 5029 if (rx_ring->xsk_pool) { 5030 vfree(rx_ring->rx_buffer_info_zc); 5031 rx_ring->rx_buffer_info_zc = NULL; 5032 } else { 5033 vfree(rx_ring->rx_buffer_info); 5034 rx_ring->rx_buffer_info = NULL; 5035 } 5036 5037 /* if not set, then don't free */ 5038 if (!rx_ring->desc) 5039 return; 5040 5041 dma_free_coherent(rx_ring->dev, rx_ring->size, 5042 rx_ring->desc, rx_ring->dma); 5043 5044 rx_ring->desc = NULL; 5045 } 5046 5047 /** 5048 * igb_free_all_rx_resources - Free Rx Resources for All Queues 5049 * @adapter: board private structure 5050 * 5051 * Free all receive software resources 5052 **/ 5053 static void igb_free_all_rx_resources(struct igb_adapter *adapter) 5054 { 5055 int i; 5056 5057 for (i = 0; i < adapter->num_rx_queues; i++) 5058 if (adapter->rx_ring[i]) 5059 igb_free_rx_resources(adapter->rx_ring[i]); 5060 } 5061 
5062 /** 5063 * igb_clean_rx_ring - Free Rx Buffers per Queue 5064 * @rx_ring: ring to free buffers from 5065 **/ 5066 void igb_clean_rx_ring(struct igb_ring *rx_ring) 5067 { 5068 u16 i = rx_ring->next_to_clean; 5069 5070 dev_kfree_skb(rx_ring->skb); 5071 rx_ring->skb = NULL; 5072 5073 if (rx_ring->xsk_pool) { 5074 igb_clean_rx_ring_zc(rx_ring); 5075 goto skip_for_xsk; 5076 } 5077 5078 /* Free all the Rx ring sk_buffs */ 5079 while (i != rx_ring->next_to_alloc) { 5080 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 5081 5082 /* Invalidate cache lines that may have been written to by 5083 * device so that we avoid corrupting memory. 5084 */ 5085 dma_sync_single_range_for_cpu(rx_ring->dev, 5086 buffer_info->dma, 5087 buffer_info->page_offset, 5088 igb_rx_bufsz(rx_ring), 5089 DMA_FROM_DEVICE); 5090 5091 /* free resources associated with mapping */ 5092 dma_unmap_page_attrs(rx_ring->dev, 5093 buffer_info->dma, 5094 igb_rx_pg_size(rx_ring), 5095 DMA_FROM_DEVICE, 5096 IGB_RX_DMA_ATTR); 5097 __page_frag_cache_drain(buffer_info->page, 5098 buffer_info->pagecnt_bias); 5099 5100 i++; 5101 if (i == rx_ring->count) 5102 i = 0; 5103 } 5104 5105 skip_for_xsk: 5106 rx_ring->next_to_alloc = 0; 5107 rx_ring->next_to_clean = 0; 5108 rx_ring->next_to_use = 0; 5109 } 5110 5111 /** 5112 * igb_clean_all_rx_rings - Free Rx Buffers for all queues 5113 * @adapter: board private structure 5114 **/ 5115 static void igb_clean_all_rx_rings(struct igb_adapter *adapter) 5116 { 5117 int i; 5118 5119 for (i = 0; i < adapter->num_rx_queues; i++) 5120 if (adapter->rx_ring[i]) 5121 igb_clean_rx_ring(adapter->rx_ring[i]); 5122 } 5123 5124 /** 5125 * igb_set_mac - Change the Ethernet Address of the NIC 5126 * @netdev: network interface device structure 5127 * @p: pointer to an address structure 5128 * 5129 * Returns 0 on success, negative on failure 5130 **/ 5131 static int igb_set_mac(struct net_device *netdev, void *p) 5132 { 5133 struct igb_adapter *adapter = netdev_priv(netdev); 
5134 struct e1000_hw *hw = &adapter->hw; 5135 struct sockaddr *addr = p; 5136 5137 if (!is_valid_ether_addr(addr->sa_data)) 5138 return -EADDRNOTAVAIL; 5139 5140 eth_hw_addr_set(netdev, addr->sa_data); 5141 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 5142 5143 /* set the correct pool for the new PF MAC address in entry 0 */ 5144 igb_set_default_mac_filter(adapter); 5145 5146 return 0; 5147 } 5148 5149 /** 5150 * igb_write_mc_addr_list - write multicast addresses to MTA 5151 * @netdev: network interface device structure 5152 * 5153 * Writes multicast address list to the MTA hash table. 5154 * Returns: -ENOMEM on failure 5155 * 0 on no addresses written 5156 * X on writing X addresses to MTA 5157 **/ 5158 static int igb_write_mc_addr_list(struct net_device *netdev) 5159 { 5160 struct igb_adapter *adapter = netdev_priv(netdev); 5161 struct e1000_hw *hw = &adapter->hw; 5162 struct netdev_hw_addr *ha; 5163 u8 *mta_list; 5164 int i; 5165 5166 if (netdev_mc_empty(netdev)) { 5167 /* nothing to program, so clear mc list */ 5168 igb_update_mc_addr_list(hw, NULL, 0); 5169 igb_restore_vf_multicasts(adapter); 5170 return 0; 5171 } 5172 5173 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 5174 if (!mta_list) 5175 return -ENOMEM; 5176 5177 /* The shared function expects a packed array of only addresses. 
*/ 5178 i = 0; 5179 netdev_for_each_mc_addr(ha, netdev) 5180 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 5181 5182 igb_update_mc_addr_list(hw, mta_list, i); 5183 kfree(mta_list); 5184 5185 return netdev_mc_count(netdev); 5186 } 5187 5188 static int igb_vlan_promisc_enable(struct igb_adapter *adapter) 5189 { 5190 struct e1000_hw *hw = &adapter->hw; 5191 u32 i, pf_id; 5192 5193 switch (hw->mac.type) { 5194 case e1000_i210: 5195 case e1000_i211: 5196 case e1000_i350: 5197 /* VLAN filtering needed for VLAN prio filter */ 5198 if (adapter->netdev->features & NETIF_F_NTUPLE) 5199 break; 5200 fallthrough; 5201 case e1000_82576: 5202 case e1000_82580: 5203 case e1000_i354: 5204 /* VLAN filtering needed for pool filtering */ 5205 if (adapter->vfs_allocated_count) 5206 break; 5207 fallthrough; 5208 default: 5209 return 1; 5210 } 5211 5212 /* We are already in VLAN promisc, nothing to do */ 5213 if (adapter->flags & IGB_FLAG_VLAN_PROMISC) 5214 return 0; 5215 5216 if (!adapter->vfs_allocated_count) 5217 goto set_vfta; 5218 5219 /* Add PF to all active pools */ 5220 pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; 5221 5222 for (i = E1000_VLVF_ARRAY_SIZE; --i;) { 5223 u32 vlvf = rd32(E1000_VLVF(i)); 5224 5225 vlvf |= BIT(pf_id); 5226 wr32(E1000_VLVF(i), vlvf); 5227 } 5228 5229 set_vfta: 5230 /* Set all bits in the VLAN filter table array */ 5231 for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;) 5232 hw->mac.ops.write_vfta(hw, i, ~0U); 5233 5234 /* Set flag so we don't redo unnecessary work */ 5235 adapter->flags |= IGB_FLAG_VLAN_PROMISC; 5236 5237 return 0; 5238 } 5239 5240 #define VFTA_BLOCK_SIZE 8 5241 static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset) 5242 { 5243 struct e1000_hw *hw = &adapter->hw; 5244 u32 vfta[VFTA_BLOCK_SIZE] = { 0 }; 5245 u32 vid_start = vfta_offset * 32; 5246 u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32); 5247 u32 i, vid, word, bits, pf_id; 5248 5249 /* guarantee that we don't scrub out management VLAN */ 
5250 vid = adapter->mng_vlan_id; 5251 if (vid >= vid_start && vid < vid_end) 5252 vfta[(vid - vid_start) / 32] |= BIT(vid % 32); 5253 5254 if (!adapter->vfs_allocated_count) 5255 goto set_vfta; 5256 5257 pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; 5258 5259 for (i = E1000_VLVF_ARRAY_SIZE; --i;) { 5260 u32 vlvf = rd32(E1000_VLVF(i)); 5261 5262 /* pull VLAN ID from VLVF */ 5263 vid = vlvf & VLAN_VID_MASK; 5264 5265 /* only concern ourselves with a certain range */ 5266 if (vid < vid_start || vid >= vid_end) 5267 continue; 5268 5269 if (vlvf & E1000_VLVF_VLANID_ENABLE) { 5270 /* record VLAN ID in VFTA */ 5271 vfta[(vid - vid_start) / 32] |= BIT(vid % 32); 5272 5273 /* if PF is part of this then continue */ 5274 if (test_bit(vid, adapter->active_vlans)) 5275 continue; 5276 } 5277 5278 /* remove PF from the pool */ 5279 bits = ~BIT(pf_id); 5280 bits &= rd32(E1000_VLVF(i)); 5281 wr32(E1000_VLVF(i), bits); 5282 } 5283 5284 set_vfta: 5285 /* extract values from active_vlans and write back to VFTA */ 5286 for (i = VFTA_BLOCK_SIZE; i--;) { 5287 vid = (vfta_offset + i) * 32; 5288 word = vid / BITS_PER_LONG; 5289 bits = vid % BITS_PER_LONG; 5290 5291 vfta[i] |= adapter->active_vlans[word] >> bits; 5292 5293 hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]); 5294 } 5295 } 5296 5297 static void igb_vlan_promisc_disable(struct igb_adapter *adapter) 5298 { 5299 u32 i; 5300 5301 /* We are not in VLAN promisc, nothing to do */ 5302 if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC)) 5303 return; 5304 5305 /* Set flag so we don't redo unnecessary work */ 5306 adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; 5307 5308 for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE) 5309 igb_scrub_vfta(adapter, i); 5310 } 5311 5312 /** 5313 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 5314 * @netdev: network interface device structure 5315 * 5316 * The set_rx_mode entry point is called whenever the unicast or multicast 5317 * address lists or 
the network interface flags are updated. This routine is 5318 * responsible for configuring the hardware for proper unicast, multicast, 5319 * promiscuous mode, and all-multi behavior. 5320 **/ 5321 static void igb_set_rx_mode(struct net_device *netdev) 5322 { 5323 struct igb_adapter *adapter = netdev_priv(netdev); 5324 struct e1000_hw *hw = &adapter->hw; 5325 unsigned int vfn = adapter->vfs_allocated_count; 5326 u32 rctl = 0, vmolr = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 5327 int count; 5328 5329 /* Check for Promiscuous and All Multicast modes */ 5330 if (netdev->flags & IFF_PROMISC) { 5331 rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE; 5332 vmolr |= E1000_VMOLR_MPME; 5333 5334 /* enable use of UTA filter to force packets to default pool */ 5335 if (hw->mac.type == e1000_82576) 5336 vmolr |= E1000_VMOLR_ROPE; 5337 } else { 5338 if (netdev->flags & IFF_ALLMULTI) { 5339 rctl |= E1000_RCTL_MPE; 5340 vmolr |= E1000_VMOLR_MPME; 5341 } else { 5342 /* Write addresses to the MTA, if the attempt fails 5343 * then we should just turn on promiscuous mode so 5344 * that we can at least receive multicast traffic 5345 */ 5346 count = igb_write_mc_addr_list(netdev); 5347 if (count < 0) { 5348 rctl |= E1000_RCTL_MPE; 5349 vmolr |= E1000_VMOLR_MPME; 5350 } else if (count) { 5351 vmolr |= E1000_VMOLR_ROMPE; 5352 } 5353 } 5354 } 5355 5356 /* Write addresses to available RAR registers, if there is not 5357 * sufficient space to store all the addresses then enable 5358 * unicast promiscuous mode 5359 */ 5360 if (__dev_uc_sync(netdev, igb_uc_sync, igb_uc_unsync)) { 5361 rctl |= E1000_RCTL_UPE; 5362 vmolr |= E1000_VMOLR_ROPE; 5363 } 5364 5365 /* enable VLAN filtering by default */ 5366 rctl |= E1000_RCTL_VFE; 5367 5368 /* disable VLAN filtering for modes that require it */ 5369 if ((netdev->flags & IFF_PROMISC) || 5370 (netdev->features & NETIF_F_RXALL)) { 5371 /* if we fail to set all rules then just clear VFE */ 5372 if (igb_vlan_promisc_enable(adapter)) 5373 rctl &= ~E1000_RCTL_VFE; 5374 } 
else { 5375 igb_vlan_promisc_disable(adapter); 5376 } 5377 5378 /* update state of unicast, multicast, and VLAN filtering modes */ 5379 rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE | 5380 E1000_RCTL_VFE); 5381 wr32(E1000_RCTL, rctl); 5382 5383 #if (PAGE_SIZE < 8192) 5384 if (!adapter->vfs_allocated_count) { 5385 if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB) 5386 rlpml = IGB_MAX_FRAME_BUILD_SKB; 5387 } 5388 #endif 5389 wr32(E1000_RLPML, rlpml); 5390 5391 /* In order to support SR-IOV and eventually VMDq it is necessary to set 5392 * the VMOLR to enable the appropriate modes. Without this workaround 5393 * we will have issues with VLAN tag stripping not being done for frames 5394 * that are only arriving because we are the default pool 5395 */ 5396 if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) 5397 return; 5398 5399 /* set UTA to appropriate mode */ 5400 igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE)); 5401 5402 vmolr |= rd32(E1000_VMOLR(vfn)) & 5403 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); 5404 5405 /* enable Rx jumbo frames, restrict as needed to support build_skb */ 5406 vmolr &= ~E1000_VMOLR_RLPML_MASK; 5407 #if (PAGE_SIZE < 8192) 5408 if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB) 5409 vmolr |= IGB_MAX_FRAME_BUILD_SKB; 5410 else 5411 #endif 5412 vmolr |= MAX_JUMBO_FRAME_SIZE; 5413 vmolr |= E1000_VMOLR_LPE; 5414 5415 wr32(E1000_VMOLR(vfn), vmolr); 5416 5417 igb_restore_vf_multicasts(adapter); 5418 } 5419 5420 static void igb_check_wvbr(struct igb_adapter *adapter) 5421 { 5422 struct e1000_hw *hw = &adapter->hw; 5423 u32 wvbr = 0; 5424 5425 switch (hw->mac.type) { 5426 case e1000_82576: 5427 case e1000_i350: 5428 wvbr = rd32(E1000_WVBR); 5429 if (!wvbr) 5430 return; 5431 break; 5432 default: 5433 break; 5434 } 5435 5436 adapter->wvbr |= wvbr; 5437 } 5438 5439 #define IGB_STAGGERED_QUEUE_OFFSET 8 5440 5441 static void igb_spoof_check(struct igb_adapter *adapter) 5442 { 5443 int j; 5444 5445 
if (!adapter->wvbr) 5446 return; 5447 5448 for (j = 0; j < adapter->vfs_allocated_count; j++) { 5449 if (adapter->wvbr & BIT(j) || 5450 adapter->wvbr & BIT(j + IGB_STAGGERED_QUEUE_OFFSET)) { 5451 dev_warn(&adapter->pdev->dev, 5452 "Spoof event(s) detected on VF %d\n", j); 5453 adapter->wvbr &= 5454 ~(BIT(j) | 5455 BIT(j + IGB_STAGGERED_QUEUE_OFFSET)); 5456 } 5457 } 5458 } 5459 5460 /* Need to wait a few seconds after link up to get diagnostic information from 5461 * the phy 5462 */ 5463 static void igb_update_phy_info(struct timer_list *t) 5464 { 5465 struct igb_adapter *adapter = timer_container_of(adapter, t, 5466 phy_info_timer); 5467 igb_get_phy_info(&adapter->hw); 5468 } 5469 5470 /** 5471 * igb_has_link - check shared code for link and determine up/down 5472 * @adapter: pointer to driver private info 5473 **/ 5474 bool igb_has_link(struct igb_adapter *adapter) 5475 { 5476 struct e1000_hw *hw = &adapter->hw; 5477 bool link_active = false; 5478 5479 /* get_link_status is set on LSC (link status) interrupt or 5480 * rx sequence error interrupt. 
get_link_status will stay 5481 * false until the e1000_check_for_link establishes link 5482 * for copper adapters ONLY 5483 */ 5484 switch (hw->phy.media_type) { 5485 case e1000_media_type_copper: 5486 if (!hw->mac.get_link_status) 5487 return true; 5488 fallthrough; 5489 case e1000_media_type_internal_serdes: 5490 hw->mac.ops.check_for_link(hw); 5491 link_active = !hw->mac.get_link_status; 5492 break; 5493 default: 5494 case e1000_media_type_unknown: 5495 break; 5496 } 5497 5498 if (((hw->mac.type == e1000_i210) || 5499 (hw->mac.type == e1000_i211)) && 5500 (hw->phy.id == I210_I_PHY_ID)) { 5501 if (!netif_carrier_ok(adapter->netdev)) { 5502 adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; 5503 } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { 5504 adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; 5505 adapter->link_check_timeout = jiffies; 5506 } 5507 } 5508 5509 return link_active; 5510 } 5511 5512 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) 5513 { 5514 bool ret = false; 5515 u32 ctrl_ext, thstat; 5516 5517 /* check for thermal sensor event on i350 copper only */ 5518 if (hw->mac.type == e1000_i350) { 5519 thstat = rd32(E1000_THSTAT); 5520 ctrl_ext = rd32(E1000_CTRL_EXT); 5521 5522 if ((hw->phy.media_type == e1000_media_type_copper) && 5523 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) 5524 ret = !!(thstat & event); 5525 } 5526 5527 return ret; 5528 } 5529 5530 /** 5531 * igb_check_lvmmc - check for malformed packets received 5532 * and indicated in LVMMC register 5533 * @adapter: pointer to adapter 5534 **/ 5535 static void igb_check_lvmmc(struct igb_adapter *adapter) 5536 { 5537 struct e1000_hw *hw = &adapter->hw; 5538 u32 lvmmc; 5539 5540 lvmmc = rd32(E1000_LVMMC); 5541 if (lvmmc) { 5542 if (unlikely(net_ratelimit())) { 5543 netdev_warn(adapter->netdev, 5544 "malformed Tx packet detected and dropped, LVMMC:0x%08x\n", 5545 lvmmc); 5546 } 5547 } 5548 } 5549 5550 /** 5551 * igb_watchdog - Timer Call-back 5552 * @t: pointer to timer_list 
/**
 *  igb_watchdog - Timer Call-back
 *  @t: pointer to timer_list containing our private info pointer
 *
 *  Only queues adapter->watchdog_task; all real work happens there.
 **/
static void igb_watchdog(struct timer_list *t)
{
	struct igb_adapter *adapter = timer_container_of(adapter, t,
							  watchdog_timer);
	/* Do the rest outside of interrupt context */
	schedule_work(&adapter->watchdog_task);
}

/**
 *  igb_watchdog_task - periodic link and ring housekeeping
 *  @work: work_struct embedded in the adapter as watchdog_task
 *
 *  Re-evaluates link state and, on a change, logs it and toggles the
 *  carrier; refreshes statistics; schedules reset_task when Tx work is
 *  still queued while the carrier is off; re-arms Tx hang detection;
 *  raises a software interrupt for Rx rings flagged
 *  IGB_RING_FLAG_RX_ALLOC_FAILED; and finally re-arms watchdog_timer
 *  unless __IGB_DOWN is set.  Several paths return early after
 *  scheduling reset_task, in which case the timer is not re-armed here.
 **/
static void igb_watchdog_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work,
						   struct igb_adapter,
						   watchdog_task);
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_phy_info *phy = &hw->phy;
	struct net_device *netdev = adapter->netdev;
	u32 link;
	int i;
	u32 connsw;
	/* 20 retries with a 100 ms sleep each: about 2 seconds in total */
	u16 phy_data, retry_count = 20;

	link = igb_has_link(adapter);

	/* While a link update is pending, report link as down until one
	 * second (HZ) has passed since link_check_timeout was stamped.
	 */
	if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
		else
			link = false;
	}

	/* Force link down if we have fiber to swap to */
	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
		if (hw->phy.media_type == e1000_media_type_copper) {
			connsw = rd32(E1000_CONNSW);
			if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
				link = 0;
		}
	}
	if (link) {
		/* Perform a reset if the media type changed. */
		if (hw->dev_spec._82575.media_changed) {
			hw->dev_spec._82575.media_changed = false;
			adapter->flags |= IGB_FLAG_MEDIA_RESET;
			igb_reset(adapter);
		}
		/* Cancel scheduled suspend requests. */
		pm_runtime_resume(netdev->dev.parent);

		/* Everything below runs only on a down -> up transition */
		if (!netif_carrier_ok(netdev)) {
			u32 ctrl;

			hw->mac.ops.get_speed_and_duplex(hw,
							 &adapter->link_speed,
							 &adapter->link_duplex);

			ctrl = rd32(E1000_CTRL);
			/* Links status message must follow this format */
			netdev_info(netdev,
			       "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
			       netdev->name,
			       adapter->link_speed,
			       adapter->link_duplex == FULL_DUPLEX ?
			       "Full" : "Half",
			       (ctrl & E1000_CTRL_TFCE) &&
			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
			       (ctrl & E1000_CTRL_RFCE) ? "RX" :
			       (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");

			/* disable EEE if enabled */
			if ((adapter->flags & IGB_FLAG_EEE) &&
				(adapter->link_duplex == HALF_DUPLEX)) {
				dev_info(&adapter->pdev->dev,
				"EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n");
				adapter->hw.dev_spec._82575.eee_disable = true;
				adapter->flags &= ~IGB_FLAG_EEE;
			}

			/* check if SmartSpeed worked */
			igb_check_downshift(hw);
			if (phy->speed_downgraded)
				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");

			/* check for thermal sensor event */
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_LINK_THROTTLE))
				netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n");

			/* adjust timeout factor according to speed/duplex */
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				/* maybe add some timeout factor ? */
				break;
			}

			/* The remote-receiver wait applies only at 1000 Mbps
			 * and only when the phy provides a read_reg op.
			 */
			if (adapter->link_speed != SPEED_1000 ||
			    !hw->phy.ops.read_reg)
				goto no_wait;

			/* wait for Remote receiver status OK */
retry_read_status:
			if (!igb_read_phy_reg(hw, PHY_1000T_STATUS,
					      &phy_data)) {
				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
				    retry_count) {
					msleep(100);
					retry_count--;
					goto retry_read_status;
				} else if (!retry_count) {
					/* NOTE(review): once retry_count hits
					 * 0 this is logged whether or not the
					 * final read shows the status bit set
					 */
					dev_err(&adapter->pdev->dev, "exceed max 2 second\n");
				}
			} else {
				/* the phy register read itself failed */
				dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n");
			}
no_wait:
			netif_carrier_on(netdev);

			igb_ping_all_vfs(adapter);
			igb_check_vf_rate_limit(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		/* Up -> down transition */
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;

			/* check for thermal sensor event */
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_PWR_DOWN)) {
				netdev_err(netdev, "The network adapter was stopped because it overheated\n");
			}

			/* Links status message must follow this format */
			netdev_info(netdev, "igb: %s NIC Link is Down\n",
			       netdev->name);
			netif_carrier_off(netdev);

			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));

			/* link is down, time to check for alternate media */
			if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
				igb_check_swap_media(adapter);
				if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
					schedule_work(&adapter->reset_task);
					/* return immediately */
					return;
				}
			}
			/* request a runtime suspend 5 seconds from now */
			pm_schedule_suspend(netdev->dev.parent,
					    MSEC_PER_SEC * 5);

		/* also check for alternate media here */
		} else if (!netif_carrier_ok(netdev) &&
			   (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
			igb_check_swap_media(adapter);
			if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
				schedule_work(&adapter->reset_task);
				/* return immediately */
				return;
			}
		}
	}

	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter);
	spin_unlock(&adapter->stats64_lock);

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *tx_ring = adapter->tx_ring[i];
		if (!netif_carrier_ok(netdev)) {
			/* We've lost link, so the controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
			 * (Do the reset outside of interrupt context).
			 */
			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
				adapter->tx_timeout_count++;
				schedule_work(&adapter->reset_task);
				/* return immediately since reset is imminent */
				return;
			}
		}

		/* Force detection of hung controller every watchdog period */
		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
	}

	/* Cause software interrupt to ensure Rx ring is cleaned */
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
		u32 eics = 0;

		/* collect the EIMS bits of every vector whose Rx ring
		 * recorded an allocation failure, then fire them at once
		 */
		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			struct igb_ring *rx_ring;

			if (!q_vector->rx.ring)
				continue;

			rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index];

			if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) {
				eics |= q_vector->eims_value;
				clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags);
			}
		}
		if (eics)
			wr32(E1000_EICS, eics);
	} else {
		/* without MSI-X only rx_ring[0] is examined */
		struct igb_ring *rx_ring = adapter->rx_ring[0];

		if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) {
			clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags);
			wr32(E1000_ICS, E1000_ICS_RXDMT0);
		}
	}

	igb_spoof_check(adapter);
	igb_ptp_rx_hang(adapter);
	igb_ptp_tx_hang(adapter);

	/* Check LVMMC register on i350/i354 only */
	if ((adapter->hw.mac.type == e1000_i350) ||
	    (adapter->hw.mac.type == e1000_i354))
		igb_check_lvmmc(adapter);

	/* Reset the timer: one second while a link update is pending,
	 * two seconds otherwise
	 */
	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
			mod_timer(&adapter->watchdog_timer,
				  round_jiffies(jiffies + HZ));
		else
			mod_timer(&adapter->watchdog_timer,
				  round_jiffies(jiffies + 2 * HZ));
	}
}
clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 5778 wr32(E1000_ICS, E1000_ICS_RXDMT0); 5779 } 5780 } 5781 5782 igb_spoof_check(adapter); 5783 igb_ptp_rx_hang(adapter); 5784 igb_ptp_tx_hang(adapter); 5785 5786 /* Check LVMMC register on i350/i354 only */ 5787 if ((adapter->hw.mac.type == e1000_i350) || 5788 (adapter->hw.mac.type == e1000_i354)) 5789 igb_check_lvmmc(adapter); 5790 5791 /* Reset the timer */ 5792 if (!test_bit(__IGB_DOWN, &adapter->state)) { 5793 if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) 5794 mod_timer(&adapter->watchdog_timer, 5795 round_jiffies(jiffies + HZ)); 5796 else 5797 mod_timer(&adapter->watchdog_timer, 5798 round_jiffies(jiffies + 2 * HZ)); 5799 } 5800 } 5801 5802 enum latency_range { 5803 lowest_latency = 0, 5804 low_latency = 1, 5805 bulk_latency = 2, 5806 latency_invalid = 255 5807 }; 5808 5809 /** 5810 * igb_update_ring_itr - update the dynamic ITR value based on packet size 5811 * @q_vector: pointer to q_vector 5812 * 5813 * Stores a new ITR value based on strictly on packet size. This 5814 * algorithm is less sophisticated than that used in igb_update_itr, 5815 * due to the difficulty of synchronizing statistics across multiple 5816 * receive rings. The divisors and thresholds used by this function 5817 * were determined based on theoretical maximum wire speed and testing 5818 * data, in order to minimize response time while increasing bulk 5819 * throughput. 5820 * This functionality is controlled by ethtool's coalescing settings. 5821 * NOTE: This function is called only when operating in a multiqueue 5822 * receive environment. 5823 **/ 5824 static void igb_update_ring_itr(struct igb_q_vector *q_vector) 5825 { 5826 int new_val = q_vector->itr_val; 5827 int avg_wire_size = 0; 5828 struct igb_adapter *adapter = q_vector->adapter; 5829 unsigned int packets; 5830 5831 /* For non-gigabit speeds, just fix the interrupt rate at 4000 5832 * ints/sec - ITR timer value of 120 ticks. 
5833 */ 5834 if (adapter->link_speed != SPEED_1000) { 5835 new_val = IGB_4K_ITR; 5836 goto set_itr_val; 5837 } 5838 5839 packets = q_vector->rx.total_packets; 5840 if (packets) 5841 avg_wire_size = q_vector->rx.total_bytes / packets; 5842 5843 packets = q_vector->tx.total_packets; 5844 if (packets) 5845 avg_wire_size = max_t(u32, avg_wire_size, 5846 q_vector->tx.total_bytes / packets); 5847 5848 /* if avg_wire_size isn't set no work was done */ 5849 if (!avg_wire_size) 5850 goto clear_counts; 5851 5852 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 5853 avg_wire_size += 24; 5854 5855 /* Don't starve jumbo frames */ 5856 avg_wire_size = min(avg_wire_size, 3000); 5857 5858 /* Give a little boost to mid-size frames */ 5859 if ((avg_wire_size > 300) && (avg_wire_size < 1200)) 5860 new_val = avg_wire_size / 3; 5861 else 5862 new_val = avg_wire_size / 2; 5863 5864 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 5865 if (new_val < IGB_20K_ITR && 5866 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 5867 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 5868 new_val = IGB_20K_ITR; 5869 5870 set_itr_val: 5871 if (new_val != q_vector->itr_val) { 5872 q_vector->itr_val = new_val; 5873 q_vector->set_itr = 1; 5874 } 5875 clear_counts: 5876 q_vector->rx.total_bytes = 0; 5877 q_vector->rx.total_packets = 0; 5878 q_vector->tx.total_bytes = 0; 5879 q_vector->tx.total_packets = 0; 5880 } 5881 5882 /** 5883 * igb_update_itr - update the dynamic ITR value based on statistics 5884 * @q_vector: pointer to q_vector 5885 * @ring_container: ring info to update the itr for 5886 * 5887 * Stores a new ITR value based on packets and byte 5888 * counts during the last interrupt. The advantage of per interrupt 5889 * computation is faster updates and more accurate ITR for the current 5890 * traffic pattern. 
Constants in this function were computed 5891 * based on theoretical maximum wire speed and thresholds were set based 5892 * on testing data as well as attempting to minimize response time 5893 * while increasing bulk throughput. 5894 * This functionality is controlled by ethtool's coalescing settings. 5895 * NOTE: These calculations are only valid when operating in a single- 5896 * queue environment. 5897 **/ 5898 static void igb_update_itr(struct igb_q_vector *q_vector, 5899 struct igb_ring_container *ring_container) 5900 { 5901 unsigned int packets = ring_container->total_packets; 5902 unsigned int bytes = ring_container->total_bytes; 5903 u8 itrval = ring_container->itr; 5904 5905 /* no packets, exit with status unchanged */ 5906 if (packets == 0) 5907 return; 5908 5909 switch (itrval) { 5910 case lowest_latency: 5911 /* handle TSO and jumbo frames */ 5912 if (bytes/packets > 8000) 5913 itrval = bulk_latency; 5914 else if ((packets < 5) && (bytes > 512)) 5915 itrval = low_latency; 5916 break; 5917 case low_latency: /* 50 usec aka 20000 ints/s */ 5918 if (bytes > 10000) { 5919 /* this if handles the TSO accounting */ 5920 if (bytes/packets > 8000) 5921 itrval = bulk_latency; 5922 else if ((packets < 10) || ((bytes/packets) > 1200)) 5923 itrval = bulk_latency; 5924 else if ((packets > 35)) 5925 itrval = lowest_latency; 5926 } else if (bytes/packets > 2000) { 5927 itrval = bulk_latency; 5928 } else if (packets <= 2 && bytes < 512) { 5929 itrval = lowest_latency; 5930 } 5931 break; 5932 case bulk_latency: /* 250 usec aka 4000 ints/s */ 5933 if (bytes > 25000) { 5934 if (packets > 35) 5935 itrval = low_latency; 5936 } else if (bytes < 1500) { 5937 itrval = low_latency; 5938 } 5939 break; 5940 } 5941 5942 /* clear work counters since we have the values we need */ 5943 ring_container->total_bytes = 0; 5944 ring_container->total_packets = 0; 5945 5946 /* write updated itr to ring container */ 5947 ring_container->itr = itrval; 5948 } 5949 5950 static void 
/**
 *  igb_set_itr - choose the next ITR value for a vector
 *  @q_vector: pointer to q_vector
 *
 *  Runs igb_update_itr() on the Tx and Rx containers, takes the larger
 *  of the two resulting latency classes and maps it to one of the
 *  IGB_70K_ITR / IGB_20K_ITR / IGB_4K_ITR constants.  When the result
 *  differs from q_vector->itr_val it is stored there and set_itr is
 *  raised.  This function does not write the hardware register.
 **/
static void igb_set_itr(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	u32 new_itr = q_vector->itr_val;
	u8 current_itr = 0;

	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
	if (adapter->link_speed != SPEED_1000) {
		current_itr = 0;
		new_itr = IGB_4K_ITR;
		goto set_itr_now;
	}

	igb_update_itr(q_vector, &q_vector->tx);
	igb_update_itr(q_vector, &q_vector->rx);

	/* the numerically larger class wins (bulk > low > lowest) */
	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);

	/* conservative mode (itr 3) eliminates the lowest_latency setting */
	if (current_itr == lowest_latency &&
	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
		current_itr = low_latency;

	switch (current_itr) {
	/* counts and packets in update_itr are dependent on these numbers */
	case lowest_latency:
		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
		break;
	case low_latency:
		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
		break;
	case bulk_latency:
		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
		break;
	default:
		/* unknown class: keep the value read from itr_val */
		break;
	}

set_itr_now:
	if (new_itr != q_vector->itr_val) {
		/* this attempts to bias the interrupt rate towards Bulk
		 * by adding intermediate steps when interrupt rate is
		 * increasing
		 *
		 * NOTE(review): on the new_itr > itr_val branch the
		 * divisor is at least new_itr, so the quotient cannot
		 * exceed itr_val, which is itself below new_itr.  Barring
		 * u32 overflow of the product, max() therefore always
		 * selects new_itr and no intermediate step is produced.
		 * Confirm the intended formula before changing it.
		 */
		new_itr = new_itr > q_vector->itr_val ?
			  max((new_itr * q_vector->itr_val) /
			  (new_itr + (q_vector->itr_val >> 2)),
			  new_itr) : new_itr;
		/* Don't write the value here; it resets the adapter's
		 * internal timer, and causes us to delay far longer than
		 * we should between interrupts.  Instead, we write the ITR
		 * value at the beginning of the next interrupt so the timing
		 * ends up being correct.
		 */
		q_vector->itr_val = new_itr;
		q_vector->set_itr = 1;
	}
}
6004 */ 6005 q_vector->itr_val = new_itr; 6006 q_vector->set_itr = 1; 6007 } 6008 } 6009 6010 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, 6011 struct igb_tx_buffer *first, 6012 u32 vlan_macip_lens, u32 type_tucmd, 6013 u32 mss_l4len_idx) 6014 { 6015 struct e1000_adv_tx_context_desc *context_desc; 6016 u16 i = tx_ring->next_to_use; 6017 struct timespec64 ts; 6018 6019 context_desc = IGB_TX_CTXTDESC(tx_ring, i); 6020 6021 i++; 6022 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 6023 6024 /* set bits to identify this as an advanced context descriptor */ 6025 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 6026 6027 /* For 82575, context index must be unique per ring. */ 6028 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 6029 mss_l4len_idx |= tx_ring->reg_idx << 4; 6030 6031 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 6032 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 6033 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 6034 6035 /* We assume there is always a valid tx time available. Invalid times 6036 * should have been handled by the upper layers. 
/**
 *  igb_tso - prepare an skb for hardware segmentation offload
 *  @tx_ring: ring the context descriptor is written to
 *  @first: first buffer of the packet; tx_flags, gso_segs and bytecount
 *          are updated
 *  @hdr_len: out parameter, receives the length of the headers that
 *            precede the payload (transport header included)
 *
 *  Return: 0 when the skb is not CHECKSUM_PARTIAL or not GSO (nothing is
 *  done), the negative value from skb_cow_head() when that fails, or 1
 *  after the headers were adjusted and a context descriptor was written.
 **/
static int igb_tso(struct igb_ring *tx_ring,
		   struct igb_tx_buffer *first,
		   u8 *hdr_len)
{
	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
	struct sk_buff *skb = first->skb;
	/* typed views of the network header */
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;
	/* typed views of the header at the checksum start */
	union {
		struct tcphdr *tcp;
		struct udphdr *udp;
		unsigned char *hdr;
	} l4;
	u32 paylen, l4_offset;
	int err;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (!skb_is_gso(skb))
		return 0;

	/* the headers are modified in place below */
	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	ip.hdr = skb_network_header(skb);
	l4.hdr = skb_checksum_start(skb);

	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
	type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
		      E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP;

	/* initialize outer IP header fields */
	if (ip.v4->version == 4) {
		unsigned char *csum_start = skb_checksum_start(skb);
		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);

		/* IP header will have to cancel out any data that
		 * is not a part of the outer IP header
		 */
		ip.v4->check = csum_fold(csum_partial(trans_start,
						      csum_start - trans_start,
						      0));
		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;

		/* length field is zeroed; presumably filled in per segment
		 * by the hardware - TODO confirm against the datasheet
		 */
		ip.v4->tot_len = 0;
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM |
				   IGB_TX_FLAGS_IPV4;
	} else {
		/* any version other than 4 is handled as IPv6 */
		ip.v6->payload_len = 0;
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM;
	}

	/* determine offset of inner transport header */
	l4_offset = l4.hdr - skb->data;

	/* remove payload length from inner checksum */
	paylen = skb->len - l4_offset;
	if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) {
		/* compute length of segmentation header */
		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
		csum_replace_by_diff(&l4.tcp->check,
			(__force __wsum)htonl(paylen));
	} else {
		/* compute length of segmentation header */
		*hdr_len = sizeof(*l4.udp) + l4_offset;
		csum_replace_by_diff(&l4.udp->check,
				     (__force __wsum)htonl(paylen));
	}

	/* update gso size and bytecount with header size: every segment
	 * after the first adds one more copy of the headers
	 */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	first->bytecount += (first->gso_segs - 1) * *hdr_len;

	/* MSS L4LEN IDX */
	mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT;
	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;

	/* VLAN MACLEN IPLEN */
	vlan_macip_lens = l4.hdr - ip.hdr;
	vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

	igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
			type_tucmd, mss_l4len_idx);

	return 1;
}
/**
 *  igb_tx_csum - set up checksum offload for a non-TSO packet
 *  @tx_ring: ring the context descriptor is written to
 *  @first: first buffer of the packet; IGB_TX_FLAGS_CSUM is set in
 *          tx_flags when the checksum is offloaded
 *
 *  Writes a context descriptor when the checksum is offloaded, or when
 *  it is not offloaded but the packet carries IGB_TX_FLAGS_VLAN or the
 *  ring has launchtime_enable set.  Otherwise returns without writing
 *  anything.
 **/
static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* the label sits inside this body so that the switch below
		 * can reuse the no-offload path after falling back to
		 * software checksumming
		 */
csum_failed:
		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) &&
		    !tx_ring->launchtime_enable)
			return;
		goto no_csum;
	}

	/* the position of the checksum field selects the L4 type */
	switch (skb->csum_offset) {
	case offsetof(struct tcphdr, check):
		type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
		fallthrough;
	case offsetof(struct udphdr, check):
		/* UDP leaves type_tucmd at 0 */
		break;
	case offsetof(struct sctphdr, checksum):
		/* validate that this is actually an SCTP request */
		if (skb_csum_is_sctp(skb)) {
			type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
			break;
		}
		fallthrough;
	default:
		/* NOTE(review): the return value of skb_checksum_help()
		 * is not checked here
		 */
		skb_checksum_help(skb);
		goto csum_failed;
	}

	/* update TX checksum flag */
	first->tx_flags |= IGB_TX_FLAGS_CSUM;
	vlan_macip_lens = skb_checksum_start_offset(skb) -
			  skb_network_offset(skb);
no_csum:
	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

	igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
}
\ 6187 ((u32)(_input & _flag) * (_result / _flag)) : \ 6188 ((u32)(_input & _flag) / (_flag / _result))) 6189 6190 static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 6191 { 6192 /* set type for advanced descriptor with frame checksum insertion */ 6193 u32 cmd_type = E1000_ADVTXD_DTYP_DATA | 6194 E1000_ADVTXD_DCMD_DEXT | 6195 E1000_ADVTXD_DCMD_IFCS; 6196 6197 /* set HW vlan bit if vlan is present */ 6198 cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN, 6199 (E1000_ADVTXD_DCMD_VLE)); 6200 6201 /* set segmentation bits for TSO */ 6202 cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO, 6203 (E1000_ADVTXD_DCMD_TSE)); 6204 6205 /* set timestamp bit if present */ 6206 cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP, 6207 (E1000_ADVTXD_MAC_TSTAMP)); 6208 6209 /* insert frame checksum */ 6210 cmd_type ^= IGB_SET_FLAG(skb->no_fcs, 1, E1000_ADVTXD_DCMD_IFCS); 6211 6212 return cmd_type; 6213 } 6214 6215 static void igb_tx_olinfo_status(struct igb_ring *tx_ring, 6216 union e1000_adv_tx_desc *tx_desc, 6217 u32 tx_flags, unsigned int paylen) 6218 { 6219 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; 6220 6221 /* 82575 requires a unique index per ring */ 6222 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 6223 olinfo_status |= tx_ring->reg_idx << 4; 6224 6225 /* insert L4 checksum */ 6226 olinfo_status |= IGB_SET_FLAG(tx_flags, 6227 IGB_TX_FLAGS_CSUM, 6228 (E1000_TXD_POPTS_TXSM << 8)); 6229 6230 /* insert IPv4 checksum */ 6231 olinfo_status |= IGB_SET_FLAG(tx_flags, 6232 IGB_TX_FLAGS_IPV4, 6233 (E1000_TXD_POPTS_IXSM << 8)); 6234 6235 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 6236 } 6237 6238 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) 6239 { 6240 struct net_device *netdev = tx_ring->netdev; 6241 6242 netif_stop_subqueue(netdev, tx_ring->queue_index); 6243 6244 /* Herbert's original patch had: 6245 * smp_mb__after_netif_stop_queue(); 6246 * but since that doesn't exist yet, just open code it. 
/**
 *  __igb_maybe_stop_tx - slow path of igb_maybe_stop_tx()
 *  @tx_ring: ring to check
 *  @size: number of descriptors the caller needs
 *
 *  Stops the subqueue, then re-checks the free descriptor count after a
 *  full barrier.
 *
 *  Return: -EBUSY when fewer than @size descriptors are still unused
 *  (the queue stays stopped), 0 when room appeared in the meantime (the
 *  queue is woken again and restart_queue2 is incremented).
 **/
static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Herbert's original patch had:
	 *  smp_mb__after_netif_stop_queue();
	 * but since that doesn't exist yet, just open code it.
	 */
	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available.
	 */
	if (igb_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

/**
 *  igb_maybe_stop_tx - stop the queue if the ring is short on descriptors
 *  @tx_ring: ring to check
 *  @size: number of descriptors the caller needs
 *
 *  Return: 0 when at least @size descriptors are unused, otherwise the
 *  result of __igb_maybe_stop_tx().
 **/
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	if (igb_desc_unused(tx_ring) >= size)
		return 0;
	return __igb_maybe_stop_tx(tx_ring, size);
}

/**
 *  igb_tx_map - DMA-map an skb and write its data descriptors
 *  @tx_ring: ring to place the packet on
 *  @first: first buffer of the packet (holds skb, tx_flags, bytecount)
 *  @hdr_len: header length subtracted from skb->len for the payload
 *            length field
 *
 *  Maps the linear part and then each page fragment, splitting any piece
 *  larger than IGB_MAX_DATA_PER_TXD over several descriptors.
 *
 *  Return: 0 on success.  -1 when a DMA mapping fails; in that case all
 *  mappings made so far are undone, the skb is freed and next_to_use is
 *  rewound to the packet's first buffer.
 **/
static int igb_tx_map(struct igb_ring *tx_ring,
		      struct igb_tx_buffer *first,
		      const u8 hdr_len)
{
	struct sk_buff *skb = first->skb;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc;
	skb_frag_t *frag;
	dma_addr_t dma;
	unsigned int data_len, size;
	u32 tx_flags = first->tx_flags;
	u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
	u16 i = tx_ring->next_to_use;

	tx_desc = IGB_TX_DESC(tx_ring, i);

	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);

	size = skb_headlen(skb);
	data_len = skb->data_len;

	/* the linear part is mapped first; its mapping is checked at the
	 * top of the loop together with the fragment mappings
	 */
	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);

	tx_buffer = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, size);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		/* split a piece that exceeds the per-descriptor limit */
		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
			/* presumably the length bits of cmd_type are zero,
			 * so the XOR acts as an OR - TODO confirm
			 */
			tx_desc->read.cmd_type_len =
				cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);

			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = IGB_TX_DESC(tx_ring, 0);
				i = 0;
			}
			tx_desc->read.olinfo_status = 0;

			dma += IGB_MAX_DATA_PER_TXD;
			size -= IGB_MAX_DATA_PER_TXD;

			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		/* no fragment data left: the current descriptor is the
		 * last one and is completed after the loop
		 */
		if (likely(!data_len))
			break;

		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);

		i++;
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = IGB_TX_DESC(tx_ring, 0);
			i = 0;
		}
		tx_desc->read.olinfo_status = 0;

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
				       size, DMA_TO_DEVICE);

		tx_buffer = &tx_ring->tx_buffer_info[i];
	}

	/* write last descriptor with RS and EOP bits */
	cmd_type |= size | IGB_TXD_DCMD;
	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);

	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* set the timestamp */
	first->time_stamp = jiffies;

	skb_tx_timestamp(skb);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	dma_wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);

	/* the tail write is skipped only while more packets are announced
	 * and the queue has not been stopped
	 */
	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
		writel(i, tx_ring->tail);
	}
	return 0;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");
	tx_buffer = &tx_ring->tx_buffer_info[i];

	/* clear dma mappings for failed tx_buffer_info map, walking
	 * backwards (with wrap-around) until the first buffer is reached
	 */
	while (tx_buffer != first) {
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_page(tx_ring->dev,
				       dma_unmap_addr(tx_buffer, dma),
				       dma_unmap_len(tx_buffer, len),
				       DMA_TO_DEVICE);
		dma_unmap_len_set(tx_buffer, len, 0);

		if (i-- == 0)
			i += tx_ring->count;
		tx_buffer = &tx_ring->tx_buffer_info[i];
	}

	/* the first buffer holds the linear part, mapped with
	 * dma_map_single() above
	 */
	if (dma_unmap_len(tx_buffer, len))
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);
	dma_unmap_len_set(tx_buffer, len, 0);

	dev_kfree_skb_any(tx_buffer->skb);
	tx_buffer->skb = NULL;

	tx_ring->next_to_use = i;

	return -1;
}
/**
 *  igb_xmit_xdp_ring - place an XDP frame on a Tx ring
 *  @adapter: board private structure (not referenced in this function)
 *  @tx_ring: ring to transmit on
 *  @xdpf: frame to send, optionally with page fragments
 *
 *  Return: IGB_XDP_TX once the frame's descriptors have been written, or
 *  IGB_XDP_CONSUMED when the ring lacks room or a DMA mapping fails (the
 *  mappings made so far are released).
 **/
int igb_xmit_xdp_ring(struct igb_adapter *adapter,
		      struct igb_ring *tx_ring,
		      struct xdp_frame *xdpf)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
	u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
	u16 count, i, index = tx_ring->next_to_use;
	struct igb_tx_buffer *tx_head = &tx_ring->tx_buffer_info[index];
	struct igb_tx_buffer *tx_buffer = tx_head;
	union e1000_adv_tx_desc *tx_desc = IGB_TX_DESC(tx_ring, index);
	u32 len = xdpf->len, cmd_type, olinfo_status;
	void *data = xdpf->data;

	/* descriptors needed for the linear part plus every fragment */
	count = TXD_USE_COUNT(len);
	for (i = 0; i < nr_frags; i++)
		count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i]));

	if (igb_maybe_stop_tx(tx_ring, count + 3))
		return IGB_XDP_CONSUMED;

	/* i is reused below as the index of the next fragment to map */
	i = 0;
	/* record the location of the first descriptor for this packet */
	tx_head->bytecount = xdp_get_frame_len(xdpf);
	tx_head->type = IGB_TYPE_XDP;
	tx_head->gso_segs = 1;
	tx_head->xdpf = xdpf;

	olinfo_status = tx_head->bytecount << E1000_ADVTXD_PAYLEN_SHIFT;
	/* 82575 requires a unique index per ring */
	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		olinfo_status |= tx_ring->reg_idx << 4;
	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);

	/* one pass per buffer: the linear data first, then each fragment */
	for (;;) {
		dma_addr_t dma;

		dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
		if (dma_mapping_error(tx_ring->dev, dma))
			goto unmap;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, len);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		/* put descriptor type bits */
		cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
			   E1000_ADVTXD_DCMD_IFCS | len;

		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		tx_buffer->protocol = 0;

		if (++index == tx_ring->count)
			index = 0;

		/* all fragments consumed: tx_desc is the last descriptor */
		if (i == nr_frags)
			break;

		tx_buffer = &tx_ring->tx_buffer_info[index];
		tx_desc = IGB_TX_DESC(tx_ring, index);
		tx_desc->read.olinfo_status = 0;

		data = skb_frag_address(&sinfo->frags[i]);
		len = skb_frag_size(&sinfo->frags[i]);
		i++;
	}
	/* add the IGB_TXD_DCMD bits to the last descriptor only */
	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD);

	netdev_tx_sent_queue(txring_txq(tx_ring), tx_head->bytecount);
	/* set the timestamp */
	tx_head->time_stamp = jiffies;

	/* Avoid any potential race with xdp_xmit and cleanup */
	smp_wmb();

	/* set next_to_watch value indicating a packet is present */
	tx_head->next_to_watch = tx_desc;
	tx_ring->next_to_use = index;

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);

	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
		writel(index, tx_ring->tail);

	return IGB_XDP_TX;

unmap:
	/* walk backwards (with wrap-around) from the buffer whose mapping
	 * failed to the head buffer, releasing whatever was mapped
	 */
	for (;;) {
		tx_buffer = &tx_ring->tx_buffer_info[index];
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_page(tx_ring->dev,
				       dma_unmap_addr(tx_buffer, dma),
				       dma_unmap_len(tx_buffer, len),
				       DMA_TO_DEVICE);
		dma_unmap_len_set(tx_buffer, len, 0);
		if (tx_buffer == tx_head)
			break;

		if (!index)
			index += tx_ring->count;
		index--;
	}

	return IGB_XDP_CONSUMED;
}
skb_frag_size(&sinfo->frags[i]); 6483 i++; 6484 } 6485 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD); 6486 6487 netdev_tx_sent_queue(txring_txq(tx_ring), tx_head->bytecount); 6488 /* set the timestamp */ 6489 tx_head->time_stamp = jiffies; 6490 6491 /* Avoid any potential race with xdp_xmit and cleanup */ 6492 smp_wmb(); 6493 6494 /* set next_to_watch value indicating a packet is present */ 6495 tx_head->next_to_watch = tx_desc; 6496 tx_ring->next_to_use = index; 6497 6498 /* Make sure there is space in the ring for the next send. */ 6499 igb_maybe_stop_tx(tx_ring, DESC_NEEDED); 6500 6501 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) 6502 writel(index, tx_ring->tail); 6503 6504 return IGB_XDP_TX; 6505 6506 unmap: 6507 for (;;) { 6508 tx_buffer = &tx_ring->tx_buffer_info[index]; 6509 if (dma_unmap_len(tx_buffer, len)) 6510 dma_unmap_page(tx_ring->dev, 6511 dma_unmap_addr(tx_buffer, dma), 6512 dma_unmap_len(tx_buffer, len), 6513 DMA_TO_DEVICE); 6514 dma_unmap_len_set(tx_buffer, len, 0); 6515 if (tx_buffer == tx_head) 6516 break; 6517 6518 if (!index) 6519 index += tx_ring->count; 6520 index--; 6521 } 6522 6523 return IGB_XDP_CONSUMED; 6524 } 6525 6526 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, 6527 struct igb_ring *tx_ring) 6528 { 6529 struct igb_tx_buffer *first; 6530 int tso; 6531 u32 tx_flags = 0; 6532 unsigned short f; 6533 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 6534 __be16 protocol = vlan_get_protocol(skb); 6535 u8 hdr_len = 0; 6536 6537 /* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, 6538 * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, 6539 * + 2 desc gap to keep tail from touching head, 6540 * + 1 desc for context descriptor, 6541 * otherwise try next time 6542 */ 6543 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 6544 count += TXD_USE_COUNT(skb_frag_size( 6545 &skb_shinfo(skb)->frags[f])); 6546 6547 if (igb_maybe_stop_tx(tx_ring, count + 3)) { 6548 /* this is a hard error */ 6549 return 
NETDEV_TX_BUSY; 6550 } 6551 6552 if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))) 6553 return NETDEV_TX_BUSY; 6554 6555 /* record the location of the first descriptor for this packet */ 6556 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 6557 first->type = IGB_TYPE_SKB; 6558 first->skb = skb; 6559 first->bytecount = skb->len; 6560 first->gso_segs = 1; 6561 6562 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 6563 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); 6564 6565 if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && 6566 !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS, 6567 &adapter->state)) { 6568 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 6569 tx_flags |= IGB_TX_FLAGS_TSTAMP; 6570 6571 adapter->ptp_tx_skb = skb_get(skb); 6572 adapter->ptp_tx_start = jiffies; 6573 if (adapter->hw.mac.type == e1000_82576) 6574 schedule_work(&adapter->ptp_tx_work); 6575 } else { 6576 adapter->tx_hwtstamp_skipped++; 6577 } 6578 } 6579 6580 if (skb_vlan_tag_present(skb)) { 6581 tx_flags |= IGB_TX_FLAGS_VLAN; 6582 tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); 6583 } 6584 6585 /* record initial flags and protocol */ 6586 first->tx_flags = tx_flags; 6587 first->protocol = protocol; 6588 6589 tso = igb_tso(tx_ring, first, &hdr_len); 6590 if (tso < 0) 6591 goto out_drop; 6592 else if (!tso) 6593 igb_tx_csum(tx_ring, first); 6594 6595 if (igb_tx_map(tx_ring, first, hdr_len)) 6596 goto cleanup_tx_tstamp; 6597 6598 return NETDEV_TX_OK; 6599 6600 out_drop: 6601 dev_kfree_skb_any(first->skb); 6602 first->skb = NULL; 6603 cleanup_tx_tstamp: 6604 if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP)) { 6605 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); 6606 6607 dev_kfree_skb_any(adapter->ptp_tx_skb); 6608 adapter->ptp_tx_skb = NULL; 6609 if (adapter->hw.mac.type == e1000_82576) 6610 cancel_work_sync(&adapter->ptp_tx_work); 6611 clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state); 6612 } 6613 6614 
return NETDEV_TX_OK; 6615 } 6616 6617 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, 6618 struct sk_buff *skb) 6619 { 6620 unsigned int r_idx = skb->queue_mapping; 6621 6622 if (r_idx >= adapter->num_tx_queues) 6623 r_idx = r_idx % adapter->num_tx_queues; 6624 6625 return adapter->tx_ring[r_idx]; 6626 } 6627 6628 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, 6629 struct net_device *netdev) 6630 { 6631 struct igb_adapter *adapter = netdev_priv(netdev); 6632 6633 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb 6634 * in order to meet this minimum size requirement. 6635 */ 6636 if (skb_put_padto(skb, 17)) 6637 return NETDEV_TX_OK; 6638 6639 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); 6640 } 6641 6642 /** 6643 * igb_tx_timeout - Respond to a Tx Hang 6644 * @netdev: network interface device structure 6645 * @txqueue: number of the Tx queue that hung (unused) 6646 **/ 6647 static void igb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) 6648 { 6649 struct igb_adapter *adapter = netdev_priv(netdev); 6650 struct e1000_hw *hw = &adapter->hw; 6651 6652 /* Do the reset outside of interrupt context */ 6653 adapter->tx_timeout_count++; 6654 6655 if (hw->mac.type >= e1000_82580) 6656 hw->dev_spec._82575.global_device_reset = true; 6657 6658 schedule_work(&adapter->reset_task); 6659 wr32(E1000_EICS, 6660 (adapter->eims_enable_mask & ~adapter->eims_other)); 6661 } 6662 6663 static void igb_reset_task(struct work_struct *work) 6664 { 6665 struct igb_adapter *adapter; 6666 adapter = container_of(work, struct igb_adapter, reset_task); 6667 6668 rtnl_lock(); 6669 /* If we're already down or resetting, just bail */ 6670 if (test_bit(__IGB_DOWN, &adapter->state) || 6671 test_bit(__IGB_RESETTING, &adapter->state)) { 6672 rtnl_unlock(); 6673 return; 6674 } 6675 6676 igb_dump(adapter); 6677 netdev_err(adapter->netdev, "Reset adapter\n"); 6678 igb_reinit_locked(adapter); 6679 
rtnl_unlock(); 6680 } 6681 6682 /** 6683 * igb_get_stats64 - Get System Network Statistics 6684 * @netdev: network interface device structure 6685 * @stats: rtnl_link_stats64 pointer 6686 **/ 6687 static void igb_get_stats64(struct net_device *netdev, 6688 struct rtnl_link_stats64 *stats) 6689 { 6690 struct igb_adapter *adapter = netdev_priv(netdev); 6691 6692 spin_lock(&adapter->stats64_lock); 6693 igb_update_stats(adapter); 6694 memcpy(stats, &adapter->stats64, sizeof(*stats)); 6695 spin_unlock(&adapter->stats64_lock); 6696 } 6697 6698 /** 6699 * igb_change_mtu - Change the Maximum Transfer Unit 6700 * @netdev: network interface device structure 6701 * @new_mtu: new value for maximum frame size 6702 * 6703 * Returns 0 on success, negative on failure 6704 **/ 6705 static int igb_change_mtu(struct net_device *netdev, int new_mtu) 6706 { 6707 struct igb_adapter *adapter = netdev_priv(netdev); 6708 int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD; 6709 6710 if (igb_xdp_is_enabled(adapter)) { 6711 int i; 6712 6713 for (i = 0; i < adapter->num_rx_queues; i++) { 6714 struct igb_ring *ring = adapter->rx_ring[i]; 6715 6716 if (max_frame > igb_rx_bufsz(ring)) { 6717 netdev_warn(adapter->netdev, 6718 "Requested MTU size is not supported with XDP. 
Max frame size is %d\n", 6719 max_frame); 6720 return -EINVAL; 6721 } 6722 } 6723 } 6724 6725 /* adjust max frame to be at least the size of a standard frame */ 6726 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 6727 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 6728 6729 while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) 6730 usleep_range(1000, 2000); 6731 6732 /* igb_down has a dependency on max_frame_size */ 6733 adapter->max_frame_size = max_frame; 6734 6735 if (netif_running(netdev)) 6736 igb_down(adapter); 6737 6738 netdev_dbg(netdev, "changing MTU from %d to %d\n", 6739 netdev->mtu, new_mtu); 6740 WRITE_ONCE(netdev->mtu, new_mtu); 6741 6742 if (netif_running(netdev)) 6743 igb_up(adapter); 6744 else 6745 igb_reset(adapter); 6746 6747 clear_bit(__IGB_RESETTING, &adapter->state); 6748 6749 return 0; 6750 } 6751 6752 /** 6753 * igb_update_stats - Update the board statistics counters 6754 * @adapter: board private structure 6755 **/ 6756 void igb_update_stats(struct igb_adapter *adapter) 6757 { 6758 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 6759 struct e1000_hw *hw = &adapter->hw; 6760 struct pci_dev *pdev = adapter->pdev; 6761 u32 reg, mpc; 6762 int i; 6763 u64 bytes, packets; 6764 unsigned int start; 6765 u64 _bytes, _packets; 6766 6767 /* Prevent stats update while adapter is being reset, or if the pci 6768 * connection is down. 
6769 */ 6770 if (adapter->link_speed == 0) 6771 return; 6772 if (pci_channel_offline(pdev)) 6773 return; 6774 6775 bytes = 0; 6776 packets = 0; 6777 6778 rcu_read_lock(); 6779 for (i = 0; i < adapter->num_rx_queues; i++) { 6780 struct igb_ring *ring = adapter->rx_ring[i]; 6781 u32 rqdpc = rd32(E1000_RQDPC(i)); 6782 if (hw->mac.type >= e1000_i210) 6783 wr32(E1000_RQDPC(i), 0); 6784 6785 if (rqdpc) { 6786 ring->rx_stats.drops += rqdpc; 6787 net_stats->rx_fifo_errors += rqdpc; 6788 } 6789 6790 do { 6791 start = u64_stats_fetch_begin(&ring->rx_syncp); 6792 _bytes = ring->rx_stats.bytes; 6793 _packets = ring->rx_stats.packets; 6794 } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); 6795 bytes += _bytes; 6796 packets += _packets; 6797 } 6798 6799 net_stats->rx_bytes = bytes; 6800 net_stats->rx_packets = packets; 6801 6802 bytes = 0; 6803 packets = 0; 6804 for (i = 0; i < adapter->num_tx_queues; i++) { 6805 struct igb_ring *ring = adapter->tx_ring[i]; 6806 do { 6807 start = u64_stats_fetch_begin(&ring->tx_syncp); 6808 _bytes = ring->tx_stats.bytes; 6809 _packets = ring->tx_stats.packets; 6810 } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); 6811 bytes += _bytes; 6812 packets += _packets; 6813 } 6814 net_stats->tx_bytes = bytes; 6815 net_stats->tx_packets = packets; 6816 rcu_read_unlock(); 6817 6818 /* read stats registers */ 6819 adapter->stats.crcerrs += rd32(E1000_CRCERRS); 6820 adapter->stats.gprc += rd32(E1000_GPRC); 6821 adapter->stats.gorc += rd32(E1000_GORCL); 6822 rd32(E1000_GORCH); /* clear GORCL */ 6823 adapter->stats.bprc += rd32(E1000_BPRC); 6824 adapter->stats.mprc += rd32(E1000_MPRC); 6825 adapter->stats.roc += rd32(E1000_ROC); 6826 6827 adapter->stats.prc64 += rd32(E1000_PRC64); 6828 adapter->stats.prc127 += rd32(E1000_PRC127); 6829 adapter->stats.prc255 += rd32(E1000_PRC255); 6830 adapter->stats.prc511 += rd32(E1000_PRC511); 6831 adapter->stats.prc1023 += rd32(E1000_PRC1023); 6832 adapter->stats.prc1522 += rd32(E1000_PRC1522); 6833 
adapter->stats.symerrs += rd32(E1000_SYMERRS); 6834 adapter->stats.sec += rd32(E1000_SEC); 6835 6836 mpc = rd32(E1000_MPC); 6837 adapter->stats.mpc += mpc; 6838 net_stats->rx_fifo_errors += mpc; 6839 adapter->stats.scc += rd32(E1000_SCC); 6840 adapter->stats.ecol += rd32(E1000_ECOL); 6841 adapter->stats.mcc += rd32(E1000_MCC); 6842 adapter->stats.latecol += rd32(E1000_LATECOL); 6843 adapter->stats.dc += rd32(E1000_DC); 6844 adapter->stats.rlec += rd32(E1000_RLEC); 6845 adapter->stats.xonrxc += rd32(E1000_XONRXC); 6846 adapter->stats.xontxc += rd32(E1000_XONTXC); 6847 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC); 6848 adapter->stats.xofftxc += rd32(E1000_XOFFTXC); 6849 adapter->stats.fcruc += rd32(E1000_FCRUC); 6850 adapter->stats.gptc += rd32(E1000_GPTC); 6851 adapter->stats.gotc += rd32(E1000_GOTCL); 6852 rd32(E1000_GOTCH); /* clear GOTCL */ 6853 adapter->stats.rnbc += rd32(E1000_RNBC); 6854 adapter->stats.ruc += rd32(E1000_RUC); 6855 adapter->stats.rfc += rd32(E1000_RFC); 6856 adapter->stats.rjc += rd32(E1000_RJC); 6857 adapter->stats.tor += rd32(E1000_TORH); 6858 adapter->stats.tot += rd32(E1000_TOTH); 6859 adapter->stats.tpr += rd32(E1000_TPR); 6860 6861 adapter->stats.ptc64 += rd32(E1000_PTC64); 6862 adapter->stats.ptc127 += rd32(E1000_PTC127); 6863 adapter->stats.ptc255 += rd32(E1000_PTC255); 6864 adapter->stats.ptc511 += rd32(E1000_PTC511); 6865 adapter->stats.ptc1023 += rd32(E1000_PTC1023); 6866 adapter->stats.ptc1522 += rd32(E1000_PTC1522); 6867 6868 adapter->stats.mptc += rd32(E1000_MPTC); 6869 adapter->stats.bptc += rd32(E1000_BPTC); 6870 6871 adapter->stats.tpt += rd32(E1000_TPT); 6872 adapter->stats.colc += rd32(E1000_COLC); 6873 6874 adapter->stats.algnerrc += rd32(E1000_ALGNERRC); 6875 /* read internal phy specific stats */ 6876 reg = rd32(E1000_CTRL_EXT); 6877 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { 6878 adapter->stats.rxerrc += rd32(E1000_RXERRC); 6879 6880 /* this stat has invalid values on i210/i211 */ 6881 if ((hw->mac.type != e1000_i210) 
&& 6882 (hw->mac.type != e1000_i211)) 6883 adapter->stats.tncrs += rd32(E1000_TNCRS); 6884 } 6885 6886 adapter->stats.tsctc += rd32(E1000_TSCTC); 6887 adapter->stats.tsctfc += rd32(E1000_TSCTFC); 6888 6889 adapter->stats.iac += rd32(E1000_IAC); 6890 adapter->stats.icrxoc += rd32(E1000_ICRXOC); 6891 adapter->stats.icrxptc += rd32(E1000_ICRXPTC); 6892 adapter->stats.icrxatc += rd32(E1000_ICRXATC); 6893 adapter->stats.ictxptc += rd32(E1000_ICTXPTC); 6894 adapter->stats.ictxatc += rd32(E1000_ICTXATC); 6895 adapter->stats.ictxqec += rd32(E1000_ICTXQEC); 6896 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC); 6897 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC); 6898 6899 /* Fill out the OS statistics structure */ 6900 net_stats->multicast = adapter->stats.mprc; 6901 net_stats->collisions = adapter->stats.colc; 6902 6903 /* Rx Errors */ 6904 6905 /* RLEC on some newer hardware can be incorrect so build 6906 * our own version based on RUC and ROC 6907 */ 6908 net_stats->rx_errors = adapter->stats.rxerrc + 6909 adapter->stats.crcerrs + adapter->stats.algnerrc + 6910 adapter->stats.ruc + adapter->stats.roc + 6911 adapter->stats.cexterr; 6912 net_stats->rx_length_errors = adapter->stats.ruc + 6913 adapter->stats.roc; 6914 net_stats->rx_crc_errors = adapter->stats.crcerrs; 6915 net_stats->rx_frame_errors = adapter->stats.algnerrc; 6916 net_stats->rx_missed_errors = adapter->stats.mpc; 6917 6918 /* Tx Errors */ 6919 net_stats->tx_errors = adapter->stats.ecol + 6920 adapter->stats.latecol; 6921 net_stats->tx_aborted_errors = adapter->stats.ecol; 6922 net_stats->tx_window_errors = adapter->stats.latecol; 6923 net_stats->tx_carrier_errors = adapter->stats.tncrs; 6924 6925 /* Tx Dropped needs to be maintained elsewhere */ 6926 6927 /* Management Stats */ 6928 adapter->stats.mgptc += rd32(E1000_MGTPTC); 6929 adapter->stats.mgprc += rd32(E1000_MGTPRC); 6930 adapter->stats.mgpdc += rd32(E1000_MGTPDC); 6931 6932 /* OS2BMC Stats */ 6933 reg = rd32(E1000_MANC); 6934 if (reg & 
E1000_MANC_EN_BMC2OS) { 6935 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC); 6936 adapter->stats.o2bspc += rd32(E1000_O2BSPC); 6937 adapter->stats.b2ospc += rd32(E1000_B2OSPC); 6938 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC); 6939 } 6940 } 6941 6942 static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) 6943 { 6944 int pin = ptp_find_pin(adapter->ptp_clock, PTP_PF_PEROUT, tsintr_tt); 6945 struct e1000_hw *hw = &adapter->hw; 6946 struct timespec64 ts; 6947 u32 tsauxc; 6948 6949 if (pin < 0 || pin >= IGB_N_SDP) 6950 return; 6951 6952 spin_lock(&adapter->tmreg_lock); 6953 6954 if (hw->mac.type == e1000_82580 || 6955 hw->mac.type == e1000_i354 || 6956 hw->mac.type == e1000_i350) { 6957 s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period); 6958 u32 systiml, systimh, level_mask, level, rem; 6959 u64 systim, now; 6960 6961 /* read systim registers in sequence */ 6962 rd32(E1000_SYSTIMR); 6963 systiml = rd32(E1000_SYSTIML); 6964 systimh = rd32(E1000_SYSTIMH); 6965 systim = (((u64)(systimh & 0xFF)) << 32) | ((u64)systiml); 6966 now = timecounter_cyc2time(&adapter->tc, systim); 6967 6968 if (pin < 2) { 6969 level_mask = (tsintr_tt == 1) ? 0x80000 : 0x40000; 6970 level = (rd32(E1000_CTRL) & level_mask) ? 1 : 0; 6971 } else { 6972 level_mask = (tsintr_tt == 1) ? 0x80 : 0x40; 6973 level = (rd32(E1000_CTRL_EXT) & level_mask) ? 
1 : 0; 6974 } 6975 6976 div_u64_rem(now, ns, &rem); 6977 systim = systim + (ns - rem); 6978 6979 /* synchronize pin level with rising/falling edges */ 6980 div_u64_rem(now, ns << 1, &rem); 6981 if (rem < ns) { 6982 /* first half of period */ 6983 if (level == 0) { 6984 /* output is already low, skip this period */ 6985 systim += ns; 6986 pr_notice("igb: periodic output on %s missed falling edge\n", 6987 adapter->sdp_config[pin].name); 6988 } 6989 } else { 6990 /* second half of period */ 6991 if (level == 1) { 6992 /* output is already high, skip this period */ 6993 systim += ns; 6994 pr_notice("igb: periodic output on %s missed rising edge\n", 6995 adapter->sdp_config[pin].name); 6996 } 6997 } 6998 6999 /* for this chip family tv_sec is the upper part of the binary value, 7000 * so not seconds 7001 */ 7002 ts.tv_nsec = (u32)systim; 7003 ts.tv_sec = ((u32)(systim >> 32)) & 0xFF; 7004 } else { 7005 ts = timespec64_add(adapter->perout[tsintr_tt].start, 7006 adapter->perout[tsintr_tt].period); 7007 } 7008 7009 /* u32 conversion of tv_sec is safe until y2106 */ 7010 wr32((tsintr_tt == 1) ? E1000_TRGTTIML1 : E1000_TRGTTIML0, ts.tv_nsec); 7011 wr32((tsintr_tt == 1) ? E1000_TRGTTIMH1 : E1000_TRGTTIMH0, (u32)ts.tv_sec); 7012 tsauxc = rd32(E1000_TSAUXC); 7013 tsauxc |= TSAUXC_EN_TT0; 7014 wr32(E1000_TSAUXC, tsauxc); 7015 adapter->perout[tsintr_tt].start = ts; 7016 7017 spin_unlock(&adapter->tmreg_lock); 7018 } 7019 7020 static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) 7021 { 7022 int pin = ptp_find_pin(adapter->ptp_clock, PTP_PF_EXTTS, tsintr_tt); 7023 int auxstmpl = (tsintr_tt == 1) ? E1000_AUXSTMPL1 : E1000_AUXSTMPL0; 7024 int auxstmph = (tsintr_tt == 1) ? 
E1000_AUXSTMPH1 : E1000_AUXSTMPH0; 7025 struct e1000_hw *hw = &adapter->hw; 7026 struct ptp_clock_event event; 7027 struct timespec64 ts; 7028 unsigned long flags; 7029 7030 if (pin < 0 || pin >= IGB_N_SDP) 7031 return; 7032 7033 if (hw->mac.type == e1000_82580 || 7034 hw->mac.type == e1000_i354 || 7035 hw->mac.type == e1000_i350) { 7036 u64 ns = rd32(auxstmpl); 7037 7038 ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32; 7039 spin_lock_irqsave(&adapter->tmreg_lock, flags); 7040 ns = timecounter_cyc2time(&adapter->tc, ns); 7041 spin_unlock_irqrestore(&adapter->tmreg_lock, flags); 7042 ts = ns_to_timespec64(ns); 7043 } else { 7044 ts.tv_nsec = rd32(auxstmpl); 7045 ts.tv_sec = rd32(auxstmph); 7046 } 7047 7048 event.type = PTP_CLOCK_EXTTS; 7049 event.index = tsintr_tt; 7050 event.timestamp = ts.tv_sec * 1000000000ULL + ts.tv_nsec; 7051 ptp_clock_event(adapter->ptp_clock, &event); 7052 } 7053 7054 static void igb_tsync_interrupt(struct igb_adapter *adapter) 7055 { 7056 const u32 mask = (TSINTR_SYS_WRAP | E1000_TSICR_TXTS | 7057 TSINTR_TT0 | TSINTR_TT1 | 7058 TSINTR_AUTT0 | TSINTR_AUTT1); 7059 struct e1000_hw *hw = &adapter->hw; 7060 u32 tsicr = rd32(E1000_TSICR); 7061 struct ptp_clock_event event; 7062 7063 if (hw->mac.type == e1000_82580) { 7064 /* 82580 has a hardware bug that requires an explicit 7065 * write to clear the TimeSync interrupt cause. 
7066 */ 7067 wr32(E1000_TSICR, tsicr & mask); 7068 } 7069 7070 if (tsicr & TSINTR_SYS_WRAP) { 7071 event.type = PTP_CLOCK_PPS; 7072 if (adapter->ptp_caps.pps) 7073 ptp_clock_event(adapter->ptp_clock, &event); 7074 } 7075 7076 if (tsicr & E1000_TSICR_TXTS) { 7077 /* retrieve hardware timestamp */ 7078 schedule_work(&adapter->ptp_tx_work); 7079 } 7080 7081 if (tsicr & TSINTR_TT0) 7082 igb_perout(adapter, 0); 7083 7084 if (tsicr & TSINTR_TT1) 7085 igb_perout(adapter, 1); 7086 7087 if (tsicr & TSINTR_AUTT0) 7088 igb_extts(adapter, 0); 7089 7090 if (tsicr & TSINTR_AUTT1) 7091 igb_extts(adapter, 1); 7092 } 7093 7094 static irqreturn_t igb_msix_other(int irq, void *data) 7095 { 7096 struct igb_adapter *adapter = data; 7097 struct e1000_hw *hw = &adapter->hw; 7098 u32 icr = rd32(E1000_ICR); 7099 /* reading ICR causes bit 31 of EICR to be cleared */ 7100 7101 if (icr & E1000_ICR_DRSTA) 7102 schedule_work(&adapter->reset_task); 7103 7104 if (icr & E1000_ICR_DOUTSYNC) { 7105 /* HW is reporting DMA is out of sync */ 7106 adapter->stats.doosync++; 7107 /* The DMA Out of Sync is also indication of a spoof event 7108 * in IOV mode. Check the Wrong VM Behavior register to 7109 * see if it is really a spoof event. 
7110 */ 7111 igb_check_wvbr(adapter); 7112 } 7113 7114 /* Check for a mailbox event */ 7115 if (icr & E1000_ICR_VMMB) 7116 igb_msg_task(adapter); 7117 7118 if (icr & E1000_ICR_LSC) { 7119 hw->mac.get_link_status = 1; 7120 /* guard against interrupt when we're going down */ 7121 if (!test_bit(__IGB_DOWN, &adapter->state)) 7122 mod_timer(&adapter->watchdog_timer, jiffies + 1); 7123 } 7124 7125 if (icr & E1000_ICR_TS) 7126 igb_tsync_interrupt(adapter); 7127 7128 wr32(E1000_EIMS, adapter->eims_other); 7129 7130 return IRQ_HANDLED; 7131 } 7132 7133 static void igb_write_itr(struct igb_q_vector *q_vector) 7134 { 7135 struct igb_adapter *adapter = q_vector->adapter; 7136 u32 itr_val = q_vector->itr_val & 0x7FFC; 7137 7138 if (!q_vector->set_itr) 7139 return; 7140 7141 if (!itr_val) 7142 itr_val = 0x4; 7143 7144 if (adapter->hw.mac.type == e1000_82575) 7145 itr_val |= itr_val << 16; 7146 else 7147 itr_val |= E1000_EITR_CNT_IGNR; 7148 7149 writel(itr_val, q_vector->itr_register); 7150 q_vector->set_itr = 0; 7151 } 7152 7153 static irqreturn_t igb_msix_ring(int irq, void *data) 7154 { 7155 struct igb_q_vector *q_vector = data; 7156 7157 /* Write the ITR value calculated from the previous interrupt. */ 7158 igb_write_itr(q_vector); 7159 7160 napi_schedule(&q_vector->napi); 7161 7162 return IRQ_HANDLED; 7163 } 7164 7165 #ifdef CONFIG_IGB_DCA 7166 static void igb_update_tx_dca(struct igb_adapter *adapter, 7167 struct igb_ring *tx_ring, 7168 int cpu) 7169 { 7170 struct e1000_hw *hw = &adapter->hw; 7171 u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); 7172 7173 if (hw->mac.type != e1000_82575) 7174 txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT; 7175 7176 /* We can enable relaxed ordering for reads, but not writes when 7177 * DCA is enabled. This is due to a known issue in some chipsets 7178 * which will cause the DCA tag to be cleared. 
7179 */ 7180 txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN | 7181 E1000_DCA_TXCTRL_DATA_RRO_EN | 7182 E1000_DCA_TXCTRL_DESC_DCA_EN; 7183 7184 wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl); 7185 } 7186 7187 static void igb_update_rx_dca(struct igb_adapter *adapter, 7188 struct igb_ring *rx_ring, 7189 int cpu) 7190 { 7191 struct e1000_hw *hw = &adapter->hw; 7192 u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); 7193 7194 if (hw->mac.type != e1000_82575) 7195 rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT; 7196 7197 /* We can enable relaxed ordering for reads, but not writes when 7198 * DCA is enabled. This is due to a known issue in some chipsets 7199 * which will cause the DCA tag to be cleared. 7200 */ 7201 rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN | 7202 E1000_DCA_RXCTRL_DESC_DCA_EN; 7203 7204 wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl); 7205 } 7206 7207 static void igb_update_dca(struct igb_q_vector *q_vector) 7208 { 7209 struct igb_adapter *adapter = q_vector->adapter; 7210 int cpu = get_cpu(); 7211 7212 if (q_vector->cpu == cpu) 7213 goto out_no_update; 7214 7215 if (q_vector->tx.ring) 7216 igb_update_tx_dca(adapter, q_vector->tx.ring, cpu); 7217 7218 if (q_vector->rx.ring) 7219 igb_update_rx_dca(adapter, q_vector->rx.ring, cpu); 7220 7221 q_vector->cpu = cpu; 7222 out_no_update: 7223 put_cpu(); 7224 } 7225 7226 static void igb_setup_dca(struct igb_adapter *adapter) 7227 { 7228 struct e1000_hw *hw = &adapter->hw; 7229 int i; 7230 7231 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED)) 7232 return; 7233 7234 /* Always use CB2 mode, difference is masked in the CB driver. 
*/ 7235 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); 7236 7237 for (i = 0; i < adapter->num_q_vectors; i++) { 7238 adapter->q_vector[i]->cpu = -1; 7239 igb_update_dca(adapter->q_vector[i]); 7240 } 7241 } 7242 7243 static int __igb_notify_dca(struct device *dev, void *data) 7244 { 7245 struct net_device *netdev = dev_get_drvdata(dev); 7246 struct igb_adapter *adapter = netdev_priv(netdev); 7247 struct pci_dev *pdev = adapter->pdev; 7248 struct e1000_hw *hw = &adapter->hw; 7249 unsigned long event = *(unsigned long *)data; 7250 7251 switch (event) { 7252 case DCA_PROVIDER_ADD: 7253 /* if already enabled, don't do it again */ 7254 if (adapter->flags & IGB_FLAG_DCA_ENABLED) 7255 break; 7256 if (dca_add_requester(dev) == 0) { 7257 adapter->flags |= IGB_FLAG_DCA_ENABLED; 7258 dev_info(&pdev->dev, "DCA enabled\n"); 7259 igb_setup_dca(adapter); 7260 break; 7261 } 7262 fallthrough; /* since DCA is disabled. */ 7263 case DCA_PROVIDER_REMOVE: 7264 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 7265 /* without this a class_device is left 7266 * hanging around in the sysfs model 7267 */ 7268 dca_remove_requester(dev); 7269 dev_info(&pdev->dev, "DCA disabled\n"); 7270 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 7271 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 7272 } 7273 break; 7274 } 7275 7276 return 0; 7277 } 7278 7279 static int igb_notify_dca(struct notifier_block *nb, unsigned long event, 7280 void *p) 7281 { 7282 int ret_val; 7283 7284 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, 7285 __igb_notify_dca); 7286 7287 return ret_val ? 
NOTIFY_BAD : NOTIFY_DONE; 7288 } 7289 #endif /* CONFIG_IGB_DCA */ 7290 7291 #ifdef CONFIG_PCI_IOV 7292 static int igb_vf_configure(struct igb_adapter *adapter, int vf) 7293 { 7294 unsigned char mac_addr[ETH_ALEN]; 7295 7296 eth_zero_addr(mac_addr); 7297 igb_set_vf_mac(adapter, vf, mac_addr); 7298 7299 /* By default spoof check is enabled for all VFs */ 7300 adapter->vf_data[vf].spoofchk_enabled = true; 7301 7302 /* By default VFs are not trusted */ 7303 adapter->vf_data[vf].trusted = false; 7304 7305 return 0; 7306 } 7307 7308 #endif 7309 static void igb_ping_all_vfs(struct igb_adapter *adapter) 7310 { 7311 struct e1000_hw *hw = &adapter->hw; 7312 u32 ping; 7313 int i; 7314 7315 for (i = 0 ; i < adapter->vfs_allocated_count; i++) { 7316 ping = E1000_PF_CONTROL_MSG; 7317 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS) 7318 ping |= E1000_VT_MSGTYPE_CTS; 7319 igb_write_mbx(hw, &ping, 1, i); 7320 } 7321 } 7322 7323 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) 7324 { 7325 struct e1000_hw *hw = &adapter->hw; 7326 u32 vmolr = rd32(E1000_VMOLR(vf)); 7327 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 7328 7329 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | 7330 IGB_VF_FLAG_MULTI_PROMISC); 7331 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); 7332 7333 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { 7334 vmolr |= E1000_VMOLR_MPME; 7335 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; 7336 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; 7337 } else { 7338 /* if we have hashes and we are clearing a multicast promisc 7339 * flag we need to write the hashes to the MTA as this step 7340 * was previously skipped 7341 */ 7342 if (vf_data->num_vf_mc_hashes > 30) { 7343 vmolr |= E1000_VMOLR_MPME; 7344 } else if (vf_data->num_vf_mc_hashes) { 7345 int j; 7346 7347 vmolr |= E1000_VMOLR_ROMPE; 7348 for (j = 0; j < vf_data->num_vf_mc_hashes; j++) 7349 igb_mta_set(hw, vf_data->vf_mc_hashes[j]); 7350 } 7351 } 7352 7353 
wr32(E1000_VMOLR(vf), vmolr); 7354 7355 /* there are flags left unprocessed, likely not supported */ 7356 if (*msgbuf & E1000_VT_MSGINFO_MASK) 7357 return -EINVAL; 7358 7359 return 0; 7360 } 7361 7362 static int igb_set_vf_multicasts(struct igb_adapter *adapter, 7363 u32 *msgbuf, u32 vf) 7364 { 7365 int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); 7366 u16 *hash_list = (u16 *)&msgbuf[1]; 7367 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 7368 int i; 7369 7370 /* salt away the number of multicast addresses assigned 7371 * to this VF for later use to restore when the PF multi cast 7372 * list changes 7373 */ 7374 vf_data->num_vf_mc_hashes = n; 7375 7376 /* only up to 30 hash values supported */ 7377 if (n > 30) 7378 n = 30; 7379 7380 /* store the hashes for later use */ 7381 for (i = 0; i < n; i++) 7382 vf_data->vf_mc_hashes[i] = hash_list[i]; 7383 7384 /* Flush and reset the mta with the new values */ 7385 igb_set_rx_mode(adapter->netdev); 7386 7387 return 0; 7388 } 7389 7390 static void igb_restore_vf_multicasts(struct igb_adapter *adapter) 7391 { 7392 struct e1000_hw *hw = &adapter->hw; 7393 struct vf_data_storage *vf_data; 7394 int i, j; 7395 7396 for (i = 0; i < adapter->vfs_allocated_count; i++) { 7397 u32 vmolr = rd32(E1000_VMOLR(i)); 7398 7399 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); 7400 7401 vf_data = &adapter->vf_data[i]; 7402 7403 if ((vf_data->num_vf_mc_hashes > 30) || 7404 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) { 7405 vmolr |= E1000_VMOLR_MPME; 7406 } else if (vf_data->num_vf_mc_hashes) { 7407 vmolr |= E1000_VMOLR_ROMPE; 7408 for (j = 0; j < vf_data->num_vf_mc_hashes; j++) 7409 igb_mta_set(hw, vf_data->vf_mc_hashes[j]); 7410 } 7411 wr32(E1000_VMOLR(i), vmolr); 7412 } 7413 } 7414 7415 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) 7416 { 7417 struct e1000_hw *hw = &adapter->hw; 7418 u32 pool_mask, vlvf_mask, i; 7419 7420 /* create mask for VF and other pools */ 7421 pool_mask = E1000_VLVF_POOLSEL_MASK; 
7422 vlvf_mask = BIT(E1000_VLVF_POOLSEL_SHIFT + vf); 7423 7424 /* drop PF from pool bits */ 7425 pool_mask &= ~BIT(E1000_VLVF_POOLSEL_SHIFT + 7426 adapter->vfs_allocated_count); 7427 7428 /* Find the vlan filter for this id */ 7429 for (i = E1000_VLVF_ARRAY_SIZE; i--;) { 7430 u32 vlvf = rd32(E1000_VLVF(i)); 7431 u32 vfta_mask, vid, vfta; 7432 7433 /* remove the vf from the pool */ 7434 if (!(vlvf & vlvf_mask)) 7435 continue; 7436 7437 /* clear out bit from VLVF */ 7438 vlvf ^= vlvf_mask; 7439 7440 /* if other pools are present, just remove ourselves */ 7441 if (vlvf & pool_mask) 7442 goto update_vlvfb; 7443 7444 /* if PF is present, leave VFTA */ 7445 if (vlvf & E1000_VLVF_POOLSEL_MASK) 7446 goto update_vlvf; 7447 7448 vid = vlvf & E1000_VLVF_VLANID_MASK; 7449 vfta_mask = BIT(vid % 32); 7450 7451 /* clear bit from VFTA */ 7452 vfta = adapter->shadow_vfta[vid / 32]; 7453 if (vfta & vfta_mask) 7454 hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask); 7455 update_vlvf: 7456 /* clear pool selection enable */ 7457 if (adapter->flags & IGB_FLAG_VLAN_PROMISC) 7458 vlvf &= E1000_VLVF_POOLSEL_MASK; 7459 else 7460 vlvf = 0; 7461 update_vlvfb: 7462 /* clear pool bits */ 7463 wr32(E1000_VLVF(i), vlvf); 7464 } 7465 } 7466 7467 static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan) 7468 { 7469 u32 vlvf; 7470 int idx; 7471 7472 /* short cut the special case */ 7473 if (vlan == 0) 7474 return 0; 7475 7476 /* Search for the VLAN id in the VLVF entries */ 7477 for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) { 7478 vlvf = rd32(E1000_VLVF(idx)); 7479 if ((vlvf & VLAN_VID_MASK) == vlan) 7480 break; 7481 } 7482 7483 return idx; 7484 } 7485 7486 static void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid) 7487 { 7488 struct e1000_hw *hw = &adapter->hw; 7489 u32 bits, pf_id; 7490 int idx; 7491 7492 idx = igb_find_vlvf_entry(hw, vid); 7493 if (!idx) 7494 return; 7495 7496 /* See if any other pools are set for this VLAN filter 7497 * entry other than the PF. 
7498 */ 7499 pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; 7500 bits = ~BIT(pf_id) & E1000_VLVF_POOLSEL_MASK; 7501 bits &= rd32(E1000_VLVF(idx)); 7502 7503 /* Disable the filter so this falls into the default pool. */ 7504 if (!bits) { 7505 if (adapter->flags & IGB_FLAG_VLAN_PROMISC) 7506 wr32(E1000_VLVF(idx), BIT(pf_id)); 7507 else 7508 wr32(E1000_VLVF(idx), 0); 7509 } 7510 } 7511 7512 static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid, 7513 bool add, u32 vf) 7514 { 7515 int pf_id = adapter->vfs_allocated_count; 7516 struct e1000_hw *hw = &adapter->hw; 7517 int err; 7518 7519 /* If VLAN overlaps with one the PF is currently monitoring make 7520 * sure that we are able to allocate a VLVF entry. This may be 7521 * redundant but it guarantees PF will maintain visibility to 7522 * the VLAN. 7523 */ 7524 if (add && test_bit(vid, adapter->active_vlans)) { 7525 err = igb_vfta_set(hw, vid, pf_id, true, false); 7526 if (err) 7527 return err; 7528 } 7529 7530 err = igb_vfta_set(hw, vid, vf, add, false); 7531 7532 if (add && !err) 7533 return err; 7534 7535 /* If we failed to add the VF VLAN or we are removing the VF VLAN 7536 * we may need to drop the PF pool bit in order to allow us to free 7537 * up the VLVF resources. 
7538 */ 7539 if (test_bit(vid, adapter->active_vlans) || 7540 (adapter->flags & IGB_FLAG_VLAN_PROMISC)) 7541 igb_update_pf_vlvf(adapter, vid); 7542 7543 return err; 7544 } 7545 7546 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) 7547 { 7548 struct e1000_hw *hw = &adapter->hw; 7549 7550 if (vid) 7551 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT)); 7552 else 7553 wr32(E1000_VMVIR(vf), 0); 7554 } 7555 7556 static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf, 7557 u16 vlan, u8 qos) 7558 { 7559 int err; 7560 7561 err = igb_set_vf_vlan(adapter, vlan, true, vf); 7562 if (err) 7563 return err; 7564 7565 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); 7566 igb_set_vmolr(adapter, vf, !vlan); 7567 7568 /* revoke access to previous VLAN */ 7569 if (vlan != adapter->vf_data[vf].pf_vlan) 7570 igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, 7571 false, vf); 7572 7573 adapter->vf_data[vf].pf_vlan = vlan; 7574 adapter->vf_data[vf].pf_qos = qos; 7575 igb_set_vf_vlan_strip(adapter, vf, true); 7576 dev_info(&adapter->pdev->dev, 7577 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); 7578 if (test_bit(__IGB_DOWN, &adapter->state)) { 7579 dev_warn(&adapter->pdev->dev, 7580 "The VF VLAN has been set, but the PF device is not up.\n"); 7581 dev_warn(&adapter->pdev->dev, 7582 "Bring the PF device up before attempting to use the VF device.\n"); 7583 } 7584 7585 return err; 7586 } 7587 7588 static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) 7589 { 7590 /* Restore tagless access via VLAN 0 */ 7591 igb_set_vf_vlan(adapter, 0, true, vf); 7592 7593 igb_set_vmvir(adapter, 0, vf); 7594 igb_set_vmolr(adapter, vf, true); 7595 7596 /* Remove any PF assigned VLAN */ 7597 if (adapter->vf_data[vf].pf_vlan) 7598 igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, 7599 false, vf); 7600 7601 adapter->vf_data[vf].pf_vlan = 0; 7602 adapter->vf_data[vf].pf_qos = 0; 7603 igb_set_vf_vlan_strip(adapter, vf, false); 
7604 7605 return 0; 7606 } 7607 7608 static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, 7609 u16 vlan, u8 qos, __be16 vlan_proto) 7610 { 7611 struct igb_adapter *adapter = netdev_priv(netdev); 7612 7613 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) 7614 return -EINVAL; 7615 7616 if (vlan_proto != htons(ETH_P_8021Q)) 7617 return -EPROTONOSUPPORT; 7618 7619 return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : 7620 igb_disable_port_vlan(adapter, vf); 7621 } 7622 7623 static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) 7624 { 7625 int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); 7626 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); 7627 int ret; 7628 7629 if (adapter->vf_data[vf].pf_vlan) 7630 return -1; 7631 7632 /* VLAN 0 is a special case, don't allow it to be removed */ 7633 if (!vid && !add) 7634 return 0; 7635 7636 ret = igb_set_vf_vlan(adapter, vid, !!add, vf); 7637 if (!ret) 7638 igb_set_vf_vlan_strip(adapter, vf, !!vid); 7639 return ret; 7640 } 7641 7642 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) 7643 { 7644 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 7645 7646 /* clear flags - except flag that indicates PF has set the MAC */ 7647 vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC; 7648 vf_data->last_nack = jiffies; 7649 7650 /* reset vlans for device */ 7651 igb_clear_vf_vfta(adapter, vf); 7652 igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf); 7653 igb_set_vmvir(adapter, vf_data->pf_vlan | 7654 (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf); 7655 igb_set_vmolr(adapter, vf, !vf_data->pf_vlan); 7656 igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan)); 7657 7658 /* reset multicast table array for vf */ 7659 adapter->vf_data[vf].num_vf_mc_hashes = 0; 7660 7661 /* Flush and reset the mta with the new values */ 7662 igb_set_rx_mode(adapter->netdev); 7663 } 7664 7665 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) 7666 { 
7667 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; 7668 7669 /* clear mac address as we were hotplug removed/added */ 7670 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) 7671 eth_zero_addr(vf_mac); 7672 7673 /* process remaining reset events */ 7674 igb_vf_reset(adapter, vf); 7675 } 7676 7677 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) 7678 { 7679 struct e1000_hw *hw = &adapter->hw; 7680 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; 7681 u32 reg, msgbuf[3] = {}; 7682 u8 *addr = (u8 *)(&msgbuf[1]); 7683 7684 /* process all the same items cleared in a function level reset */ 7685 igb_vf_reset(adapter, vf); 7686 7687 /* set vf mac address */ 7688 igb_set_vf_mac(adapter, vf, vf_mac); 7689 7690 /* enable transmit and receive for vf */ 7691 reg = rd32(E1000_VFTE); 7692 wr32(E1000_VFTE, reg | BIT(vf)); 7693 reg = rd32(E1000_VFRE); 7694 wr32(E1000_VFRE, reg | BIT(vf)); 7695 7696 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; 7697 7698 /* reply to reset with ack and vf mac address */ 7699 if (!is_zero_ether_addr(vf_mac)) { 7700 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; 7701 memcpy(addr, vf_mac, ETH_ALEN); 7702 } else { 7703 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK; 7704 } 7705 igb_write_mbx(hw, msgbuf, 3, vf); 7706 } 7707 7708 static void igb_flush_mac_table(struct igb_adapter *adapter) 7709 { 7710 struct e1000_hw *hw = &adapter->hw; 7711 int i; 7712 7713 for (i = 0; i < hw->mac.rar_entry_count; i++) { 7714 adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE; 7715 eth_zero_addr(adapter->mac_table[i].addr); 7716 adapter->mac_table[i].queue = 0; 7717 igb_rar_set_index(adapter, i); 7718 } 7719 } 7720 7721 static int igb_available_rars(struct igb_adapter *adapter, u8 queue) 7722 { 7723 struct e1000_hw *hw = &adapter->hw; 7724 /* do not count rar entries reserved for VFs MAC addresses */ 7725 int rar_entries = hw->mac.rar_entry_count - 7726 adapter->vfs_allocated_count; 7727 int i, count = 
0; 7728 7729 for (i = 0; i < rar_entries; i++) { 7730 /* do not count default entries */ 7731 if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) 7732 continue; 7733 7734 /* do not count "in use" entries for different queues */ 7735 if ((adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) && 7736 (adapter->mac_table[i].queue != queue)) 7737 continue; 7738 7739 count++; 7740 } 7741 7742 return count; 7743 } 7744 7745 /* Set default MAC address for the PF in the first RAR entry */ 7746 static void igb_set_default_mac_filter(struct igb_adapter *adapter) 7747 { 7748 struct igb_mac_addr *mac_table = &adapter->mac_table[0]; 7749 7750 ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); 7751 mac_table->queue = adapter->vfs_allocated_count; 7752 mac_table->state = IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE; 7753 7754 igb_rar_set_index(adapter, 0); 7755 } 7756 7757 /* If the filter to be added and an already existing filter express 7758 * the same address and address type, it should be possible to only 7759 * override the other configurations, for example the queue to steer 7760 * traffic. 7761 */ 7762 static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry, 7763 const u8 *addr, const u8 flags) 7764 { 7765 if (!(entry->state & IGB_MAC_STATE_IN_USE)) 7766 return true; 7767 7768 if ((entry->state & IGB_MAC_STATE_SRC_ADDR) != 7769 (flags & IGB_MAC_STATE_SRC_ADDR)) 7770 return false; 7771 7772 if (!ether_addr_equal(addr, entry->addr)) 7773 return false; 7774 7775 return true; 7776 } 7777 7778 /* Add a MAC filter for 'addr' directing matching traffic to 'queue', 7779 * 'flags' is used to indicate what kind of match is made, match is by 7780 * default for the destination address, if matching by source address 7781 * is desired the flag IGB_MAC_STATE_SRC_ADDR can be used. 
7782 */ 7783 static int igb_add_mac_filter_flags(struct igb_adapter *adapter, 7784 const u8 *addr, const u8 queue, 7785 const u8 flags) 7786 { 7787 struct e1000_hw *hw = &adapter->hw; 7788 int rar_entries = hw->mac.rar_entry_count - 7789 adapter->vfs_allocated_count; 7790 int i; 7791 7792 if (is_zero_ether_addr(addr)) 7793 return -EINVAL; 7794 7795 /* Search for the first empty entry in the MAC table. 7796 * Do not touch entries at the end of the table reserved for the VF MAC 7797 * addresses. 7798 */ 7799 for (i = 0; i < rar_entries; i++) { 7800 if (!igb_mac_entry_can_be_used(&adapter->mac_table[i], 7801 addr, flags)) 7802 continue; 7803 7804 ether_addr_copy(adapter->mac_table[i].addr, addr); 7805 adapter->mac_table[i].queue = queue; 7806 adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags; 7807 7808 igb_rar_set_index(adapter, i); 7809 return i; 7810 } 7811 7812 return -ENOSPC; 7813 } 7814 7815 static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, 7816 const u8 queue) 7817 { 7818 return igb_add_mac_filter_flags(adapter, addr, queue, 0); 7819 } 7820 7821 /* Remove a MAC filter for 'addr' directing matching traffic to 7822 * 'queue', 'flags' is used to indicate what kind of match need to be 7823 * removed, match is by default for the destination address, if 7824 * matching by source address is to be removed the flag 7825 * IGB_MAC_STATE_SRC_ADDR can be used. 7826 */ 7827 static int igb_del_mac_filter_flags(struct igb_adapter *adapter, 7828 const u8 *addr, const u8 queue, 7829 const u8 flags) 7830 { 7831 struct e1000_hw *hw = &adapter->hw; 7832 int rar_entries = hw->mac.rar_entry_count - 7833 adapter->vfs_allocated_count; 7834 int i; 7835 7836 if (is_zero_ether_addr(addr)) 7837 return -EINVAL; 7838 7839 /* Search for matching entry in the MAC table based on given address 7840 * and queue. Do not touch entries at the end of the table reserved 7841 * for the VF MAC addresses. 
7842 */ 7843 for (i = 0; i < rar_entries; i++) { 7844 if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)) 7845 continue; 7846 if ((adapter->mac_table[i].state & flags) != flags) 7847 continue; 7848 if (adapter->mac_table[i].queue != queue) 7849 continue; 7850 if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) 7851 continue; 7852 7853 /* When a filter for the default address is "deleted", 7854 * we return it to its initial configuration 7855 */ 7856 if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) { 7857 adapter->mac_table[i].state = 7858 IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE; 7859 adapter->mac_table[i].queue = 7860 adapter->vfs_allocated_count; 7861 } else { 7862 adapter->mac_table[i].state = 0; 7863 adapter->mac_table[i].queue = 0; 7864 eth_zero_addr(adapter->mac_table[i].addr); 7865 } 7866 7867 igb_rar_set_index(adapter, i); 7868 return 0; 7869 } 7870 7871 return -ENOENT; 7872 } 7873 7874 static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, 7875 const u8 queue) 7876 { 7877 return igb_del_mac_filter_flags(adapter, addr, queue, 0); 7878 } 7879 7880 int igb_add_mac_steering_filter(struct igb_adapter *adapter, 7881 const u8 *addr, u8 queue, u8 flags) 7882 { 7883 struct e1000_hw *hw = &adapter->hw; 7884 7885 /* In theory, this should be supported on 82575 as well, but 7886 * that part wasn't easily accessible during development. 
7887 */ 7888 if (hw->mac.type != e1000_i210) 7889 return -EOPNOTSUPP; 7890 7891 return igb_add_mac_filter_flags(adapter, addr, queue, 7892 IGB_MAC_STATE_QUEUE_STEERING | flags); 7893 } 7894 7895 int igb_del_mac_steering_filter(struct igb_adapter *adapter, 7896 const u8 *addr, u8 queue, u8 flags) 7897 { 7898 return igb_del_mac_filter_flags(adapter, addr, queue, 7899 IGB_MAC_STATE_QUEUE_STEERING | flags); 7900 } 7901 7902 static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr) 7903 { 7904 struct igb_adapter *adapter = netdev_priv(netdev); 7905 int ret; 7906 7907 ret = igb_add_mac_filter(adapter, addr, adapter->vfs_allocated_count); 7908 7909 return min_t(int, ret, 0); 7910 } 7911 7912 static int igb_uc_unsync(struct net_device *netdev, const unsigned char *addr) 7913 { 7914 struct igb_adapter *adapter = netdev_priv(netdev); 7915 7916 igb_del_mac_filter(adapter, addr, adapter->vfs_allocated_count); 7917 7918 return 0; 7919 } 7920 7921 static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, 7922 const u32 info, const u8 *addr) 7923 { 7924 struct pci_dev *pdev = adapter->pdev; 7925 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 7926 struct vf_mac_filter *entry; 7927 bool found = false; 7928 int ret = 0; 7929 7930 if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && 7931 !vf_data->trusted) { 7932 dev_warn(&pdev->dev, 7933 "VF %d requested MAC filter but is administratively denied\n", 7934 vf); 7935 return -EINVAL; 7936 } 7937 if (!is_valid_ether_addr(addr)) { 7938 dev_warn(&pdev->dev, 7939 "VF %d attempted to set invalid MAC filter\n", 7940 vf); 7941 return -EINVAL; 7942 } 7943 7944 switch (info) { 7945 case E1000_VF_MAC_FILTER_CLR: 7946 /* remove all unicast MAC filters related to the current VF */ 7947 list_for_each_entry(entry, &adapter->vf_macs.l, l) { 7948 if (entry->vf == vf) { 7949 entry->vf = -1; 7950 entry->free = true; 7951 igb_del_mac_filter(adapter, entry->vf_mac, vf); 7952 } 7953 } 7954 break; 7955 case 
E1000_VF_MAC_FILTER_ADD: 7956 /* try to find empty slot in the list */ 7957 list_for_each_entry(entry, &adapter->vf_macs.l, l) { 7958 if (entry->free) { 7959 found = true; 7960 break; 7961 } 7962 } 7963 7964 if (found) { 7965 entry->free = false; 7966 entry->vf = vf; 7967 ether_addr_copy(entry->vf_mac, addr); 7968 7969 ret = igb_add_mac_filter(adapter, addr, vf); 7970 ret = min_t(int, ret, 0); 7971 } else { 7972 ret = -ENOSPC; 7973 } 7974 7975 if (ret == -ENOSPC) 7976 dev_warn(&pdev->dev, 7977 "VF %d has requested MAC filter but there is no space for it\n", 7978 vf); 7979 break; 7980 default: 7981 ret = -EINVAL; 7982 break; 7983 } 7984 7985 return ret; 7986 } 7987 7988 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) 7989 { 7990 struct pci_dev *pdev = adapter->pdev; 7991 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 7992 u32 info = msg[0] & E1000_VT_MSGINFO_MASK; 7993 7994 /* The VF MAC Address is stored in a packed array of bytes 7995 * starting at the second 32 bit word of the msg array 7996 */ 7997 unsigned char *addr = (unsigned char *)&msg[1]; 7998 int ret = 0; 7999 8000 if (!info) { 8001 if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && 8002 !vf_data->trusted) { 8003 dev_warn(&pdev->dev, 8004 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n", 8005 vf); 8006 return -EINVAL; 8007 } 8008 8009 if (!is_valid_ether_addr(addr)) { 8010 dev_warn(&pdev->dev, 8011 "VF %d attempted to set invalid MAC\n", 8012 vf); 8013 return -EINVAL; 8014 } 8015 8016 ret = igb_set_vf_mac(adapter, vf, addr); 8017 } else { 8018 ret = igb_set_vf_mac_filter(adapter, vf, info, addr); 8019 } 8020 8021 return ret; 8022 } 8023 8024 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) 8025 { 8026 struct e1000_hw *hw = &adapter->hw; 8027 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 8028 u32 msg = E1000_VT_MSGTYPE_NACK; 8029 8030 /* if device isn't clear to send it shouldn't 
be reading either */ 8031 if (!(vf_data->flags & IGB_VF_FLAG_CTS) && 8032 time_after(jiffies, vf_data->last_nack + (2 * HZ))) { 8033 igb_write_mbx(hw, &msg, 1, vf); 8034 vf_data->last_nack = jiffies; 8035 } 8036 } 8037 8038 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) 8039 { 8040 struct pci_dev *pdev = adapter->pdev; 8041 u32 msgbuf[E1000_VFMAILBOX_SIZE]; 8042 struct e1000_hw *hw = &adapter->hw; 8043 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 8044 s32 retval; 8045 8046 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf, false); 8047 8048 if (retval) { 8049 /* if receive failed revoke VF CTS stats and restart init */ 8050 dev_err(&pdev->dev, "Error receiving message from VF\n"); 8051 vf_data->flags &= ~IGB_VF_FLAG_CTS; 8052 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 8053 goto unlock; 8054 goto out; 8055 } 8056 8057 /* this is a message we already processed, do nothing */ 8058 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) 8059 goto unlock; 8060 8061 /* until the vf completes a reset it should not be 8062 * allowed to start any configuration. 
8063 */ 8064 if (msgbuf[0] == E1000_VF_RESET) { 8065 /* unlocks mailbox */ 8066 igb_vf_reset_msg(adapter, vf); 8067 return; 8068 } 8069 8070 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { 8071 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 8072 goto unlock; 8073 retval = -1; 8074 goto out; 8075 } 8076 8077 switch ((msgbuf[0] & 0xFFFF)) { 8078 case E1000_VF_SET_MAC_ADDR: 8079 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); 8080 break; 8081 case E1000_VF_SET_PROMISC: 8082 retval = igb_set_vf_promisc(adapter, msgbuf, vf); 8083 break; 8084 case E1000_VF_SET_MULTICAST: 8085 retval = igb_set_vf_multicasts(adapter, msgbuf, vf); 8086 break; 8087 case E1000_VF_SET_LPE: 8088 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); 8089 break; 8090 case E1000_VF_SET_VLAN: 8091 retval = -1; 8092 if (vf_data->pf_vlan) 8093 dev_warn(&pdev->dev, 8094 "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", 8095 vf); 8096 else 8097 retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf); 8098 break; 8099 default: 8100 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); 8101 retval = -1; 8102 break; 8103 } 8104 8105 msgbuf[0] |= E1000_VT_MSGTYPE_CTS; 8106 out: 8107 /* notify the VF of the results of what it sent us */ 8108 if (retval) 8109 msgbuf[0] |= E1000_VT_MSGTYPE_NACK; 8110 else 8111 msgbuf[0] |= E1000_VT_MSGTYPE_ACK; 8112 8113 /* unlocks mailbox */ 8114 igb_write_mbx(hw, msgbuf, 1, vf); 8115 return; 8116 8117 unlock: 8118 igb_unlock_mbx(hw, vf); 8119 } 8120 8121 static void igb_msg_task(struct igb_adapter *adapter) 8122 { 8123 struct e1000_hw *hw = &adapter->hw; 8124 unsigned long flags; 8125 u32 vf; 8126 8127 spin_lock_irqsave(&adapter->vfs_lock, flags); 8128 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { 8129 /* process any reset requests */ 8130 if (!igb_check_for_rst(hw, vf)) 8131 igb_vf_reset_event(adapter, vf); 8132 8133 /* process any messages pending */ 8134 if (!igb_check_for_msg(hw, vf)) 8135 
igb_rcv_msg_from_vf(adapter, vf); 8136 8137 /* process any acks */ 8138 if (!igb_check_for_ack(hw, vf)) 8139 igb_rcv_ack_from_vf(adapter, vf); 8140 } 8141 spin_unlock_irqrestore(&adapter->vfs_lock, flags); 8142 } 8143 8144 /** 8145 * igb_set_uta - Set unicast filter table address 8146 * @adapter: board private structure 8147 * @set: boolean indicating if we are setting or clearing bits 8148 * 8149 * The unicast table address is a register array of 32-bit registers. 8150 * The table is meant to be used in a way similar to how the MTA is used 8151 * however due to certain limitations in the hardware it is necessary to 8152 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous 8153 * enable bit to allow vlan tag stripping when promiscuous mode is enabled 8154 **/ 8155 static void igb_set_uta(struct igb_adapter *adapter, bool set) 8156 { 8157 struct e1000_hw *hw = &adapter->hw; 8158 u32 uta = set ? ~0 : 0; 8159 int i; 8160 8161 /* we only need to do this if VMDq is enabled */ 8162 if (!adapter->vfs_allocated_count) 8163 return; 8164 8165 for (i = hw->mac.uta_reg_count; i--;) 8166 array_wr32(E1000_UTA, i, uta); 8167 } 8168 8169 /** 8170 * igb_intr_msi - Interrupt Handler 8171 * @irq: interrupt number 8172 * @data: pointer to a network interface device structure 8173 **/ 8174 static irqreturn_t igb_intr_msi(int irq, void *data) 8175 { 8176 struct igb_adapter *adapter = data; 8177 struct igb_q_vector *q_vector = adapter->q_vector[0]; 8178 struct e1000_hw *hw = &adapter->hw; 8179 /* read ICR disables interrupts using IAM */ 8180 u32 icr = rd32(E1000_ICR); 8181 8182 igb_write_itr(q_vector); 8183 8184 if (icr & E1000_ICR_DRSTA) 8185 schedule_work(&adapter->reset_task); 8186 8187 if (icr & E1000_ICR_DOUTSYNC) { 8188 /* HW is reporting DMA is out of sync */ 8189 adapter->stats.doosync++; 8190 } 8191 8192 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 8193 hw->mac.get_link_status = 1; 8194 if (!test_bit(__IGB_DOWN, &adapter->state)) 8195 
mod_timer(&adapter->watchdog_timer, jiffies + 1); 8196 } 8197 8198 if (icr & E1000_ICR_TS) 8199 igb_tsync_interrupt(adapter); 8200 8201 napi_schedule(&q_vector->napi); 8202 8203 return IRQ_HANDLED; 8204 } 8205 8206 /** 8207 * igb_intr - Legacy Interrupt Handler 8208 * @irq: interrupt number 8209 * @data: pointer to a network interface device structure 8210 **/ 8211 static irqreturn_t igb_intr(int irq, void *data) 8212 { 8213 struct igb_adapter *adapter = data; 8214 struct igb_q_vector *q_vector = adapter->q_vector[0]; 8215 struct e1000_hw *hw = &adapter->hw; 8216 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No 8217 * need for the IMC write 8218 */ 8219 u32 icr = rd32(E1000_ICR); 8220 8221 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 8222 * not set, then the adapter didn't send an interrupt 8223 */ 8224 if (!(icr & E1000_ICR_INT_ASSERTED)) 8225 return IRQ_NONE; 8226 8227 igb_write_itr(q_vector); 8228 8229 if (icr & E1000_ICR_DRSTA) 8230 schedule_work(&adapter->reset_task); 8231 8232 if (icr & E1000_ICR_DOUTSYNC) { 8233 /* HW is reporting DMA is out of sync */ 8234 adapter->stats.doosync++; 8235 } 8236 8237 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 8238 hw->mac.get_link_status = 1; 8239 /* guard against interrupt when we're going down */ 8240 if (!test_bit(__IGB_DOWN, &adapter->state)) 8241 mod_timer(&adapter->watchdog_timer, jiffies + 1); 8242 } 8243 8244 if (icr & E1000_ICR_TS) 8245 igb_tsync_interrupt(adapter); 8246 8247 napi_schedule(&q_vector->napi); 8248 8249 return IRQ_HANDLED; 8250 } 8251 8252 static void igb_ring_irq_enable(struct igb_q_vector *q_vector) 8253 { 8254 struct igb_adapter *adapter = q_vector->adapter; 8255 struct e1000_hw *hw = &adapter->hw; 8256 8257 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 8258 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 8259 if ((adapter->num_q_vectors == 1) && !adapter->vf_data) 8260 igb_set_itr(q_vector); 8261 else 8262 
igb_update_ring_itr(q_vector); 8263 } 8264 8265 if (!test_bit(__IGB_DOWN, &adapter->state)) { 8266 if (adapter->flags & IGB_FLAG_HAS_MSIX) 8267 wr32(E1000_EIMS, q_vector->eims_value); 8268 else 8269 igb_irq_enable(adapter); 8270 } 8271 } 8272 8273 /** 8274 * igb_poll - NAPI Rx polling callback 8275 * @napi: napi polling structure 8276 * @budget: count of how many packets we should handle 8277 **/ 8278 static int igb_poll(struct napi_struct *napi, int budget) 8279 { 8280 struct igb_q_vector *q_vector = container_of(napi, 8281 struct igb_q_vector, 8282 napi); 8283 struct xsk_buff_pool *xsk_pool; 8284 bool clean_complete = true; 8285 int work_done = 0; 8286 8287 #ifdef CONFIG_IGB_DCA 8288 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) 8289 igb_update_dca(q_vector); 8290 #endif 8291 if (q_vector->tx.ring) 8292 clean_complete = igb_clean_tx_irq(q_vector, budget); 8293 8294 if (q_vector->rx.ring) { 8295 int cleaned; 8296 8297 xsk_pool = READ_ONCE(q_vector->rx.ring->xsk_pool); 8298 cleaned = xsk_pool ? 
/**
 *  igb_clean_tx_irq - Reclaim resources after transmit completes
 *  @q_vector: pointer to q_vector containing needed info
 *  @napi_budget: Used to determine if we are in netpoll
 *
 *  Walks the Tx ring from next_to_clean, releasing every packet whose
 *  end-of-packet descriptor has the DD status bit set, for at most
 *  q_vector->tx.work_limit packets.  Afterwards it updates the
 *  statistics, services an attached XSK pool, runs the Tx hang check
 *  when requested and wakes a stopped queue once enough descriptors are
 *  free.
 *
 *  returns true if ring is completely cleaned, i.e. the work limit was
 *  not used up and igb_xmit_zc() (when an XSK pool is attached) reported
 *  completion.  Also returns true when the adapter is down or a Tx hang
 *  was detected.
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
{
	unsigned int total_bytes = 0, total_packets = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	unsigned int budget = q_vector->tx.work_limit;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	unsigned int i = tx_ring->next_to_clean;
	union e1000_adv_tx_desc *tx_desc;
	struct igb_tx_buffer *tx_buffer;
	struct xsk_buff_pool *xsk_pool;
	int cpu = smp_processor_id();
	bool xsk_xmit_done = true;
	struct netdev_queue *nq;
	u32 xsk_frames = 0;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	/* Bias the index by -count (unsigned wrap-around) so that stepping
	 * past the last descriptor shows up as i == 0; the bias is removed
	 * again after the loop.
	 */
	i -= tx_ring->count;

	do {
		union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		smp_rmb();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* free the skb */
		if (tx_buffer->type == IGB_TYPE_SKB) {
			napi_consume_skb(tx_buffer->skb, napi_budget);
		} else if (tx_buffer->type == IGB_TYPE_XDP) {
			xdp_return_frame(tx_buffer->xdpf);
		} else if (tx_buffer->type == IGB_TYPE_XSK) {
			/* XSK frames are only counted here and reported to
			 * the pool after the loop; the DMA unmapping below
			 * is skipped for them
			 */
			xsk_frames++;
			goto skip_for_xsk;
		}

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		dma_unmap_len_set(tx_buffer, len, 0);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			/* wrapped past the end of the ring: restart at 0 */
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

skip_for_xsk:
		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
	/* remove the -count bias to get a real ring index again */
	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	xsk_pool = READ_ONCE(tx_ring->xsk_pool);
	if (xsk_pool) {
		if (xsk_frames)
			xsk_tx_completed(xsk_pool, xsk_frames);
		if (xsk_uses_need_wakeup(xsk_pool))
			xsk_set_tx_need_wakeup(xsk_pool);

		nq = txring_txq(tx_ring);
		__netif_tx_lock(nq, cpu);
		/* Avoid transmit queue timeout since we share it with the slow path */
		txq_trans_cond_update(nq);
		xsk_xmit_done = igb_xmit_zc(tx_ring, xsk_pool);
		__netif_tx_unlock(nq);
	}

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i
		 */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		/* hang: work still pending at next_to_clean, older than the
		 * timeout, and the STATUS register does not report TXOFF
		 */
		if (tx_buffer->next_to_watch &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				" Tx Queue <%d>\n"
				" TDH <%x>\n"
				" TDT <%x>\n"
				" next_to_use <%x>\n"
				" next_to_clean <%x>\n"
				"buffer_info[next_to_clean]\n"
				" time_stamp <%lx>\n"
				" next_to_watch <%p>\n"
				" jiffies <%lx>\n"
				" desc.status <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				tx_buffer->next_to_watch,
				jiffies,
				tx_buffer->next_to_watch->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	if (unlikely(total_packets &&
	    netif_carrier_ok(tx_ring->netdev) &&
	    igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	/* clean only if budget remains and the XSK transmit finished */
	return !!budget && xsk_xmit_done;
}
8541 */ 8542 new_buff->dma = old_buff->dma; 8543 new_buff->page = old_buff->page; 8544 new_buff->page_offset = old_buff->page_offset; 8545 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 8546 } 8547 8548 static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, 8549 int rx_buf_pgcnt) 8550 { 8551 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 8552 struct page *page = rx_buffer->page; 8553 8554 /* avoid re-using remote and pfmemalloc pages */ 8555 if (!dev_page_is_reusable(page)) 8556 return false; 8557 8558 #if (PAGE_SIZE < 8192) 8559 /* if we are only owner of page we can reuse it */ 8560 if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) 8561 return false; 8562 #else 8563 #define IGB_LAST_OFFSET \ 8564 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGB_RXBUFFER_2048) 8565 8566 if (rx_buffer->page_offset > IGB_LAST_OFFSET) 8567 return false; 8568 #endif 8569 8570 /* If we have drained the page fragment pool we need to update 8571 * the pagecnt_bias and page count so that we fully restock the 8572 * number of references the driver holds. 8573 */ 8574 if (unlikely(pagecnt_bias == 1)) { 8575 page_ref_add(page, USHRT_MAX - 1); 8576 rx_buffer->pagecnt_bias = USHRT_MAX; 8577 } 8578 8579 return true; 8580 } 8581 8582 /** 8583 * igb_add_rx_frag - Add contents of Rx buffer to sk_buff 8584 * @rx_ring: rx descriptor ring to transact packets on 8585 * @rx_buffer: buffer containing page to add 8586 * @skb: sk_buff to place the data into 8587 * @size: size of buffer to be added 8588 * 8589 * This function will add the data contained in rx_buffer->page to the skb. 8590 **/ 8591 static void igb_add_rx_frag(struct igb_ring *rx_ring, 8592 struct igb_rx_buffer *rx_buffer, 8593 struct sk_buff *skb, 8594 unsigned int size) 8595 { 8596 #if (PAGE_SIZE < 8192) 8597 unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; 8598 #else 8599 unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
8600 SKB_DATA_ALIGN(IGB_SKB_PAD + size) : 8601 SKB_DATA_ALIGN(size); 8602 #endif 8603 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 8604 rx_buffer->page_offset, size, truesize); 8605 #if (PAGE_SIZE < 8192) 8606 rx_buffer->page_offset ^= truesize; 8607 #else 8608 rx_buffer->page_offset += truesize; 8609 #endif 8610 } 8611 8612 static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, 8613 struct igb_rx_buffer *rx_buffer, 8614 struct xdp_buff *xdp, 8615 ktime_t timestamp) 8616 { 8617 #if (PAGE_SIZE < 8192) 8618 unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; 8619 #else 8620 unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - 8621 xdp->data_hard_start); 8622 #endif 8623 unsigned int size = xdp->data_end - xdp->data; 8624 unsigned int headlen; 8625 struct sk_buff *skb; 8626 8627 /* prefetch first cache line of first page */ 8628 net_prefetch(xdp->data); 8629 8630 /* allocate a skb to store the frags */ 8631 skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN); 8632 if (unlikely(!skb)) 8633 return NULL; 8634 8635 if (timestamp) 8636 skb_hwtstamps(skb)->hwtstamp = timestamp; 8637 8638 /* Determine available headroom for copy */ 8639 headlen = size; 8640 if (headlen > IGB_RX_HDR_LEN) 8641 headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN); 8642 8643 /* align pull length to size of long to optimize memcpy performance */ 8644 memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); 8645 8646 /* update all of the pointers */ 8647 size -= headlen; 8648 if (size) { 8649 skb_add_rx_frag(skb, 0, rx_buffer->page, 8650 (xdp->data + headlen) - page_address(rx_buffer->page), 8651 size, truesize); 8652 #if (PAGE_SIZE < 8192) 8653 rx_buffer->page_offset ^= truesize; 8654 #else 8655 rx_buffer->page_offset += truesize; 8656 #endif 8657 } else { 8658 rx_buffer->pagecnt_bias++; 8659 } 8660 8661 return skb; 8662 } 8663 8664 static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, 8665 struct igb_rx_buffer 
*rx_buffer, 8666 struct xdp_buff *xdp, 8667 ktime_t timestamp) 8668 { 8669 #if (PAGE_SIZE < 8192) 8670 unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; 8671 #else 8672 unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 8673 SKB_DATA_ALIGN(xdp->data_end - 8674 xdp->data_hard_start); 8675 #endif 8676 unsigned int metasize = xdp->data - xdp->data_meta; 8677 struct sk_buff *skb; 8678 8679 /* prefetch first cache line of first page */ 8680 net_prefetch(xdp->data_meta); 8681 8682 /* build an skb around the page buffer */ 8683 skb = napi_build_skb(xdp->data_hard_start, truesize); 8684 if (unlikely(!skb)) 8685 return NULL; 8686 8687 /* update pointers within the skb to store the data */ 8688 skb_reserve(skb, xdp->data - xdp->data_hard_start); 8689 __skb_put(skb, xdp->data_end - xdp->data); 8690 8691 if (metasize) 8692 skb_metadata_set(skb, metasize); 8693 8694 if (timestamp) 8695 skb_hwtstamps(skb)->hwtstamp = timestamp; 8696 8697 /* update buffer offset */ 8698 #if (PAGE_SIZE < 8192) 8699 rx_buffer->page_offset ^= truesize; 8700 #else 8701 rx_buffer->page_offset += truesize; 8702 #endif 8703 8704 return skb; 8705 } 8706 8707 static int igb_run_xdp(struct igb_adapter *adapter, struct igb_ring *rx_ring, 8708 struct xdp_buff *xdp) 8709 { 8710 int err, result = IGB_XDP_PASS; 8711 struct bpf_prog *xdp_prog; 8712 u32 act; 8713 8714 xdp_prog = READ_ONCE(rx_ring->xdp_prog); 8715 8716 if (!xdp_prog) 8717 goto xdp_out; 8718 8719 prefetchw(xdp->data_hard_start); /* xdp_frame write */ 8720 8721 act = bpf_prog_run_xdp(xdp_prog, xdp); 8722 switch (act) { 8723 case XDP_PASS: 8724 break; 8725 case XDP_TX: 8726 result = igb_xdp_xmit_back(adapter, xdp); 8727 if (result == IGB_XDP_CONSUMED) 8728 goto out_failure; 8729 break; 8730 case XDP_REDIRECT: 8731 err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); 8732 if (err) 8733 goto out_failure; 8734 result = IGB_XDP_REDIR; 8735 break; 8736 default: 8737 bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act); 
8738 fallthrough; 8739 case XDP_ABORTED: 8740 out_failure: 8741 trace_xdp_exception(rx_ring->netdev, xdp_prog, act); 8742 fallthrough; 8743 case XDP_DROP: 8744 result = IGB_XDP_CONSUMED; 8745 break; 8746 } 8747 xdp_out: 8748 return result; 8749 } 8750 8751 static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring, 8752 unsigned int size) 8753 { 8754 unsigned int truesize; 8755 8756 #if (PAGE_SIZE < 8192) 8757 truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ 8758 #else 8759 truesize = ring_uses_build_skb(rx_ring) ? 8760 SKB_DATA_ALIGN(IGB_SKB_PAD + size) + 8761 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 8762 SKB_DATA_ALIGN(size); 8763 #endif 8764 return truesize; 8765 } 8766 8767 static void igb_rx_buffer_flip(struct igb_ring *rx_ring, 8768 struct igb_rx_buffer *rx_buffer, 8769 unsigned int size) 8770 { 8771 unsigned int truesize = igb_rx_frame_truesize(rx_ring, size); 8772 #if (PAGE_SIZE < 8192) 8773 rx_buffer->page_offset ^= truesize; 8774 #else 8775 rx_buffer->page_offset += truesize; 8776 #endif 8777 } 8778 8779 static inline void igb_rx_checksum(struct igb_ring *ring, 8780 union e1000_adv_rx_desc *rx_desc, 8781 struct sk_buff *skb) 8782 { 8783 skb_checksum_none_assert(skb); 8784 8785 /* Ignore Checksum bit is set */ 8786 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) 8787 return; 8788 8789 /* Rx checksum disabled via ethtool */ 8790 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 8791 return; 8792 8793 /* TCP/UDP checksum error bit is set */ 8794 if (igb_test_staterr(rx_desc, 8795 E1000_RXDEXT_STATERR_TCPE | 8796 E1000_RXDEXT_STATERR_IPE)) { 8797 /* work around errata with sctp packets where the TCPE aka 8798 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 8799 * packets, (aka let the stack check the crc32c) 8800 */ 8801 if (!((skb->len == 60) && 8802 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 8803 u64_stats_update_begin(&ring->rx_syncp); 8804 ring->rx_stats.csum_err++; 8805 
u64_stats_update_end(&ring->rx_syncp); 8806 } 8807 /* let the stack verify checksum errors */ 8808 return; 8809 } 8810 /* It must be a TCP or UDP packet with a valid checksum */ 8811 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | 8812 E1000_RXD_STAT_UDPCS)) 8813 skb->ip_summed = CHECKSUM_UNNECESSARY; 8814 8815 dev_dbg(ring->dev, "cksum success: bits %08X\n", 8816 le32_to_cpu(rx_desc->wb.upper.status_error)); 8817 } 8818 8819 static inline void igb_rx_hash(struct igb_ring *ring, 8820 union e1000_adv_rx_desc *rx_desc, 8821 struct sk_buff *skb) 8822 { 8823 if (ring->netdev->features & NETIF_F_RXHASH) 8824 skb_set_hash(skb, 8825 le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), 8826 PKT_HASH_TYPE_L3); 8827 } 8828 8829 /** 8830 * igb_is_non_eop - process handling of non-EOP buffers 8831 * @rx_ring: Rx ring being processed 8832 * @rx_desc: Rx descriptor for current buffer 8833 * 8834 * This function updates next to clean. If the buffer is an EOP buffer 8835 * this function exits returning false, otherwise it will place the 8836 * sk_buff in the next buffer to be chained and return true indicating 8837 * that this is in fact a non-EOP buffer. 8838 **/ 8839 static bool igb_is_non_eop(struct igb_ring *rx_ring, 8840 union e1000_adv_rx_desc *rx_desc) 8841 { 8842 u32 ntc = rx_ring->next_to_clean + 1; 8843 8844 /* fetch, update, and store next to clean */ 8845 ntc = (ntc < rx_ring->count) ? ntc : 0; 8846 rx_ring->next_to_clean = ntc; 8847 8848 prefetch(IGB_RX_DESC(rx_ring, ntc)); 8849 8850 if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) 8851 return false; 8852 8853 return true; 8854 } 8855 8856 /** 8857 * igb_cleanup_headers - Correct corrupted or empty headers 8858 * @rx_ring: rx descriptor ring packet is being transacted on 8859 * @rx_desc: pointer to the EOP Rx descriptor 8860 * @skb: pointer to current skb being fixed 8861 * 8862 * Address the case where we are pulling data in on pages only 8863 * and as such no data is present in the skb header. 
8864 * 8865 * In addition if skb is not at least 60 bytes we need to pad it so that 8866 * it is large enough to qualify as a valid Ethernet frame. 8867 * 8868 * Returns true if an error was encountered and skb was freed. 8869 **/ 8870 static bool igb_cleanup_headers(struct igb_ring *rx_ring, 8871 union e1000_adv_rx_desc *rx_desc, 8872 struct sk_buff *skb) 8873 { 8874 if (unlikely((igb_test_staterr(rx_desc, 8875 E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { 8876 struct net_device *netdev = rx_ring->netdev; 8877 if (!(netdev->features & NETIF_F_RXALL)) { 8878 dev_kfree_skb_any(skb); 8879 return true; 8880 } 8881 } 8882 8883 /* if eth_skb_pad returns an error the skb was freed */ 8884 if (eth_skb_pad(skb)) 8885 return true; 8886 8887 return false; 8888 } 8889 8890 /** 8891 * igb_process_skb_fields - Populate skb header fields from Rx descriptor 8892 * @rx_ring: rx descriptor ring packet is being transacted on 8893 * @rx_desc: pointer to the EOP Rx descriptor 8894 * @skb: pointer to current skb being populated 8895 * 8896 * This function checks the ring, descriptor, and packet information in 8897 * order to populate the hash, checksum, VLAN, timestamp, protocol, and 8898 * other fields within the skb. 
8899 **/ 8900 void igb_process_skb_fields(struct igb_ring *rx_ring, 8901 union e1000_adv_rx_desc *rx_desc, 8902 struct sk_buff *skb) 8903 { 8904 struct net_device *dev = rx_ring->netdev; 8905 8906 igb_rx_hash(rx_ring, rx_desc, skb); 8907 8908 igb_rx_checksum(rx_ring, rx_desc, skb); 8909 8910 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) && 8911 !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) 8912 igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb); 8913 8914 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && 8915 igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { 8916 u16 vid; 8917 8918 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && 8919 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) 8920 vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); 8921 else 8922 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 8923 8924 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 8925 } 8926 8927 skb_record_rx_queue(skb, rx_ring->queue_index); 8928 8929 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 8930 } 8931 8932 static unsigned int igb_rx_offset(struct igb_ring *rx_ring) 8933 { 8934 return ring_uses_build_skb(rx_ring) ? 
IGB_SKB_PAD : 0; 8935 } 8936 8937 static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, 8938 const unsigned int size, int *rx_buf_pgcnt) 8939 { 8940 struct igb_rx_buffer *rx_buffer; 8941 8942 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 8943 *rx_buf_pgcnt = 8944 #if (PAGE_SIZE < 8192) 8945 page_count(rx_buffer->page); 8946 #else 8947 0; 8948 #endif 8949 prefetchw(rx_buffer->page); 8950 8951 /* we are reusing so sync this buffer for CPU use */ 8952 dma_sync_single_range_for_cpu(rx_ring->dev, 8953 rx_buffer->dma, 8954 rx_buffer->page_offset, 8955 size, 8956 DMA_FROM_DEVICE); 8957 8958 rx_buffer->pagecnt_bias--; 8959 8960 return rx_buffer; 8961 } 8962 8963 static void igb_put_rx_buffer(struct igb_ring *rx_ring, 8964 struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) 8965 { 8966 if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { 8967 /* hand second half of page back to the ring */ 8968 igb_reuse_rx_page(rx_ring, rx_buffer); 8969 } else { 8970 /* We are not reusing the buffer so unmap it and free 8971 * any references we are holding to it 8972 */ 8973 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 8974 igb_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 8975 IGB_RX_DMA_ATTR); 8976 __page_frag_cache_drain(rx_buffer->page, 8977 rx_buffer->pagecnt_bias); 8978 } 8979 8980 /* clear contents of rx_buffer */ 8981 rx_buffer->page = NULL; 8982 } 8983 8984 void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status) 8985 { 8986 int cpu = smp_processor_id(); 8987 struct netdev_queue *nq; 8988 8989 if (status & IGB_XDP_REDIR) 8990 xdp_do_flush(); 8991 8992 if (status & IGB_XDP_TX) { 8993 struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); 8994 8995 nq = txring_txq(tx_ring); 8996 __netif_tx_lock(nq, cpu); 8997 igb_xdp_ring_update_tail(tx_ring); 8998 __netif_tx_unlock(nq); 8999 } 9000 } 9001 9002 void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets, 9003 unsigned int bytes) 9004 { 9005 struct igb_ring *ring 
= q_vector->rx.ring; 9006 9007 u64_stats_update_begin(&ring->rx_syncp); 9008 ring->rx_stats.packets += packets; 9009 ring->rx_stats.bytes += bytes; 9010 u64_stats_update_end(&ring->rx_syncp); 9011 9012 q_vector->rx.total_packets += packets; 9013 q_vector->rx.total_bytes += bytes; 9014 } 9015 9016 static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) 9017 { 9018 unsigned int total_bytes = 0, total_packets = 0; 9019 struct igb_adapter *adapter = q_vector->adapter; 9020 struct igb_ring *rx_ring = q_vector->rx.ring; 9021 u16 cleaned_count = igb_desc_unused(rx_ring); 9022 struct sk_buff *skb = rx_ring->skb; 9023 unsigned int xdp_xmit = 0; 9024 struct xdp_buff xdp; 9025 u32 frame_sz = 0; 9026 int rx_buf_pgcnt; 9027 int xdp_res = 0; 9028 9029 /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ 9030 #if (PAGE_SIZE < 8192) 9031 frame_sz = igb_rx_frame_truesize(rx_ring, 0); 9032 #endif 9033 xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); 9034 9035 while (likely(total_packets < budget)) { 9036 union e1000_adv_rx_desc *rx_desc; 9037 struct igb_rx_buffer *rx_buffer; 9038 ktime_t timestamp = 0; 9039 int pkt_offset = 0; 9040 unsigned int size; 9041 void *pktbuf; 9042 9043 /* return some buffers to hardware, one at a time is too slow */ 9044 if (cleaned_count >= IGB_RX_BUFFER_WRITE) { 9045 igb_alloc_rx_buffers(rx_ring, cleaned_count); 9046 cleaned_count = 0; 9047 } 9048 9049 rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); 9050 size = le16_to_cpu(rx_desc->wb.upper.length); 9051 if (!size) 9052 break; 9053 9054 /* This memory barrier is needed to keep us from reading 9055 * any other fields out of the rx_desc until we know the 9056 * descriptor has been written back 9057 */ 9058 dma_rmb(); 9059 9060 rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt); 9061 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 9062 9063 /* pull rx packet timestamp if available and valid */ 9064 if (igb_test_staterr(rx_desc, 
E1000_RXDADV_STAT_TSIP)) { 9065 int ts_hdr_len; 9066 9067 ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector, 9068 pktbuf, ×tamp); 9069 9070 pkt_offset += ts_hdr_len; 9071 size -= ts_hdr_len; 9072 } 9073 9074 /* retrieve a buffer from the ring */ 9075 if (!skb) { 9076 unsigned char *hard_start = pktbuf - igb_rx_offset(rx_ring); 9077 unsigned int offset = pkt_offset + igb_rx_offset(rx_ring); 9078 9079 xdp_prepare_buff(&xdp, hard_start, offset, size, true); 9080 xdp_buff_clear_frags_flag(&xdp); 9081 #if (PAGE_SIZE > 4096) 9082 /* At larger PAGE_SIZE, frame_sz depend on len size */ 9083 xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size); 9084 #endif 9085 xdp_res = igb_run_xdp(adapter, rx_ring, &xdp); 9086 } 9087 9088 if (xdp_res) { 9089 if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) { 9090 xdp_xmit |= xdp_res; 9091 igb_rx_buffer_flip(rx_ring, rx_buffer, size); 9092 } else { 9093 rx_buffer->pagecnt_bias++; 9094 } 9095 total_packets++; 9096 total_bytes += size; 9097 } else if (skb) 9098 igb_add_rx_frag(rx_ring, rx_buffer, skb, size); 9099 else if (ring_uses_build_skb(rx_ring)) 9100 skb = igb_build_skb(rx_ring, rx_buffer, &xdp, 9101 timestamp); 9102 else 9103 skb = igb_construct_skb(rx_ring, rx_buffer, 9104 &xdp, timestamp); 9105 9106 /* exit if we failed to retrieve a buffer */ 9107 if (!xdp_res && !skb) { 9108 rx_ring->rx_stats.alloc_failed++; 9109 rx_buffer->pagecnt_bias++; 9110 set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 9111 break; 9112 } 9113 9114 igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); 9115 cleaned_count++; 9116 9117 /* fetch next buffer in frame if non-eop */ 9118 if (igb_is_non_eop(rx_ring, rx_desc)) 9119 continue; 9120 9121 /* verify the packet layout is correct */ 9122 if (xdp_res || igb_cleanup_headers(rx_ring, rx_desc, skb)) { 9123 skb = NULL; 9124 continue; 9125 } 9126 9127 /* probably a little skewed due to removing CRC */ 9128 total_bytes += skb->len; 9129 9130 /* populate checksum, timestamp, VLAN, and protocol */ 9131 
igb_process_skb_fields(rx_ring, rx_desc, skb); 9132 9133 napi_gro_receive(&q_vector->napi, skb); 9134 9135 /* reset skb pointer */ 9136 skb = NULL; 9137 9138 /* update budget accounting */ 9139 total_packets++; 9140 } 9141 9142 /* place incomplete frames back on ring for completion */ 9143 rx_ring->skb = skb; 9144 9145 if (xdp_xmit) 9146 igb_finalize_xdp(adapter, xdp_xmit); 9147 9148 igb_update_rx_stats(q_vector, total_packets, total_bytes); 9149 9150 if (cleaned_count) 9151 igb_alloc_rx_buffers(rx_ring, cleaned_count); 9152 9153 return total_packets; 9154 } 9155 9156 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, 9157 struct igb_rx_buffer *bi) 9158 { 9159 struct page *page = bi->page; 9160 dma_addr_t dma; 9161 9162 /* since we are recycling buffers we should seldom need to alloc */ 9163 if (likely(page)) 9164 return true; 9165 9166 /* alloc new page for storage */ 9167 page = dev_alloc_pages(igb_rx_pg_order(rx_ring)); 9168 if (unlikely(!page)) { 9169 rx_ring->rx_stats.alloc_failed++; 9170 set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 9171 return false; 9172 } 9173 9174 /* map page for use */ 9175 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 9176 igb_rx_pg_size(rx_ring), 9177 DMA_FROM_DEVICE, 9178 IGB_RX_DMA_ATTR); 9179 9180 /* if mapping failed free memory back to system since 9181 * there isn't much point in holding memory we can't use 9182 */ 9183 if (dma_mapping_error(rx_ring->dev, dma)) { 9184 __free_pages(page, igb_rx_pg_order(rx_ring)); 9185 9186 rx_ring->rx_stats.alloc_failed++; 9187 set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); 9188 return false; 9189 } 9190 9191 bi->dma = dma; 9192 bi->page = page; 9193 bi->page_offset = igb_rx_offset(rx_ring); 9194 page_ref_add(page, USHRT_MAX - 1); 9195 bi->pagecnt_bias = USHRT_MAX; 9196 9197 return true; 9198 } 9199 9200 /** 9201 * igb_alloc_rx_buffers - Replace used receive buffers 9202 * @rx_ring: rx descriptor ring to allocate new receive buffers 9203 * @cleaned_count: count of 
buffers to allocate 9204 **/ 9205 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) 9206 { 9207 union e1000_adv_rx_desc *rx_desc; 9208 struct igb_rx_buffer *bi; 9209 u16 i = rx_ring->next_to_use; 9210 u16 bufsz; 9211 9212 /* nothing to do */ 9213 if (!cleaned_count) 9214 return; 9215 9216 rx_desc = IGB_RX_DESC(rx_ring, i); 9217 bi = &rx_ring->rx_buffer_info[i]; 9218 i -= rx_ring->count; 9219 9220 bufsz = igb_rx_bufsz(rx_ring); 9221 9222 do { 9223 if (!igb_alloc_mapped_page(rx_ring, bi)) 9224 break; 9225 9226 /* sync the buffer for use by the device */ 9227 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 9228 bi->page_offset, bufsz, 9229 DMA_FROM_DEVICE); 9230 9231 /* Refresh the desc even if buffer_addrs didn't change 9232 * because each write-back erases this info. 9233 */ 9234 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 9235 9236 rx_desc++; 9237 bi++; 9238 i++; 9239 if (unlikely(!i)) { 9240 rx_desc = IGB_RX_DESC(rx_ring, 0); 9241 bi = rx_ring->rx_buffer_info; 9242 i -= rx_ring->count; 9243 } 9244 9245 /* clear the length for the next_to_use descriptor */ 9246 rx_desc->wb.upper.length = 0; 9247 9248 cleaned_count--; 9249 } while (cleaned_count); 9250 9251 i += rx_ring->count; 9252 9253 if (rx_ring->next_to_use != i) { 9254 /* record the next descriptor to use */ 9255 rx_ring->next_to_use = i; 9256 9257 /* update next to alloc since we have filled the ring */ 9258 rx_ring->next_to_alloc = i; 9259 9260 /* Force memory writes to complete before letting h/w 9261 * know there are new descriptors to fetch. (Only 9262 * applicable for weak-ordered memory model archs, 9263 * such as IA-64). 
9264 */ 9265 dma_wmb(); 9266 writel(i, rx_ring->tail); 9267 } 9268 } 9269 9270 /** 9271 * igb_mii_ioctl - 9272 * @netdev: pointer to netdev struct 9273 * @ifr: interface structure 9274 * @cmd: ioctl command to execute 9275 **/ 9276 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 9277 { 9278 struct igb_adapter *adapter = netdev_priv(netdev); 9279 struct mii_ioctl_data *data = if_mii(ifr); 9280 9281 if (adapter->hw.phy.media_type != e1000_media_type_copper) 9282 return -EOPNOTSUPP; 9283 9284 switch (cmd) { 9285 case SIOCGMIIPHY: 9286 data->phy_id = adapter->hw.phy.addr; 9287 break; 9288 case SIOCGMIIREG: 9289 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, 9290 &data->val_out)) 9291 return -EIO; 9292 break; 9293 case SIOCSMIIREG: 9294 if (igb_write_phy_reg(&adapter->hw, data->reg_num & 0x1F, 9295 data->val_in)) 9296 return -EIO; 9297 break; 9298 default: 9299 return -EOPNOTSUPP; 9300 } 9301 return 0; 9302 } 9303 9304 /** 9305 * igb_ioctl - 9306 * @netdev: pointer to netdev struct 9307 * @ifr: interface structure 9308 * @cmd: ioctl command to execute 9309 **/ 9310 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 9311 { 9312 switch (cmd) { 9313 case SIOCGMIIPHY: 9314 case SIOCGMIIREG: 9315 case SIOCSMIIREG: 9316 return igb_mii_ioctl(netdev, ifr, cmd); 9317 default: 9318 return -EOPNOTSUPP; 9319 } 9320 } 9321 9322 void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) 9323 { 9324 struct igb_adapter *adapter = hw->back; 9325 9326 pci_read_config_word(adapter->pdev, reg, value); 9327 } 9328 9329 void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) 9330 { 9331 struct igb_adapter *adapter = hw->back; 9332 9333 pci_write_config_word(adapter->pdev, reg, *value); 9334 } 9335 9336 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) 9337 { 9338 struct igb_adapter *adapter = hw->back; 9339 9340 if (pcie_capability_read_word(adapter->pdev, reg, value)) 9341 return 
-E1000_ERR_CONFIG; 9342 9343 return 0; 9344 } 9345 9346 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) 9347 { 9348 struct igb_adapter *adapter = hw->back; 9349 9350 if (pcie_capability_write_word(adapter->pdev, reg, *value)) 9351 return -E1000_ERR_CONFIG; 9352 9353 return 0; 9354 } 9355 9356 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) 9357 { 9358 struct igb_adapter *adapter = netdev_priv(netdev); 9359 struct e1000_hw *hw = &adapter->hw; 9360 u32 ctrl, rctl; 9361 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 9362 9363 if (enable) { 9364 /* enable VLAN tag insert/strip */ 9365 ctrl = rd32(E1000_CTRL); 9366 ctrl |= E1000_CTRL_VME; 9367 wr32(E1000_CTRL, ctrl); 9368 9369 /* Disable CFI check */ 9370 rctl = rd32(E1000_RCTL); 9371 rctl &= ~E1000_RCTL_CFIEN; 9372 wr32(E1000_RCTL, rctl); 9373 } else { 9374 /* disable VLAN tag insert/strip */ 9375 ctrl = rd32(E1000_CTRL); 9376 ctrl &= ~E1000_CTRL_VME; 9377 wr32(E1000_CTRL, ctrl); 9378 } 9379 9380 igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable); 9381 } 9382 9383 static int igb_vlan_rx_add_vid(struct net_device *netdev, 9384 __be16 proto, u16 vid) 9385 { 9386 struct igb_adapter *adapter = netdev_priv(netdev); 9387 struct e1000_hw *hw = &adapter->hw; 9388 int pf_id = adapter->vfs_allocated_count; 9389 9390 /* add the filter since PF can receive vlans w/o entry in vlvf */ 9391 if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) 9392 igb_vfta_set(hw, vid, pf_id, true, !!vid); 9393 9394 set_bit(vid, adapter->active_vlans); 9395 9396 return 0; 9397 } 9398 9399 static int igb_vlan_rx_kill_vid(struct net_device *netdev, 9400 __be16 proto, u16 vid) 9401 { 9402 struct igb_adapter *adapter = netdev_priv(netdev); 9403 int pf_id = adapter->vfs_allocated_count; 9404 struct e1000_hw *hw = &adapter->hw; 9405 9406 /* remove VID from filter table */ 9407 if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) 9408 igb_vfta_set(hw, vid, pf_id, false, true); 
9409 9410 clear_bit(vid, adapter->active_vlans); 9411 9412 return 0; 9413 } 9414 9415 static void igb_restore_vlan(struct igb_adapter *adapter) 9416 { 9417 u16 vid = 1; 9418 9419 igb_vlan_mode(adapter->netdev, adapter->netdev->features); 9420 igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); 9421 9422 for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID) 9423 igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); 9424 } 9425 9426 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) 9427 { 9428 struct pci_dev *pdev = adapter->pdev; 9429 struct e1000_mac_info *mac = &adapter->hw.mac; 9430 9431 mac->autoneg = 0; 9432 9433 /* Make sure dplx is at most 1 bit and lsb of speed is not set 9434 * for the switch() below to work 9435 */ 9436 if ((spd & 1) || (dplx & ~1)) 9437 goto err_inval; 9438 9439 /* Fiber NIC's only allow 1000 gbps Full duplex 9440 * and 100Mbps Full duplex for 100baseFx sfp 9441 */ 9442 if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { 9443 switch (spd + dplx) { 9444 case SPEED_10 + DUPLEX_HALF: 9445 case SPEED_10 + DUPLEX_FULL: 9446 case SPEED_100 + DUPLEX_HALF: 9447 goto err_inval; 9448 default: 9449 break; 9450 } 9451 } 9452 9453 switch (spd + dplx) { 9454 case SPEED_10 + DUPLEX_HALF: 9455 mac->forced_speed_duplex = ADVERTISE_10_HALF; 9456 break; 9457 case SPEED_10 + DUPLEX_FULL: 9458 mac->forced_speed_duplex = ADVERTISE_10_FULL; 9459 break; 9460 case SPEED_100 + DUPLEX_HALF: 9461 mac->forced_speed_duplex = ADVERTISE_100_HALF; 9462 break; 9463 case SPEED_100 + DUPLEX_FULL: 9464 mac->forced_speed_duplex = ADVERTISE_100_FULL; 9465 break; 9466 case SPEED_1000 + DUPLEX_FULL: 9467 mac->autoneg = 1; 9468 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 9469 break; 9470 case SPEED_1000 + DUPLEX_HALF: /* not supported */ 9471 default: 9472 goto err_inval; 9473 } 9474 9475 /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ 9476 adapter->hw.phy.mdix = AUTO_ALL_MODES; 
9477 9478 return 0; 9479 9480 err_inval: 9481 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n"); 9482 return -EINVAL; 9483 } 9484 9485 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, 9486 bool runtime) 9487 { 9488 struct net_device *netdev = pci_get_drvdata(pdev); 9489 struct igb_adapter *adapter = netdev_priv(netdev); 9490 struct e1000_hw *hw = &adapter->hw; 9491 u32 ctrl, rctl, status; 9492 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; 9493 bool wake; 9494 9495 rtnl_lock(); 9496 netif_device_detach(netdev); 9497 9498 if (netif_running(netdev)) 9499 __igb_close(netdev, true); 9500 9501 igb_ptp_suspend(adapter); 9502 9503 igb_clear_interrupt_scheme(adapter); 9504 rtnl_unlock(); 9505 9506 status = rd32(E1000_STATUS); 9507 if (status & E1000_STATUS_LU) 9508 wufc &= ~E1000_WUFC_LNKC; 9509 9510 if (wufc) { 9511 igb_setup_rctl(adapter); 9512 igb_set_rx_mode(netdev); 9513 9514 /* turn on all-multi mode if wake on multicast is enabled */ 9515 if (wufc & E1000_WUFC_MC) { 9516 rctl = rd32(E1000_RCTL); 9517 rctl |= E1000_RCTL_MPE; 9518 wr32(E1000_RCTL, rctl); 9519 } 9520 9521 ctrl = rd32(E1000_CTRL); 9522 ctrl |= E1000_CTRL_ADVD3WUC; 9523 wr32(E1000_CTRL, ctrl); 9524 9525 /* Allow time for pending master requests to run */ 9526 igb_disable_pcie_master(hw); 9527 9528 wr32(E1000_WUC, E1000_WUC_PME_EN); 9529 wr32(E1000_WUFC, wufc); 9530 } else { 9531 wr32(E1000_WUC, 0); 9532 wr32(E1000_WUFC, 0); 9533 } 9534 9535 wake = wufc || adapter->en_mng_pt; 9536 if (!wake) 9537 igb_power_down_link(adapter); 9538 else 9539 igb_power_up_link(adapter); 9540 9541 if (enable_wake) 9542 *enable_wake = wake; 9543 9544 /* Release control of h/w to f/w. If f/w is AMT enabled, this 9545 * would have already happened in close and is redundant. 
9546 */ 9547 igb_release_hw_control(adapter); 9548 9549 pci_disable_device(pdev); 9550 9551 return 0; 9552 } 9553 9554 static void igb_deliver_wake_packet(struct net_device *netdev) 9555 { 9556 struct igb_adapter *adapter = netdev_priv(netdev); 9557 struct e1000_hw *hw = &adapter->hw; 9558 struct sk_buff *skb; 9559 u32 wupl; 9560 9561 wupl = rd32(E1000_WUPL) & E1000_WUPL_MASK; 9562 9563 /* WUPM stores only the first 128 bytes of the wake packet. 9564 * Read the packet only if we have the whole thing. 9565 */ 9566 if ((wupl == 0) || (wupl > E1000_WUPM_BYTES)) 9567 return; 9568 9569 skb = netdev_alloc_skb_ip_align(netdev, E1000_WUPM_BYTES); 9570 if (!skb) 9571 return; 9572 9573 skb_put(skb, wupl); 9574 9575 /* Ensure reads are 32-bit aligned */ 9576 wupl = roundup(wupl, 4); 9577 9578 memcpy_fromio(skb->data, hw->hw_addr + E1000_WUPM_REG(0), wupl); 9579 9580 skb->protocol = eth_type_trans(skb, netdev); 9581 netif_rx(skb); 9582 } 9583 9584 static int igb_suspend(struct device *dev) 9585 { 9586 return __igb_shutdown(to_pci_dev(dev), NULL, 0); 9587 } 9588 9589 static int __igb_resume(struct device *dev, bool rpm) 9590 { 9591 struct pci_dev *pdev = to_pci_dev(dev); 9592 struct net_device *netdev = pci_get_drvdata(pdev); 9593 struct igb_adapter *adapter = netdev_priv(netdev); 9594 struct e1000_hw *hw = &adapter->hw; 9595 u32 err, val; 9596 9597 pci_set_power_state(pdev, PCI_D0); 9598 pci_restore_state(pdev); 9599 9600 if (!pci_device_is_present(pdev)) 9601 return -ENODEV; 9602 err = pci_enable_device_mem(pdev); 9603 if (err) { 9604 dev_err(&pdev->dev, 9605 "igb: Cannot enable PCI device from suspend\n"); 9606 return err; 9607 } 9608 pci_set_master(pdev); 9609 9610 pci_enable_wake(pdev, PCI_D3hot, 0); 9611 pci_enable_wake(pdev, PCI_D3cold, 0); 9612 9613 if (igb_init_interrupt_scheme(adapter, true)) { 9614 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 9615 return -ENOMEM; 9616 } 9617 9618 igb_reset(adapter); 9619 9620 /* let the f/w know that the h/w is now 
under the control of the 9621 * driver. 9622 */ 9623 igb_get_hw_control(adapter); 9624 9625 val = rd32(E1000_WUS); 9626 if (val & WAKE_PKT_WUS) 9627 igb_deliver_wake_packet(netdev); 9628 9629 wr32(E1000_WUS, ~0); 9630 9631 if (!rpm) 9632 rtnl_lock(); 9633 if (!err && netif_running(netdev)) 9634 err = __igb_open(netdev, true); 9635 9636 if (!err) 9637 netif_device_attach(netdev); 9638 if (!rpm) 9639 rtnl_unlock(); 9640 9641 return err; 9642 } 9643 9644 static int igb_resume(struct device *dev) 9645 { 9646 return __igb_resume(dev, false); 9647 } 9648 9649 static int igb_runtime_idle(struct device *dev) 9650 { 9651 struct net_device *netdev = dev_get_drvdata(dev); 9652 struct igb_adapter *adapter = netdev_priv(netdev); 9653 9654 if (!igb_has_link(adapter)) 9655 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 9656 9657 return -EBUSY; 9658 } 9659 9660 static int igb_runtime_suspend(struct device *dev) 9661 { 9662 return __igb_shutdown(to_pci_dev(dev), NULL, 1); 9663 } 9664 9665 static int igb_runtime_resume(struct device *dev) 9666 { 9667 return __igb_resume(dev, true); 9668 } 9669 9670 static void igb_shutdown(struct pci_dev *pdev) 9671 { 9672 bool wake; 9673 9674 __igb_shutdown(pdev, &wake, 0); 9675 9676 if (system_state == SYSTEM_POWER_OFF) { 9677 pci_wake_from_d3(pdev, wake); 9678 pci_set_power_state(pdev, PCI_D3hot); 9679 } 9680 } 9681 9682 static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) 9683 { 9684 #ifdef CONFIG_PCI_IOV 9685 int err; 9686 9687 if (num_vfs == 0) { 9688 return igb_disable_sriov(dev, true); 9689 } else { 9690 err = igb_enable_sriov(dev, num_vfs, true); 9691 return err ? err : num_vfs; 9692 } 9693 #endif 9694 return 0; 9695 } 9696 9697 /** 9698 * igb_io_error_detected - called when PCI error is detected 9699 * @pdev: Pointer to PCI device 9700 * @state: The current pci connection state 9701 * 9702 * This function is called after a PCI bus error affecting 9703 * this device has been detected. 
9704 **/ 9705 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, 9706 pci_channel_state_t state) 9707 { 9708 struct net_device *netdev = pci_get_drvdata(pdev); 9709 struct igb_adapter *adapter = netdev_priv(netdev); 9710 9711 if (state == pci_channel_io_normal) { 9712 dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); 9713 return PCI_ERS_RESULT_CAN_RECOVER; 9714 } 9715 9716 netif_device_detach(netdev); 9717 9718 if (state == pci_channel_io_perm_failure) 9719 return PCI_ERS_RESULT_DISCONNECT; 9720 9721 rtnl_lock(); 9722 if (netif_running(netdev)) 9723 igb_down(adapter); 9724 rtnl_unlock(); 9725 9726 pci_disable_device(pdev); 9727 9728 /* Request a slot reset. */ 9729 return PCI_ERS_RESULT_NEED_RESET; 9730 } 9731 9732 /** 9733 * igb_io_slot_reset - called after the pci bus has been reset. 9734 * @pdev: Pointer to PCI device 9735 * 9736 * Restart the card from scratch, as if from a cold-boot. Implementation 9737 * resembles the first-half of the __igb_resume routine. 9738 **/ 9739 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) 9740 { 9741 struct net_device *netdev = pci_get_drvdata(pdev); 9742 struct igb_adapter *adapter = netdev_priv(netdev); 9743 struct e1000_hw *hw = &adapter->hw; 9744 pci_ers_result_t result; 9745 9746 if (pci_enable_device_mem(pdev)) { 9747 dev_err(&pdev->dev, 9748 "Cannot re-enable PCI device after reset.\n"); 9749 result = PCI_ERS_RESULT_DISCONNECT; 9750 } else { 9751 pci_set_master(pdev); 9752 pci_restore_state(pdev); 9753 9754 pci_enable_wake(pdev, PCI_D3hot, 0); 9755 pci_enable_wake(pdev, PCI_D3cold, 0); 9756 9757 /* In case of PCI error, adapter lose its HW address 9758 * so we should re-assign it here. 9759 */ 9760 hw->hw_addr = adapter->io_addr; 9761 9762 igb_reset(adapter); 9763 wr32(E1000_WUS, ~0); 9764 result = PCI_ERS_RESULT_RECOVERED; 9765 } 9766 9767 return result; 9768 } 9769 9770 /** 9771 * igb_io_resume - called when traffic can start flowing again. 
9772 * @pdev: Pointer to PCI device 9773 * 9774 * This callback is called when the error recovery driver tells us that 9775 * its OK to resume normal operation. Implementation resembles the 9776 * second-half of the __igb_resume routine. 9777 */ 9778 static void igb_io_resume(struct pci_dev *pdev) 9779 { 9780 struct net_device *netdev = pci_get_drvdata(pdev); 9781 struct igb_adapter *adapter = netdev_priv(netdev); 9782 9783 rtnl_lock(); 9784 if (netif_running(netdev)) { 9785 if (!test_bit(__IGB_DOWN, &adapter->state)) { 9786 dev_dbg(&pdev->dev, "Resuming from non-fatal error, do nothing.\n"); 9787 rtnl_unlock(); 9788 return; 9789 } 9790 9791 if (igb_up(adapter)) { 9792 dev_err(&pdev->dev, "igb_up failed after reset\n"); 9793 rtnl_unlock(); 9794 return; 9795 } 9796 } 9797 rtnl_unlock(); 9798 9799 netif_device_attach(netdev); 9800 9801 /* let the f/w know that the h/w is now under the control of the 9802 * driver. 9803 */ 9804 igb_get_hw_control(adapter); 9805 } 9806 9807 /** 9808 * igb_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table 9809 * @adapter: Pointer to adapter structure 9810 * @index: Index of the RAR entry which need to be synced with MAC table 9811 **/ 9812 static void igb_rar_set_index(struct igb_adapter *adapter, u32 index) 9813 { 9814 struct e1000_hw *hw = &adapter->hw; 9815 u32 rar_low, rar_high; 9816 u8 *addr = adapter->mac_table[index].addr; 9817 9818 /* HW expects these to be in network order when they are plugged 9819 * into the registers which are little endian. In order to guarantee 9820 * that ordering we need to do an leXX_to_cpup here in order to be 9821 * ready for the byteswap that occurs with writel 9822 */ 9823 rar_low = le32_to_cpup((__le32 *)(addr)); 9824 rar_high = le16_to_cpup((__le16 *)(addr + 4)); 9825 9826 /* Indicate to hardware the Address is Valid. 
*/ 9827 if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) { 9828 if (is_valid_ether_addr(addr)) 9829 rar_high |= E1000_RAH_AV; 9830 9831 if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR) 9832 rar_high |= E1000_RAH_ASEL_SRC_ADDR; 9833 9834 switch (hw->mac.type) { 9835 case e1000_82575: 9836 case e1000_i210: 9837 if (adapter->mac_table[index].state & 9838 IGB_MAC_STATE_QUEUE_STEERING) 9839 rar_high |= E1000_RAH_QSEL_ENABLE; 9840 9841 rar_high |= E1000_RAH_POOL_1 * 9842 adapter->mac_table[index].queue; 9843 break; 9844 default: 9845 rar_high |= E1000_RAH_POOL_1 << 9846 adapter->mac_table[index].queue; 9847 break; 9848 } 9849 } 9850 9851 wr32(E1000_RAL(index), rar_low); 9852 wrfl(); 9853 wr32(E1000_RAH(index), rar_high); 9854 wrfl(); 9855 } 9856 9857 static int igb_set_vf_mac(struct igb_adapter *adapter, 9858 int vf, unsigned char *mac_addr) 9859 { 9860 struct e1000_hw *hw = &adapter->hw; 9861 /* VF MAC addresses start at end of receive addresses and moves 9862 * towards the first, as a result a collision should not be possible 9863 */ 9864 int rar_entry = hw->mac.rar_entry_count - (vf + 1); 9865 unsigned char *vf_mac_addr = adapter->vf_data[vf].vf_mac_addresses; 9866 9867 ether_addr_copy(vf_mac_addr, mac_addr); 9868 ether_addr_copy(adapter->mac_table[rar_entry].addr, mac_addr); 9869 adapter->mac_table[rar_entry].queue = vf; 9870 adapter->mac_table[rar_entry].state |= IGB_MAC_STATE_IN_USE; 9871 igb_rar_set_index(adapter, rar_entry); 9872 9873 return 0; 9874 } 9875 9876 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) 9877 { 9878 struct igb_adapter *adapter = netdev_priv(netdev); 9879 9880 if (vf >= adapter->vfs_allocated_count) 9881 return -EINVAL; 9882 9883 /* Setting the VF MAC to 0 reverts the IGB_VF_FLAG_PF_SET_MAC 9884 * flag and allows to overwrite the MAC via VF netdev. 
This 9885 * is necessary to allow libvirt a way to restore the original 9886 * MAC after unbinding vfio-pci and reloading igbvf after shutting 9887 * down a VM. 9888 */ 9889 if (is_zero_ether_addr(mac)) { 9890 adapter->vf_data[vf].flags &= ~IGB_VF_FLAG_PF_SET_MAC; 9891 dev_info(&adapter->pdev->dev, 9892 "remove administratively set MAC on VF %d\n", 9893 vf); 9894 } else if (is_valid_ether_addr(mac)) { 9895 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; 9896 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", 9897 mac, vf); 9898 dev_info(&adapter->pdev->dev, 9899 "Reload the VF driver to make this change effective."); 9900 /* Generate additional warning if PF is down */ 9901 if (test_bit(__IGB_DOWN, &adapter->state)) { 9902 dev_warn(&adapter->pdev->dev, 9903 "The VF MAC address has been set, but the PF device is not up.\n"); 9904 dev_warn(&adapter->pdev->dev, 9905 "Bring the PF device up before attempting to use the VF device.\n"); 9906 } 9907 } else { 9908 return -EINVAL; 9909 } 9910 return igb_set_vf_mac(adapter, vf, mac); 9911 } 9912 9913 static int igb_link_mbps(int internal_link_speed) 9914 { 9915 switch (internal_link_speed) { 9916 case SPEED_100: 9917 return 100; 9918 case SPEED_1000: 9919 return 1000; 9920 default: 9921 return 0; 9922 } 9923 } 9924 9925 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, 9926 int link_speed) 9927 { 9928 int rf_dec, rf_int; 9929 u32 bcnrc_val; 9930 9931 if (tx_rate != 0) { 9932 /* Calculate the rate factor values to set */ 9933 rf_int = link_speed / tx_rate; 9934 rf_dec = (link_speed - (rf_int * tx_rate)); 9935 rf_dec = (rf_dec * BIT(E1000_RTTBCNRC_RF_INT_SHIFT)) / 9936 tx_rate; 9937 9938 bcnrc_val = E1000_RTTBCNRC_RS_ENA; 9939 bcnrc_val |= FIELD_PREP(E1000_RTTBCNRC_RF_INT_MASK, rf_int); 9940 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); 9941 } else { 9942 bcnrc_val = 0; 9943 } 9944 9945 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */ 9946 /* Set global transmit compensation time 
to the MMW_SIZE in RTTBCNRM 9947 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported. 9948 */ 9949 wr32(E1000_RTTBCNRM, 0x14); 9950 wr32(E1000_RTTBCNRC, bcnrc_val); 9951 } 9952 9953 static void igb_check_vf_rate_limit(struct igb_adapter *adapter) 9954 { 9955 int actual_link_speed, i; 9956 bool reset_rate = false; 9957 9958 /* VF TX rate limit was not set or not supported */ 9959 if ((adapter->vf_rate_link_speed == 0) || 9960 (adapter->hw.mac.type != e1000_82576)) 9961 return; 9962 9963 actual_link_speed = igb_link_mbps(adapter->link_speed); 9964 if (actual_link_speed != adapter->vf_rate_link_speed) { 9965 reset_rate = true; 9966 adapter->vf_rate_link_speed = 0; 9967 dev_info(&adapter->pdev->dev, 9968 "Link speed has been changed. VF Transmit rate is disabled\n"); 9969 } 9970 9971 for (i = 0; i < adapter->vfs_allocated_count; i++) { 9972 if (reset_rate) 9973 adapter->vf_data[i].tx_rate = 0; 9974 9975 igb_set_vf_rate_limit(&adapter->hw, i, 9976 adapter->vf_data[i].tx_rate, 9977 actual_link_speed); 9978 } 9979 } 9980 9981 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, 9982 int min_tx_rate, int max_tx_rate) 9983 { 9984 struct igb_adapter *adapter = netdev_priv(netdev); 9985 struct e1000_hw *hw = &adapter->hw; 9986 int actual_link_speed; 9987 9988 if (hw->mac.type != e1000_82576) 9989 return -EOPNOTSUPP; 9990 9991 if (min_tx_rate) 9992 return -EINVAL; 9993 9994 actual_link_speed = igb_link_mbps(adapter->link_speed); 9995 if ((vf >= adapter->vfs_allocated_count) || 9996 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || 9997 (max_tx_rate < 0) || 9998 (max_tx_rate > actual_link_speed)) 9999 return -EINVAL; 10000 10001 adapter->vf_rate_link_speed = actual_link_speed; 10002 adapter->vf_data[vf].tx_rate = (u16)max_tx_rate; 10003 igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed); 10004 10005 return 0; 10006 } 10007 10008 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, 10009 bool setting) 10010 { 10011 struct igb_adapter 
*adapter = netdev_priv(netdev); 10012 struct e1000_hw *hw = &adapter->hw; 10013 u32 reg_val, reg_offset; 10014 10015 if (!adapter->vfs_allocated_count) 10016 return -EOPNOTSUPP; 10017 10018 if (vf >= adapter->vfs_allocated_count) 10019 return -EINVAL; 10020 10021 reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC; 10022 reg_val = rd32(reg_offset); 10023 if (setting) 10024 reg_val |= (BIT(vf) | 10025 BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)); 10026 else 10027 reg_val &= ~(BIT(vf) | 10028 BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)); 10029 wr32(reg_offset, reg_val); 10030 10031 adapter->vf_data[vf].spoofchk_enabled = setting; 10032 return 0; 10033 } 10034 10035 static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting) 10036 { 10037 struct igb_adapter *adapter = netdev_priv(netdev); 10038 10039 if (vf >= adapter->vfs_allocated_count) 10040 return -EINVAL; 10041 if (adapter->vf_data[vf].trusted == setting) 10042 return 0; 10043 10044 adapter->vf_data[vf].trusted = setting; 10045 10046 dev_info(&adapter->pdev->dev, "VF %u is %strusted\n", 10047 vf, setting ? 
"" : "not "); 10048 return 0; 10049 } 10050 10051 static int igb_ndo_get_vf_config(struct net_device *netdev, 10052 int vf, struct ifla_vf_info *ivi) 10053 { 10054 struct igb_adapter *adapter = netdev_priv(netdev); 10055 if (vf >= adapter->vfs_allocated_count) 10056 return -EINVAL; 10057 ivi->vf = vf; 10058 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); 10059 ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; 10060 ivi->min_tx_rate = 0; 10061 ivi->vlan = adapter->vf_data[vf].pf_vlan; 10062 ivi->qos = adapter->vf_data[vf].pf_qos; 10063 ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; 10064 ivi->trusted = adapter->vf_data[vf].trusted; 10065 return 0; 10066 } 10067 10068 static void igb_vmm_control(struct igb_adapter *adapter) 10069 { 10070 struct e1000_hw *hw = &adapter->hw; 10071 u32 reg; 10072 10073 switch (hw->mac.type) { 10074 case e1000_82575: 10075 case e1000_i210: 10076 case e1000_i211: 10077 case e1000_i354: 10078 default: 10079 /* replication is not supported for 82575 */ 10080 return; 10081 case e1000_82576: 10082 /* notify HW that the MAC is adding vlan tags */ 10083 reg = rd32(E1000_DTXCTL); 10084 reg |= E1000_DTXCTL_VLAN_ADDED; 10085 wr32(E1000_DTXCTL, reg); 10086 fallthrough; 10087 case e1000_82580: 10088 /* enable replication vlan tag stripping */ 10089 reg = rd32(E1000_RPLOLR); 10090 reg |= E1000_RPLOLR_STRVLAN; 10091 wr32(E1000_RPLOLR, reg); 10092 fallthrough; 10093 case e1000_i350: 10094 /* none of the above registers are supported by i350 */ 10095 break; 10096 } 10097 10098 if (adapter->vfs_allocated_count) { 10099 igb_vmdq_set_loopback_pf(hw, true); 10100 igb_vmdq_set_replication_pf(hw, true); 10101 igb_vmdq_set_anti_spoofing_pf(hw, true, 10102 adapter->vfs_allocated_count); 10103 } else { 10104 igb_vmdq_set_loopback_pf(hw, false); 10105 igb_vmdq_set_replication_pf(hw, false); 10106 } 10107 } 10108 10109 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) 10110 { 10111 struct e1000_hw *hw = &adapter->hw; 10112 
u32 dmac_thr; 10113 u16 hwm; 10114 u32 reg; 10115 10116 if (hw->mac.type > e1000_82580) { 10117 if (adapter->flags & IGB_FLAG_DMAC) { 10118 /* force threshold to 0. */ 10119 wr32(E1000_DMCTXTH, 0); 10120 10121 /* DMA Coalescing high water mark needs to be greater 10122 * than the Rx threshold. Set hwm to PBA - max frame 10123 * size in 16B units, capping it at PBA - 6KB. 10124 */ 10125 hwm = 64 * (pba - 6); 10126 reg = rd32(E1000_FCRTC); 10127 reg &= ~E1000_FCRTC_RTH_COAL_MASK; 10128 reg |= FIELD_PREP(E1000_FCRTC_RTH_COAL_MASK, hwm); 10129 wr32(E1000_FCRTC, reg); 10130 10131 /* Set the DMA Coalescing Rx threshold to PBA - 2 * max 10132 * frame size, capping it at PBA - 10KB. 10133 */ 10134 dmac_thr = pba - 10; 10135 reg = rd32(E1000_DMACR); 10136 reg &= ~E1000_DMACR_DMACTHR_MASK; 10137 reg |= FIELD_PREP(E1000_DMACR_DMACTHR_MASK, dmac_thr); 10138 10139 /* transition to L0x or L1 if available..*/ 10140 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); 10141 10142 /* watchdog timer= +-1000 usec in 32usec intervals */ 10143 reg |= (1000 >> 5); 10144 10145 /* Disable BMC-to-OS Watchdog Enable */ 10146 if (hw->mac.type != e1000_i354) 10147 reg &= ~E1000_DMACR_DC_BMC2OSW_EN; 10148 wr32(E1000_DMACR, reg); 10149 10150 /* no lower threshold to disable 10151 * coalescing(smart fifb)-UTRESH=0 10152 */ 10153 wr32(E1000_DMCRTRH, 0); 10154 10155 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4); 10156 10157 wr32(E1000_DMCTLX, reg); 10158 10159 /* free space in tx packet buffer to wake from 10160 * DMA coal 10161 */ 10162 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - 10163 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); 10164 } 10165 10166 if (hw->mac.type >= e1000_i210 || 10167 (adapter->flags & IGB_FLAG_DMAC)) { 10168 reg = rd32(E1000_PCIEMISC); 10169 reg |= E1000_PCIEMISC_LX_DECISION; 10170 wr32(E1000_PCIEMISC, reg); 10171 } /* endif adapter->dmac is not disabled */ 10172 } else if (hw->mac.type == e1000_82580) { 10173 u32 reg = rd32(E1000_PCIEMISC); 10174 10175 wr32(E1000_PCIEMISC, 
reg & ~E1000_PCIEMISC_LX_DECISION); 10176 wr32(E1000_DMACR, 0); 10177 } 10178 } 10179 10180 /** 10181 * igb_read_i2c_byte - Reads 8 bit word over I2C 10182 * @hw: pointer to hardware structure 10183 * @byte_offset: byte offset to read 10184 * @dev_addr: device address 10185 * @data: value read 10186 * 10187 * Performs byte read operation over I2C interface at 10188 * a specified device address. 10189 **/ 10190 s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, 10191 u8 dev_addr, u8 *data) 10192 { 10193 struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); 10194 struct i2c_client *this_client = adapter->i2c_client; 10195 s32 status; 10196 u16 swfw_mask = 0; 10197 10198 if (!this_client) 10199 return E1000_ERR_I2C; 10200 10201 swfw_mask = E1000_SWFW_PHY0_SM; 10202 10203 if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) 10204 return E1000_ERR_SWFW_SYNC; 10205 10206 status = i2c_smbus_read_byte_data(this_client, byte_offset); 10207 hw->mac.ops.release_swfw_sync(hw, swfw_mask); 10208 10209 if (status < 0) 10210 return E1000_ERR_I2C; 10211 else { 10212 *data = status; 10213 return 0; 10214 } 10215 } 10216 10217 /** 10218 * igb_write_i2c_byte - Writes 8 bit word over I2C 10219 * @hw: pointer to hardware structure 10220 * @byte_offset: byte offset to write 10221 * @dev_addr: device address 10222 * @data: value to write 10223 * 10224 * Performs byte write operation over I2C interface at 10225 * a specified device address. 
10226 **/ 10227 s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, 10228 u8 dev_addr, u8 data) 10229 { 10230 struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); 10231 struct i2c_client *this_client = adapter->i2c_client; 10232 s32 status; 10233 u16 swfw_mask = E1000_SWFW_PHY0_SM; 10234 10235 if (!this_client) 10236 return E1000_ERR_I2C; 10237 10238 if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) 10239 return E1000_ERR_SWFW_SYNC; 10240 status = i2c_smbus_write_byte_data(this_client, byte_offset, data); 10241 hw->mac.ops.release_swfw_sync(hw, swfw_mask); 10242 10243 if (status) 10244 return E1000_ERR_I2C; 10245 else 10246 return 0; 10247 10248 } 10249 10250 int igb_reinit_queues(struct igb_adapter *adapter) 10251 { 10252 struct net_device *netdev = adapter->netdev; 10253 struct pci_dev *pdev = adapter->pdev; 10254 int err = 0; 10255 10256 if (netif_running(netdev)) 10257 igb_close(netdev); 10258 10259 igb_reset_interrupt_capability(adapter); 10260 10261 if (igb_init_interrupt_scheme(adapter, true)) { 10262 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 10263 return -ENOMEM; 10264 } 10265 10266 if (netif_running(netdev)) 10267 err = igb_open(netdev); 10268 10269 return err; 10270 } 10271 10272 static void igb_nfc_filter_exit(struct igb_adapter *adapter) 10273 { 10274 struct igb_nfc_filter *rule; 10275 10276 spin_lock(&adapter->nfc_lock); 10277 10278 hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) 10279 igb_erase_filter(adapter, rule); 10280 10281 hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node) 10282 igb_erase_filter(adapter, rule); 10283 10284 spin_unlock(&adapter->nfc_lock); 10285 } 10286 10287 static void igb_nfc_filter_restore(struct igb_adapter *adapter) 10288 { 10289 struct igb_nfc_filter *rule; 10290 10291 spin_lock(&adapter->nfc_lock); 10292 10293 hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) 10294 igb_add_filter(adapter, rule); 10295 10296 
spin_unlock(&adapter->nfc_lock); 10297 } 10298 10299 static _DEFINE_DEV_PM_OPS(igb_pm_ops, igb_suspend, igb_resume, 10300 igb_runtime_suspend, igb_runtime_resume, 10301 igb_runtime_idle); 10302 10303 static struct pci_driver igb_driver = { 10304 .name = igb_driver_name, 10305 .id_table = igb_pci_tbl, 10306 .probe = igb_probe, 10307 .remove = igb_remove, 10308 .driver.pm = pm_ptr(&igb_pm_ops), 10309 .shutdown = igb_shutdown, 10310 .sriov_configure = igb_pci_sriov_configure, 10311 .err_handler = &igb_err_handler 10312 }; 10313 10314 /* igb_main.c */ 10315