xref: /linux/drivers/net/ethernet/intel/igb/igb_main.c (revision b43ab901d671e3e3cad425ea5e9a3c74e266dcdd)
1 /*******************************************************************************
2 
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5 
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9 
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14 
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21 
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25 
26 *******************************************************************************/
27 
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29 
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61 
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
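/* With the MAJ/MIN/BUILD values above, DRV_VERSION expands to "3.2.10-k". */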
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 				"Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
72 
73 static const struct e1000_info *igb_info_tbl[] = {
74 	[board_82575] = &e1000_82575_info,
75 };
76 
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 	/* required last entry */
104 	{0, }
105 };
106 
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108 
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 						 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 			       int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 				 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
168 
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
174 
175 #ifdef CONFIG_PM
176 static int igb_suspend(struct device *);
177 static int igb_resume(struct device *);
178 #ifdef CONFIG_PM_RUNTIME
179 static int igb_runtime_suspend(struct device *dev);
180 static int igb_runtime_resume(struct device *dev);
181 static int igb_runtime_idle(struct device *dev);
182 #endif
183 static const struct dev_pm_ops igb_pm_ops = {
184 	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
185 	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
186 			igb_runtime_idle)
187 };
188 #endif
189 static void igb_shutdown(struct pci_dev *);
190 #ifdef CONFIG_IGB_DCA
191 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
192 static struct notifier_block dca_notifier = {
193 	.notifier_call	= igb_notify_dca,
194 	.next		= NULL,
195 	.priority	= 0
196 };
197 #endif
198 #ifdef CONFIG_NET_POLL_CONTROLLER
199 /* for netdump / net console */
200 static void igb_netpoll(struct net_device *);
201 #endif
202 #ifdef CONFIG_PCI_IOV
203 static unsigned int max_vfs = 0;
204 module_param(max_vfs, uint, 0);
205 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
206                  "per physical function");
207 #endif /* CONFIG_PCI_IOV */
208 
209 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
210 		     pci_channel_state_t);
211 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
212 static void igb_io_resume(struct pci_dev *);
213 
214 static struct pci_error_handlers igb_err_handler = {
215 	.error_detected = igb_io_error_detected,
216 	.slot_reset = igb_io_slot_reset,
217 	.resume = igb_io_resume,
218 };
219 
220 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
221 
222 static struct pci_driver igb_driver = {
223 	.name     = igb_driver_name,
224 	.id_table = igb_pci_tbl,
225 	.probe    = igb_probe,
226 	.remove   = __devexit_p(igb_remove),
227 #ifdef CONFIG_PM
228 	.driver.pm = &igb_pm_ops,
229 #endif
230 	.shutdown = igb_shutdown,
231 	.err_handler = &igb_err_handler
232 };
233 
234 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
235 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
236 MODULE_LICENSE("GPL");
237 MODULE_VERSION(DRV_VERSION);
238 
239 struct igb_reg_info {
240 	u32 ofs;
241 	char *name;
242 };
243 
244 static const struct igb_reg_info igb_reg_info_tbl[] = {
245 
246 	/* General Registers */
247 	{E1000_CTRL, "CTRL"},
248 	{E1000_STATUS, "STATUS"},
249 	{E1000_CTRL_EXT, "CTRL_EXT"},
250 
251 	/* Interrupt Registers */
252 	{E1000_ICR, "ICR"},
253 
254 	/* RX Registers */
255 	{E1000_RCTL, "RCTL"},
256 	{E1000_RDLEN(0), "RDLEN"},
257 	{E1000_RDH(0), "RDH"},
258 	{E1000_RDT(0), "RDT"},
259 	{E1000_RXDCTL(0), "RXDCTL"},
260 	{E1000_RDBAL(0), "RDBAL"},
261 	{E1000_RDBAH(0), "RDBAH"},
262 
263 	/* TX Registers */
264 	{E1000_TCTL, "TCTL"},
265 	{E1000_TDBAL(0), "TDBAL"},
266 	{E1000_TDBAH(0), "TDBAH"},
267 	{E1000_TDLEN(0), "TDLEN"},
268 	{E1000_TDH(0), "TDH"},
269 	{E1000_TDT(0), "TDT"},
270 	{E1000_TXDCTL(0), "TXDCTL"},
271 	{E1000_TDFH, "TDFH"},
272 	{E1000_TDFT, "TDFT"},
273 	{E1000_TDFHS, "TDFHS"},
274 	{E1000_TDFPC, "TDFPC"},
275 
276 	/* List Terminator */
277 	{}
278 };
279 
280 /*
281  * igb_regdump - register printout routine
282  */
283 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
284 {
285 	int n = 0;
286 	char rname[16];
287 	u32 regs[8];
288 
289 	switch (reginfo->ofs) {
290 	case E1000_RDLEN(0):
291 		for (n = 0; n < 4; n++)
292 			regs[n] = rd32(E1000_RDLEN(n));
293 		break;
294 	case E1000_RDH(0):
295 		for (n = 0; n < 4; n++)
296 			regs[n] = rd32(E1000_RDH(n));
297 		break;
298 	case E1000_RDT(0):
299 		for (n = 0; n < 4; n++)
300 			regs[n] = rd32(E1000_RDT(n));
301 		break;
302 	case E1000_RXDCTL(0):
303 		for (n = 0; n < 4; n++)
304 			regs[n] = rd32(E1000_RXDCTL(n));
305 		break;
306 	case E1000_RDBAL(0):
307 		for (n = 0; n < 4; n++)
308 			regs[n] = rd32(E1000_RDBAL(n));
309 		break;
310 	case E1000_RDBAH(0):
311 		for (n = 0; n < 4; n++)
312 			regs[n] = rd32(E1000_RDBAH(n));
313 		break;
314 	case E1000_TDBAL(0):
315 		for (n = 0; n < 4; n++)
316 			regs[n] = rd32(E1000_TDBAL(n));
317 		break;
318 	case E1000_TDBAH(0):
319 		for (n = 0; n < 4; n++)
320 			regs[n] = rd32(E1000_TDBAH(n));
321 		break;
322 	case E1000_TDLEN(0):
323 		for (n = 0; n < 4; n++)
324 			regs[n] = rd32(E1000_TDLEN(n));
325 		break;
326 	case E1000_TDH(0):
327 		for (n = 0; n < 4; n++)
328 			regs[n] = rd32(E1000_TDH(n));
329 		break;
330 	case E1000_TDT(0):
331 		for (n = 0; n < 4; n++)
332 			regs[n] = rd32(E1000_TDT(n));
333 		break;
334 	case E1000_TXDCTL(0):
335 		for (n = 0; n < 4; n++)
336 			regs[n] = rd32(E1000_TXDCTL(n));
337 		break;
338 	default:
339 		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
340 		return;
341 	}
342 
343 	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
344 	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
345 		regs[2], regs[3]);
346 }
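
/*
 * For a per-queue register such as RDLEN, the pr_info above emits a single
 * combined line of roughly this form (values illustrative only):
 *
 *   RDLEN[0-3]      00001000 00001000 00001000 00001000
 */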
347 
348 /*
349  * igb_dump - Print registers, tx-rings and rx-rings
350  */
351 static void igb_dump(struct igb_adapter *adapter)
352 {
353 	struct net_device *netdev = adapter->netdev;
354 	struct e1000_hw *hw = &adapter->hw;
355 	struct igb_reg_info *reginfo;
356 	struct igb_ring *tx_ring;
357 	union e1000_adv_tx_desc *tx_desc;
358 	struct my_u0 { u64 a; u64 b; } *u0;
359 	struct igb_ring *rx_ring;
360 	union e1000_adv_rx_desc *rx_desc;
361 	u32 staterr;
362 	u16 i, n;
363 
364 	if (!netif_msg_hw(adapter))
365 		return;
366 
367 	/* Print netdevice Info */
368 	if (netdev) {
369 		dev_info(&adapter->pdev->dev, "Net device Info\n");
370 		pr_info("Device Name     state            trans_start      "
371 			"last_rx\n");
372 		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
373 			netdev->state, netdev->trans_start, netdev->last_rx);
374 	}
375 
376 	/* Print Registers */
377 	dev_info(&adapter->pdev->dev, "Register Dump\n");
378 	pr_info(" Register Name   Value\n");
379 	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
380 	     reginfo->name; reginfo++) {
381 		igb_regdump(hw, reginfo);
382 	}
383 
384 	/* Print TX Ring Summary */
385 	if (!netdev || !netif_running(netdev))
386 		goto exit;
387 
388 	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
389 	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
390 	for (n = 0; n < adapter->num_tx_queues; n++) {
391 		struct igb_tx_buffer *buffer_info;
392 		tx_ring = adapter->tx_ring[n];
393 		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
394 		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
395 			n, tx_ring->next_to_use, tx_ring->next_to_clean,
396 			(u64)buffer_info->dma,
397 			buffer_info->length,
398 			buffer_info->next_to_watch,
399 			(u64)buffer_info->time_stamp);
400 	}
401 
402 	/* Print TX Rings */
403 	if (!netif_msg_tx_done(adapter))
404 		goto rx_ring_summary;
405 
406 	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
407 
408 	/* Transmit Descriptor Formats
409 	 *
410 	 * Advanced Transmit Descriptor
411 	 *   +--------------------------------------------------------------+
412 	 * 0 |         Buffer Address [63:0]                                |
413 	 *   +--------------------------------------------------------------+
414 	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
415 	 *   +--------------------------------------------------------------+
416 	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
417 	 */
418 
419 	for (n = 0; n < adapter->num_tx_queues; n++) {
420 		tx_ring = adapter->tx_ring[n];
421 		pr_info("------------------------------------\n");
422 		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
423 		pr_info("------------------------------------\n");
424 		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
425 			"[bi->dma       ] leng  ntw timestamp        "
426 			"bi->skb\n");
427 
428 		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
429 			const char *next_desc;
430 			struct igb_tx_buffer *buffer_info;
431 			tx_desc = IGB_TX_DESC(tx_ring, i);
432 			buffer_info = &tx_ring->tx_buffer_info[i];
433 			u0 = (struct my_u0 *)tx_desc;
434 			if (i == tx_ring->next_to_use &&
435 			    i == tx_ring->next_to_clean)
436 				next_desc = " NTC/U";
437 			else if (i == tx_ring->next_to_use)
438 				next_desc = " NTU";
439 			else if (i == tx_ring->next_to_clean)
440 				next_desc = " NTC";
441 			else
442 				next_desc = "";
443 
444 			pr_info("T [0x%03X]    %016llX %016llX %016llX"
445 				" %04X  %p %016llX %p%s\n", i,
446 				le64_to_cpu(u0->a),
447 				le64_to_cpu(u0->b),
448 				(u64)buffer_info->dma,
449 				buffer_info->length,
450 				buffer_info->next_to_watch,
451 				(u64)buffer_info->time_stamp,
452 				buffer_info->skb, next_desc);
453 
454 			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
455 				print_hex_dump(KERN_INFO, "",
456 					DUMP_PREFIX_ADDRESS,
457 					16, 1, phys_to_virt(buffer_info->dma),
458 					buffer_info->length, true);
459 		}
460 	}
461 
462 	/* Print RX Rings Summary */
463 rx_ring_summary:
464 	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
465 	pr_info("Queue [NTU] [NTC]\n");
466 	for (n = 0; n < adapter->num_rx_queues; n++) {
467 		rx_ring = adapter->rx_ring[n];
468 		pr_info(" %5d %5X %5X\n",
469 			n, rx_ring->next_to_use, rx_ring->next_to_clean);
470 	}
471 
472 	/* Print RX Rings */
473 	if (!netif_msg_rx_status(adapter))
474 		goto exit;
475 
476 	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
477 
478 	/* Advanced Receive Descriptor (Read) Format
479 	 *    63                                           1        0
480 	 *    +-----------------------------------------------------+
481 	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
482 	 *    +----------------------------------------------+------+
483 	 *  8 |       Header Buffer Address [63:1]           |  DD  |
484 	 *    +-----------------------------------------------------+
485 	 *
486 	 *
487 	 * Advanced Receive Descriptor (Write-Back) Format
488 	 *
489 	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
490 	 *   +------------------------------------------------------+
491 	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
492 	 *   | Checksum   Ident  |   |           |    | Type | Type |
493 	 *   +------------------------------------------------------+
494 	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
495 	 *   +------------------------------------------------------+
496 	 *   63       48 47    32 31            20 19               0
497 	 */
498 
499 	for (n = 0; n < adapter->num_rx_queues; n++) {
500 		rx_ring = adapter->rx_ring[n];
501 		pr_info("------------------------------------\n");
502 		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
503 		pr_info("------------------------------------\n");
504 		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
505 			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
506 		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
507 			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
508 
509 		for (i = 0; i < rx_ring->count; i++) {
510 			const char *next_desc;
511 			struct igb_rx_buffer *buffer_info;
512 			buffer_info = &rx_ring->rx_buffer_info[i];
513 			rx_desc = IGB_RX_DESC(rx_ring, i);
514 			u0 = (struct my_u0 *)rx_desc;
515 			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
516 
517 			if (i == rx_ring->next_to_use)
518 				next_desc = " NTU";
519 			else if (i == rx_ring->next_to_clean)
520 				next_desc = " NTC";
521 			else
522 				next_desc = "";
523 
524 			if (staterr & E1000_RXD_STAT_DD) {
525 				/* Descriptor Done */
526 				pr_info("%s[0x%03X]     %016llX %016llX -------"
527 					"--------- %p%s\n", "RWB", i,
528 					le64_to_cpu(u0->a),
529 					le64_to_cpu(u0->b),
530 					buffer_info->skb, next_desc);
531 			} else {
532 				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
533 					" %p%s\n", "R  ", i,
534 					le64_to_cpu(u0->a),
535 					le64_to_cpu(u0->b),
536 					(u64)buffer_info->dma,
537 					buffer_info->skb, next_desc);
538 
539 				if (netif_msg_pktdata(adapter)) {
540 					print_hex_dump(KERN_INFO, "",
541 						DUMP_PREFIX_ADDRESS,
542 						16, 1,
543 						phys_to_virt(buffer_info->dma),
544 						IGB_RX_HDR_LEN, true);
545 					print_hex_dump(KERN_INFO, "",
546 					  DUMP_PREFIX_ADDRESS,
547 					  16, 1,
548 					  phys_to_virt(
549 					    buffer_info->page_dma +
550 					    buffer_info->page_offset),
551 					  PAGE_SIZE/2, true);
552 				}
553 			}
554 		}
555 	}
556 
557 exit:
558 	return;
559 }
560 
561 
562 /**
563  * igb_read_clock - read raw cycle counter (to be used by time counter)
564  */
565 static cycle_t igb_read_clock(const struct cyclecounter *tc)
566 {
567 	struct igb_adapter *adapter =
568 		container_of(tc, struct igb_adapter, cycles);
569 	struct e1000_hw *hw = &adapter->hw;
570 	u64 stamp = 0;
571 	int shift = 0;
572 
573 	/*
574 	 * The timestamp latches on lowest register read. For the 82580
575 	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
576 	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
577 	 */
578 	if (hw->mac.type >= e1000_82580) {
579 		stamp = rd32(E1000_SYSTIMR) >> 8;
580 		shift = IGB_82580_TSYNC_SHIFT;
581 	}
582 
583 	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
584 	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
585 	return stamp;
586 }
587 
588 /**
589  * igb_get_hw_dev - return device
590  * used by hardware layer to print debugging information
591  **/
592 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
593 {
594 	struct igb_adapter *adapter = hw->back;
595 	return adapter->netdev;
596 }
597 
598 /**
599  * igb_init_module - Driver Registration Routine
600  *
601  * igb_init_module is the first routine called when the driver is
602  * loaded. All it does is register with the PCI subsystem.
603  **/
604 static int __init igb_init_module(void)
605 {
606 	int ret;
607 	pr_info("%s - version %s\n",
608 	       igb_driver_string, igb_driver_version);
609 
610 	pr_info("%s\n", igb_copyright);
611 
612 #ifdef CONFIG_IGB_DCA
613 	dca_register_notify(&dca_notifier);
614 #endif
615 	ret = pci_register_driver(&igb_driver);
616 	return ret;
617 }
618 
619 module_init(igb_init_module);
620 
621 /**
622  * igb_exit_module - Driver Exit Cleanup Routine
623  *
624  * igb_exit_module is called just before the driver is removed
625  * from memory.
626  **/
627 static void __exit igb_exit_module(void)
628 {
629 #ifdef CONFIG_IGB_DCA
630 	dca_unregister_notify(&dca_notifier);
631 #endif
632 	pci_unregister_driver(&igb_driver);
633 }
634 
635 module_exit(igb_exit_module);
636 
637 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
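/*
 * Q_IDX_82576(i) interleaves ring i across the two banks of eight hardware
 * queues on the 82576: 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, and so on, matching
 * the VF queue layout described in igb_cache_ring_register() below.
 */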
638 /**
639  * igb_cache_ring_register - Descriptor ring to register mapping
640  * @adapter: board private structure to initialize
641  *
642  * Once we know the feature-set enabled for the device, we'll cache
643  * the register offset the descriptor ring is assigned to.
644  **/
645 static void igb_cache_ring_register(struct igb_adapter *adapter)
646 {
647 	int i = 0, j = 0;
648 	u32 rbase_offset = adapter->vfs_allocated_count;
649 
650 	switch (adapter->hw.mac.type) {
651 	case e1000_82576:
652 		/* The queues are allocated for virtualization such that VF 0
653 		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
654 		 * In order to avoid collision we start at the first free queue
655 		 * and continue consuming queues in the same sequence
656 		 */
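		/* Illustrative case: with 4 VFs, rbase_offset is 4 and the
		 * PF's RSS rings take the first free slots: ring 0 -> queue 4,
		 * ring 1 -> queue 12, ring 2 -> queue 5, ring 3 -> queue 13.
		 */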
657 		if (adapter->vfs_allocated_count) {
658 			for (; i < adapter->rss_queues; i++)
659 				adapter->rx_ring[i]->reg_idx = rbase_offset +
660 				                               Q_IDX_82576(i);
661 		}
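		/* Fall through - any remaining queues are mapped 1:1 below */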
662 	case e1000_82575:
663 	case e1000_82580:
664 	case e1000_i350:
665 	default:
666 		for (; i < adapter->num_rx_queues; i++)
667 			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
668 		for (; j < adapter->num_tx_queues; j++)
669 			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
670 		break;
671 	}
672 }
673 
674 static void igb_free_queues(struct igb_adapter *adapter)
675 {
676 	int i;
677 
678 	for (i = 0; i < adapter->num_tx_queues; i++) {
679 		kfree(adapter->tx_ring[i]);
680 		adapter->tx_ring[i] = NULL;
681 	}
682 	for (i = 0; i < adapter->num_rx_queues; i++) {
683 		kfree(adapter->rx_ring[i]);
684 		adapter->rx_ring[i] = NULL;
685 	}
686 	adapter->num_rx_queues = 0;
687 	adapter->num_tx_queues = 0;
688 }
689 
690 /**
691  * igb_alloc_queues - Allocate memory for all rings
692  * @adapter: board private structure to initialize
693  *
694  * We allocate one ring per queue at run-time since we don't know the
695  * number of queues at compile-time.
696  **/
697 static int igb_alloc_queues(struct igb_adapter *adapter)
698 {
699 	struct igb_ring *ring;
700 	int i;
701 	int orig_node = adapter->node;
702 
703 	for (i = 0; i < adapter->num_tx_queues; i++) {
704 		if (orig_node == -1) {
705 			int cur_node = next_online_node(adapter->node);
706 			if (cur_node == MAX_NUMNODES)
707 				cur_node = first_online_node;
708 			adapter->node = cur_node;
709 		}
710 		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
711 				    adapter->node);
712 		if (!ring)
713 			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
714 		if (!ring)
715 			goto err;
716 		ring->count = adapter->tx_ring_count;
717 		ring->queue_index = i;
718 		ring->dev = &adapter->pdev->dev;
719 		ring->netdev = adapter->netdev;
720 		ring->numa_node = adapter->node;
721 		/* For 82575, context index must be unique per ring. */
722 		if (adapter->hw.mac.type == e1000_82575)
723 			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
724 		adapter->tx_ring[i] = ring;
725 	}
726 	/* Restore the adapter's original node */
727 	adapter->node = orig_node;
728 
729 	for (i = 0; i < adapter->num_rx_queues; i++) {
730 		if (orig_node == -1) {
731 			int cur_node = next_online_node(adapter->node);
732 			if (cur_node == MAX_NUMNODES)
733 				cur_node = first_online_node;
734 			adapter->node = cur_node;
735 		}
736 		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
737 				    adapter->node);
738 		if (!ring)
739 			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
740 		if (!ring)
741 			goto err;
742 		ring->count = adapter->rx_ring_count;
743 		ring->queue_index = i;
744 		ring->dev = &adapter->pdev->dev;
745 		ring->netdev = adapter->netdev;
746 		ring->numa_node = adapter->node;
747 		/* set flag indicating ring supports SCTP checksum offload */
748 		if (adapter->hw.mac.type >= e1000_82576)
749 			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
750 
751 		/* On i350, loopback VLAN packets have the tag byte-swapped. */
752 		if (adapter->hw.mac.type == e1000_i350)
753 			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
754 
755 		adapter->rx_ring[i] = ring;
756 	}
757 	/* Restore the adapter's original node */
758 	adapter->node = orig_node;
759 
760 	igb_cache_ring_register(adapter);
761 
762 	return 0;
763 
764 err:
765 	/* Restore the adapter's original node */
766 	adapter->node = orig_node;
767 	igb_free_queues(adapter);
768 
769 	return -ENOMEM;
770 }
771 
772 /**
773  *  igb_write_ivar - configure ivar for given MSI-X vector
774  *  @hw: pointer to the HW structure
775  *  @msix_vector: vector number we are allocating to a given ring
776  *  @index: row index of IVAR register to write within IVAR table
777  *  @offset: column offset within IVAR, should be a multiple of 8
778  *
779  *  This function is intended to handle the writing of the IVAR register
780  *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
781  *  each containing a cause allocation for an Rx and Tx ring, and a
782  *  variable number of rows depending on the number of queues supported.
783  **/
784 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
785 			   int index, int offset)
786 {
787 	u32 ivar = array_rd32(E1000_IVAR0, index);
788 
789 	/* clear any bits that are currently set */
790 	ivar &= ~((u32)0xFF << offset);
791 
792 	/* write vector and valid bit */
793 	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
794 
795 	array_wr32(E1000_IVAR0, index, ivar);
796 }
797 
798 #define IGB_N0_QUEUE -1
799 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
800 {
801 	struct igb_adapter *adapter = q_vector->adapter;
802 	struct e1000_hw *hw = &adapter->hw;
803 	int rx_queue = IGB_N0_QUEUE;
804 	int tx_queue = IGB_N0_QUEUE;
805 	u32 msixbm = 0;
806 
807 	if (q_vector->rx.ring)
808 		rx_queue = q_vector->rx.ring->reg_idx;
809 	if (q_vector->tx.ring)
810 		tx_queue = q_vector->tx.ring->reg_idx;
811 
812 	switch (hw->mac.type) {
813 	case e1000_82575:
814 		/* The 82575 assigns vectors using a bitmask, which matches the
815 		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
816 		   or more queues to a vector, we write the appropriate bits
817 		   into the MSIXBM register for that vector. */
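		/* e.g. rx queue 0 and tx queue 1 sharing one vector yield
		 * msixbm = E1000_EICR_RX_QUEUE0 | (E1000_EICR_TX_QUEUE0 << 1)
		 */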
818 		if (rx_queue > IGB_N0_QUEUE)
819 			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
820 		if (tx_queue > IGB_N0_QUEUE)
821 			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
822 		if (!adapter->msix_entries && msix_vector == 0)
823 			msixbm |= E1000_EIMS_OTHER;
824 		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
825 		q_vector->eims_value = msixbm;
826 		break;
827 	case e1000_82576:
828 		/*
829 		 * 82576 uses a table that essentially consists of 2 columns
830 		 * with 8 rows.  The ordering is column-major so we use the
831 		 * lower 3 bits as the row index, and the 4th bit as the
832 		 * column offset.
833 		 */
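		/* Worked example: rx_queue 10 (binary 1010) lands in IVAR
		 * row 2 (10 & 0x7) at column offset 16 ((10 & 0x8) << 1).
		 */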
834 		if (rx_queue > IGB_N0_QUEUE)
835 			igb_write_ivar(hw, msix_vector,
836 				       rx_queue & 0x7,
837 				       (rx_queue & 0x8) << 1);
838 		if (tx_queue > IGB_N0_QUEUE)
839 			igb_write_ivar(hw, msix_vector,
840 				       tx_queue & 0x7,
841 				       ((tx_queue & 0x8) << 1) + 8);
842 		q_vector->eims_value = 1 << msix_vector;
843 		break;
844 	case e1000_82580:
845 	case e1000_i350:
846 		/*
847 		 * On 82580 and newer adapters the scheme is similar to 82576
848 		 * however instead of ordering column-major we have things
849 		 * ordered row-major.  So we traverse the table by using
850 		 * bit 0 as the column offset, and the remaining bits as the
851 		 * row index.
852 		 */
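		/* Worked example: rx_queue 5 lands in IVAR row 2 (5 >> 1) at
		 * column offset 16 ((5 & 0x1) << 4); tx_queue 5 shares that
		 * row at offset 16 + 8 = 24.
		 */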
853 		if (rx_queue > IGB_N0_QUEUE)
854 			igb_write_ivar(hw, msix_vector,
855 				       rx_queue >> 1,
856 				       (rx_queue & 0x1) << 4);
857 		if (tx_queue > IGB_N0_QUEUE)
858 			igb_write_ivar(hw, msix_vector,
859 				       tx_queue >> 1,
860 				       ((tx_queue & 0x1) << 4) + 8);
861 		q_vector->eims_value = 1 << msix_vector;
862 		break;
863 	default:
864 		BUG();
865 		break;
866 	}
867 
868 	/* add q_vector eims value to global eims_enable_mask */
869 	adapter->eims_enable_mask |= q_vector->eims_value;
870 
871 	/* configure q_vector to set itr on first interrupt */
872 	q_vector->set_itr = 1;
873 }
874 
875 /**
876  * igb_configure_msix - Configure MSI-X hardware
877  *
878  * igb_configure_msix sets up the hardware to properly
879  * generate MSI-X interrupts.
880  **/
881 static void igb_configure_msix(struct igb_adapter *adapter)
882 {
883 	u32 tmp;
884 	int i, vector = 0;
885 	struct e1000_hw *hw = &adapter->hw;
886 
887 	adapter->eims_enable_mask = 0;
888 
889 	/* set vector for other causes, i.e. link changes */
890 	switch (hw->mac.type) {
891 	case e1000_82575:
892 		tmp = rd32(E1000_CTRL_EXT);
893 		/* enable MSI-X PBA support*/
894 		tmp |= E1000_CTRL_EXT_PBA_CLR;
895 
896 		/* Auto-Mask interrupts upon ICR read. */
897 		tmp |= E1000_CTRL_EXT_EIAME;
898 		tmp |= E1000_CTRL_EXT_IRCA;
899 
900 		wr32(E1000_CTRL_EXT, tmp);
901 
902 		/* enable msix_other interrupt */
903 		array_wr32(E1000_MSIXBM(0), vector++,
904 		                      E1000_EIMS_OTHER);
905 		adapter->eims_other = E1000_EIMS_OTHER;
906 
907 		break;
908 
909 	case e1000_82576:
910 	case e1000_82580:
911 	case e1000_i350:
912 		/* Turn on MSI-X capability first, or our settings
913 		 * won't stick.  And it will take days to debug. */
914 		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
915 		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
916 		                E1000_GPIE_NSICR);
917 
918 		/* enable msix_other interrupt */
919 		adapter->eims_other = 1 << vector;
920 		tmp = (vector++ | E1000_IVAR_VALID) << 8;
921 
922 		wr32(E1000_IVAR_MISC, tmp);
923 		break;
924 	default:
925 		/* do nothing, since nothing else supports MSI-X */
926 		break;
927 	} /* switch (hw->mac.type) */
928 
929 	adapter->eims_enable_mask |= adapter->eims_other;
930 
931 	for (i = 0; i < adapter->num_q_vectors; i++)
932 		igb_assign_vector(adapter->q_vector[i], vector++);
933 
934 	wrfl();
935 }
936 
937 /**
938  * igb_request_msix - Initialize MSI-X interrupts
939  *
940  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
941  * kernel.
942  **/
943 static int igb_request_msix(struct igb_adapter *adapter)
944 {
945 	struct net_device *netdev = adapter->netdev;
946 	struct e1000_hw *hw = &adapter->hw;
947 	int i, err = 0, vector = 0;
948 
949 	err = request_irq(adapter->msix_entries[vector].vector,
950 	                  igb_msix_other, 0, netdev->name, adapter);
951 	if (err)
952 		goto out;
953 	vector++;
954 
955 	for (i = 0; i < adapter->num_q_vectors; i++) {
956 		struct igb_q_vector *q_vector = adapter->q_vector[i];
957 
958 		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
959 
960 		if (q_vector->rx.ring && q_vector->tx.ring)
961 			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
962 				q_vector->rx.ring->queue_index);
963 		else if (q_vector->tx.ring)
964 			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
965 				q_vector->tx.ring->queue_index);
966 		else if (q_vector->rx.ring)
967 			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
968 				q_vector->rx.ring->queue_index);
969 		else
970 			sprintf(q_vector->name, "%s-unused", netdev->name);
971 
972 		err = request_irq(adapter->msix_entries[vector].vector,
973 		                  igb_msix_ring, 0, q_vector->name,
974 		                  q_vector);
975 		if (err)
976 			goto out;
977 		vector++;
978 	}
979 
980 	igb_configure_msix(adapter);
981 	return 0;
982 out:
983 	return err;
984 }
985 
986 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
987 {
988 	if (adapter->msix_entries) {
989 		pci_disable_msix(adapter->pdev);
990 		kfree(adapter->msix_entries);
991 		adapter->msix_entries = NULL;
992 	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
993 		pci_disable_msi(adapter->pdev);
994 	}
995 }
996 
997 /**
998  * igb_free_q_vectors - Free memory allocated for interrupt vectors
999  * @adapter: board private structure to initialize
1000  *
1001  * This function frees the memory allocated to the q_vectors.  In addition if
1002  * NAPI is enabled it will delete any references to the NAPI struct prior
1003  * to freeing the q_vector.
1004  **/
1005 static void igb_free_q_vectors(struct igb_adapter *adapter)
1006 {
1007 	int v_idx;
1008 
1009 	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1010 		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1011 		adapter->q_vector[v_idx] = NULL;
1012 		if (!q_vector)
1013 			continue;
1014 		netif_napi_del(&q_vector->napi);
1015 		kfree(q_vector);
1016 	}
1017 	adapter->num_q_vectors = 0;
1018 }
1019 
1020 /**
1021  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1022  *
1023  * This function resets the device so that it has 0 rx queues, tx queues, and
1024  * MSI-X interrupts allocated.
1025  */
1026 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1027 {
1028 	igb_free_queues(adapter);
1029 	igb_free_q_vectors(adapter);
1030 	igb_reset_interrupt_capability(adapter);
1031 }
1032 
1033 /**
1034  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1035  *
1036  * Attempt to configure interrupts using the best available
1037  * capabilities of the hardware and kernel.
1038  **/
1039 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1040 {
1041 	int err;
1042 	int numvecs, i;
1043 
1044 	/* Number of supported queues. */
1045 	adapter->num_rx_queues = adapter->rss_queues;
1046 	if (adapter->vfs_allocated_count)
1047 		adapter->num_tx_queues = 1;
1048 	else
1049 		adapter->num_tx_queues = adapter->rss_queues;
1050 
1051 	/* start with one vector for every rx queue */
1052 	numvecs = adapter->num_rx_queues;
1053 
1054 	/* if tx handler is separate add 1 for every tx queue */
1055 	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1056 		numvecs += adapter->num_tx_queues;
1057 
1058 	/* store the number of vectors reserved for queues */
1059 	adapter->num_q_vectors = numvecs;
1060 
1061 	/* add 1 vector for link status interrupts */
1062 	numvecs++;
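	/* Illustrative count: 4 RSS queues without IGB_FLAG_QUEUE_PAIRS
	 * request 4 Rx + 4 Tx + 1 link-status = 9 MSI-X vectors here.
	 */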
1063 	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1064 					GFP_KERNEL);
1065 	if (!adapter->msix_entries)
1066 		goto msi_only;
1067 
1068 	for (i = 0; i < numvecs; i++)
1069 		adapter->msix_entries[i].entry = i;
1070 
1071 	err = pci_enable_msix(adapter->pdev,
1072 			      adapter->msix_entries,
1073 			      numvecs);
1074 	if (err == 0)
1075 		goto out;
1076 
1077 	igb_reset_interrupt_capability(adapter);
1078 
1079 	/* If we can't do MSI-X, try MSI */
1080 msi_only:
1081 #ifdef CONFIG_PCI_IOV
1082 	/* disable SR-IOV for non MSI-X configurations */
1083 	if (adapter->vf_data) {
1084 		struct e1000_hw *hw = &adapter->hw;
1085 		/* disable iov and allow time for transactions to clear */
1086 		pci_disable_sriov(adapter->pdev);
1087 		msleep(500);
1088 
1089 		kfree(adapter->vf_data);
1090 		adapter->vf_data = NULL;
1091 		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1092 		wrfl();
1093 		msleep(100);
1094 		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1095 	}
1096 #endif
1097 	adapter->vfs_allocated_count = 0;
1098 	adapter->rss_queues = 1;
1099 	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1100 	adapter->num_rx_queues = 1;
1101 	adapter->num_tx_queues = 1;
1102 	adapter->num_q_vectors = 1;
1103 	if (!pci_enable_msi(adapter->pdev))
1104 		adapter->flags |= IGB_FLAG_HAS_MSI;
1105 out:
1106 	/* Notify the stack of the (possibly) reduced queue counts. */
1107 	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1108 	return netif_set_real_num_rx_queues(adapter->netdev,
1109 					    adapter->num_rx_queues);
1110 }
1111 
1112 /**
1113  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1114  * @adapter: board private structure to initialize
1115  *
1116  * We allocate one q_vector per queue interrupt.  If allocation fails we
1117  * return -ENOMEM.
1118  **/
1119 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1120 {
1121 	struct igb_q_vector *q_vector;
1122 	struct e1000_hw *hw = &adapter->hw;
1123 	int v_idx;
1124 	int orig_node = adapter->node;
1125 
1126 	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1127 		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1128 						adapter->num_tx_queues)) &&
1129 		    (adapter->num_rx_queues == v_idx))
1130 			adapter->node = orig_node;
1131 		if (orig_node == -1) {
1132 			int cur_node = next_online_node(adapter->node);
1133 			if (cur_node == MAX_NUMNODES)
1134 				cur_node = first_online_node;
1135 			adapter->node = cur_node;
1136 		}
1137 		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1138 					adapter->node);
1139 		if (!q_vector)
1140 			q_vector = kzalloc(sizeof(struct igb_q_vector),
1141 					   GFP_KERNEL);
1142 		if (!q_vector)
1143 			goto err_out;
1144 		q_vector->adapter = adapter;
1145 		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1146 		q_vector->itr_val = IGB_START_ITR;
1147 		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1148 		adapter->q_vector[v_idx] = q_vector;
1149 	}
1150 	/* Restore the adapter's original node */
1151 	adapter->node = orig_node;
1152 
1153 	return 0;
1154 
1155 err_out:
1156 	/* Restore the adapter's original node */
1157 	adapter->node = orig_node;
1158 	igb_free_q_vectors(adapter);
1159 	return -ENOMEM;
1160 }
1161 
1162 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1163                                       int ring_idx, int v_idx)
1164 {
1165 	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1166 
1167 	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1168 	q_vector->rx.ring->q_vector = q_vector;
1169 	q_vector->rx.count++;
1170 	q_vector->itr_val = adapter->rx_itr_setting;
1171 	if (q_vector->itr_val && q_vector->itr_val <= 3)
1172 		q_vector->itr_val = IGB_START_ITR;
1173 }
1174 
1175 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1176                                       int ring_idx, int v_idx)
1177 {
1178 	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1179 
1180 	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1181 	q_vector->tx.ring->q_vector = q_vector;
1182 	q_vector->tx.count++;
1183 	q_vector->itr_val = adapter->tx_itr_setting;
1184 	q_vector->tx.work_limit = adapter->tx_work_limit;
1185 	if (q_vector->itr_val && q_vector->itr_val <= 3)
1186 		q_vector->itr_val = IGB_START_ITR;
1187 }
1188 
1189 /**
1190  * igb_map_ring_to_vector - maps allocated queues to vectors
1191  *
1192  * This function maps the recently allocated queues to vectors.
1193  **/
1194 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1195 {
1196 	int i;
1197 	int v_idx = 0;
1198 
1199 	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1200 	    (adapter->num_q_vectors < adapter->num_tx_queues))
1201 		return -ENOMEM;
1202 
1203 	if (adapter->num_q_vectors >=
1204 	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1205 		for (i = 0; i < adapter->num_rx_queues; i++)
1206 			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1207 		for (i = 0; i < adapter->num_tx_queues; i++)
1208 			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1209 	} else {
1210 		for (i = 0; i < adapter->num_rx_queues; i++) {
1211 			if (i < adapter->num_tx_queues)
1212 				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1213 			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1214 		}
1215 		for (; i < adapter->num_tx_queues; i++)
1216 			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1217 	}
1218 	return 0;
1219 }
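
/*
 * Illustrative mapping: with 4 Rx and 4 Tx queues but only 4 q_vectors
 * (queue pairs), the else branch above places rx[i] and tx[i] on the same
 * vector; with 8 q_vectors, every ring gets a vector of its own.
 */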
1220 
1221 /**
1222  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1223  *
1224  * This function initializes the interrupts and allocates all of the queues.
1225  **/
1226 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1227 {
1228 	struct pci_dev *pdev = adapter->pdev;
1229 	int err;
1230 
1231 	err = igb_set_interrupt_capability(adapter);
1232 	if (err)
1233 		return err;
1234 
1235 	err = igb_alloc_q_vectors(adapter);
1236 	if (err) {
1237 		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1238 		goto err_alloc_q_vectors;
1239 	}
1240 
1241 	err = igb_alloc_queues(adapter);
1242 	if (err) {
1243 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1244 		goto err_alloc_queues;
1245 	}
1246 
1247 	err = igb_map_ring_to_vector(adapter);
1248 	if (err) {
1249 		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1250 		goto err_map_queues;
1251 	}
1252 
1253 
1254 	return 0;
1255 err_map_queues:
1256 	igb_free_queues(adapter);
1257 err_alloc_queues:
1258 	igb_free_q_vectors(adapter);
1259 err_alloc_q_vectors:
1260 	igb_reset_interrupt_capability(adapter);
1261 	return err;
1262 }
1263 
1264 /**
1265  * igb_request_irq - initialize interrupts
1266  *
1267  * Attempts to configure interrupts using the best available
1268  * capabilities of the hardware and kernel.
1269  **/
1270 static int igb_request_irq(struct igb_adapter *adapter)
1271 {
1272 	struct net_device *netdev = adapter->netdev;
1273 	struct pci_dev *pdev = adapter->pdev;
1274 	int err = 0;
1275 
1276 	if (adapter->msix_entries) {
1277 		err = igb_request_msix(adapter);
1278 		if (!err)
1279 			goto request_done;
1280 		/* fall back to MSI */
1281 		igb_clear_interrupt_scheme(adapter);
1282 		if (!pci_enable_msi(pdev))
1283 			adapter->flags |= IGB_FLAG_HAS_MSI;
1284 		igb_free_all_tx_resources(adapter);
1285 		igb_free_all_rx_resources(adapter);
1286 		adapter->num_tx_queues = 1;
1287 		adapter->num_rx_queues = 1;
1288 		adapter->num_q_vectors = 1;
1289 		err = igb_alloc_q_vectors(adapter);
1290 		if (err) {
1291 			dev_err(&pdev->dev,
1292 			        "Unable to allocate memory for vectors\n");
1293 			goto request_done;
1294 		}
1295 		err = igb_alloc_queues(adapter);
1296 		if (err) {
1297 			dev_err(&pdev->dev,
1298 			        "Unable to allocate memory for queues\n");
1299 			igb_free_q_vectors(adapter);
1300 			goto request_done;
1301 		}
1302 		igb_setup_all_tx_resources(adapter);
1303 		igb_setup_all_rx_resources(adapter);
1304 	}
1305 
1306 	igb_assign_vector(adapter->q_vector[0], 0);
1307 
1308 	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1309 		err = request_irq(pdev->irq, igb_intr_msi, 0,
1310 				  netdev->name, adapter);
1311 		if (!err)
1312 			goto request_done;
1313 
1314 		/* fall back to legacy interrupts */
1315 		igb_reset_interrupt_capability(adapter);
1316 		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1317 	}
1318 
1319 	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1320 			  netdev->name, adapter);
1321 
1322 	if (err)
1323 		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1324 			err);
1325 
1326 request_done:
1327 	return err;
1328 }
1329 
1330 static void igb_free_irq(struct igb_adapter *adapter)
1331 {
1332 	if (adapter->msix_entries) {
1333 		int vector = 0, i;
1334 
1335 		free_irq(adapter->msix_entries[vector++].vector, adapter);
1336 
1337 		for (i = 0; i < adapter->num_q_vectors; i++)
1338 			free_irq(adapter->msix_entries[vector++].vector,
1339 				 adapter->q_vector[i]);
1340 	} else {
1341 		free_irq(adapter->pdev->irq, adapter);
1342 	}
1343 }
1344 
1345 /**
1346  * igb_irq_disable - Mask off interrupt generation on the NIC
1347  * @adapter: board private structure
1348  **/
1349 static void igb_irq_disable(struct igb_adapter *adapter)
1350 {
1351 	struct e1000_hw *hw = &adapter->hw;
1352 
1353 	/*
1354 	 * we need to be careful when disabling interrupts.  The VFs are also
1355 	 * mapped into these registers and so clearing the bits can cause
1356 	 * issues for the VF drivers, so we only clear the bits we set
1357 	 */
1358 	if (adapter->msix_entries) {
1359 		u32 regval = rd32(E1000_EIAM);
1360 		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1361 		wr32(E1000_EIMC, adapter->eims_enable_mask);
1362 		regval = rd32(E1000_EIAC);
1363 		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1364 	}
1365 
1366 	wr32(E1000_IAM, 0);
1367 	wr32(E1000_IMC, ~0);
1368 	wrfl();
1369 	if (adapter->msix_entries) {
1370 		int i;
1371 		for (i = 0; i < adapter->num_q_vectors; i++)
1372 			synchronize_irq(adapter->msix_entries[i].vector);
1373 	} else {
1374 		synchronize_irq(adapter->pdev->irq);
1375 	}
1376 }
1377 
1378 /**
1379  * igb_irq_enable - Enable default interrupt generation settings
1380  * @adapter: board private structure
1381  **/
1382 static void igb_irq_enable(struct igb_adapter *adapter)
1383 {
1384 	struct e1000_hw *hw = &adapter->hw;
1385 
1386 	if (adapter->msix_entries) {
1387 		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1388 		u32 regval = rd32(E1000_EIAC);
1389 		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1390 		regval = rd32(E1000_EIAM);
1391 		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1392 		wr32(E1000_EIMS, adapter->eims_enable_mask);
1393 		if (adapter->vfs_allocated_count) {
1394 			wr32(E1000_MBVFIMR, 0xFF);
1395 			ims |= E1000_IMS_VMMB;
1396 		}
1397 		wr32(E1000_IMS, ims);
1398 	} else {
1399 		wr32(E1000_IMS, IMS_ENABLE_MASK |
1400 				E1000_IMS_DRSTA);
1401 		wr32(E1000_IAM, IMS_ENABLE_MASK |
1402 				E1000_IMS_DRSTA);
1403 	}
1404 }
1405 
1406 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1407 {
1408 	struct e1000_hw *hw = &adapter->hw;
1409 	u16 vid = adapter->hw.mng_cookie.vlan_id;
1410 	u16 old_vid = adapter->mng_vlan_id;
1411 
1412 	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1413 		/* add VID to filter table */
1414 		igb_vfta_set(hw, vid, true);
1415 		adapter->mng_vlan_id = vid;
1416 	} else {
1417 		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1418 	}
1419 
1420 	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1421 	    (vid != old_vid) &&
1422 	    !test_bit(old_vid, adapter->active_vlans)) {
1423 		/* remove VID from filter table */
1424 		igb_vfta_set(hw, old_vid, false);
1425 	}
1426 }
1427 
1428 /**
1429  * igb_release_hw_control - release control of the h/w to f/w
1430  * @adapter: address of board private structure
1431  *
1432  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1433  * For ASF and Pass Through versions of f/w this means that the
1434  * driver is no longer loaded.
1435  *
1436  **/
1437 static void igb_release_hw_control(struct igb_adapter *adapter)
1438 {
1439 	struct e1000_hw *hw = &adapter->hw;
1440 	u32 ctrl_ext;
1441 
1442 	/* Let firmware take over control of h/w */
1443 	ctrl_ext = rd32(E1000_CTRL_EXT);
1444 	wr32(E1000_CTRL_EXT,
1445 			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1446 }
1447 
1448 /**
1449  * igb_get_hw_control - get control of the h/w from f/w
1450  * @adapter: address of board private structure
1451  *
1452  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1453  * For ASF and Pass Through versions of f/w this means that
1454  * the driver is loaded.
1455  *
1456  **/
1457 static void igb_get_hw_control(struct igb_adapter *adapter)
1458 {
1459 	struct e1000_hw *hw = &adapter->hw;
1460 	u32 ctrl_ext;
1461 
1462 	/* Let firmware know the driver has taken over */
1463 	ctrl_ext = rd32(E1000_CTRL_EXT);
1464 	wr32(E1000_CTRL_EXT,
1465 			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1466 }
1467 
1468 /**
1469  * igb_configure - configure the hardware for RX and TX
1470  * @adapter: private board structure
1471  **/
1472 static void igb_configure(struct igb_adapter *adapter)
1473 {
1474 	struct net_device *netdev = adapter->netdev;
1475 	int i;
1476 
1477 	igb_get_hw_control(adapter);
1478 	igb_set_rx_mode(netdev);
1479 
1480 	igb_restore_vlan(adapter);
1481 
1482 	igb_setup_tctl(adapter);
1483 	igb_setup_mrqc(adapter);
1484 	igb_setup_rctl(adapter);
1485 
1486 	igb_configure_tx(adapter);
1487 	igb_configure_rx(adapter);
1488 
1489 	igb_rx_fifo_flush_82575(&adapter->hw);
1490 
1491 	/* call igb_desc_unused which always leaves
1492 	 * at least 1 descriptor unused to make sure
1493 	 * next_to_use != next_to_clean */
1494 	for (i = 0; i < adapter->num_rx_queues; i++) {
1495 		struct igb_ring *ring = adapter->rx_ring[i];
1496 		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1497 	}
1498 }
1499 
1500 /**
1501  * igb_power_up_link - Power up the phy/serdes link
1502  * @adapter: address of board private structure
1503  **/
1504 void igb_power_up_link(struct igb_adapter *adapter)
1505 {
1506 	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507 		igb_power_up_phy_copper(&adapter->hw);
1508 	else
1509 		igb_power_up_serdes_link_82575(&adapter->hw);
1510 	igb_reset_phy(&adapter->hw);
1511 }
1512 
1513 /**
1514  * igb_power_down_link - Power down the phy/serdes link
1515  * @adapter: address of board private structure
1516  */
1517 static void igb_power_down_link(struct igb_adapter *adapter)
1518 {
1519 	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1520 		igb_power_down_phy_copper_82575(&adapter->hw);
1521 	else
1522 		igb_shutdown_serdes_link_82575(&adapter->hw);
1523 }
1524 
1525 /**
1526  * igb_up - Open the interface and prepare it to handle traffic
1527  * @adapter: board private structure
1528  **/
1529 int igb_up(struct igb_adapter *adapter)
1530 {
1531 	struct e1000_hw *hw = &adapter->hw;
1532 	int i;
1533 
1534 	/* hardware has been reset, we need to reload some things */
1535 	igb_configure(adapter);
1536 
1537 	clear_bit(__IGB_DOWN, &adapter->state);
1538 
1539 	for (i = 0; i < adapter->num_q_vectors; i++)
1540 		napi_enable(&(adapter->q_vector[i]->napi));
1541 
1542 	if (adapter->msix_entries)
1543 		igb_configure_msix(adapter);
1544 	else
1545 		igb_assign_vector(adapter->q_vector[0], 0);
1546 
1547 	/* Clear any pending interrupts. */
1548 	rd32(E1000_ICR);
1549 	igb_irq_enable(adapter);
1550 
1551 	/* notify VFs that reset has been completed */
1552 	if (adapter->vfs_allocated_count) {
1553 		u32 reg_data = rd32(E1000_CTRL_EXT);
1554 		reg_data |= E1000_CTRL_EXT_PFRSTD;
1555 		wr32(E1000_CTRL_EXT, reg_data);
1556 	}
1557 
1558 	netif_tx_start_all_queues(adapter->netdev);
1559 
1560 	/* start the watchdog. */
1561 	hw->mac.get_link_status = 1;
1562 	schedule_work(&adapter->watchdog_task);
1563 
1564 	return 0;
1565 }
1566 
1567 void igb_down(struct igb_adapter *adapter)
1568 {
1569 	struct net_device *netdev = adapter->netdev;
1570 	struct e1000_hw *hw = &adapter->hw;
1571 	u32 tctl, rctl;
1572 	int i;
1573 
1574 	/* signal that we're down so the interrupt handler does not
1575 	 * reschedule our watchdog timer */
1576 	set_bit(__IGB_DOWN, &adapter->state);
1577 
1578 	/* disable receives in the hardware */
1579 	rctl = rd32(E1000_RCTL);
1580 	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1581 	/* flush and sleep below */
1582 
1583 	netif_tx_stop_all_queues(netdev);
1584 
1585 	/* disable transmits in the hardware */
1586 	tctl = rd32(E1000_TCTL);
1587 	tctl &= ~E1000_TCTL_EN;
1588 	wr32(E1000_TCTL, tctl);
1589 	/* flush both disables and wait for them to finish */
1590 	wrfl();
1591 	msleep(10);
1592 
1593 	for (i = 0; i < adapter->num_q_vectors; i++)
1594 		napi_disable(&(adapter->q_vector[i]->napi));
1595 
1596 	igb_irq_disable(adapter);
1597 
1598 	del_timer_sync(&adapter->watchdog_timer);
1599 	del_timer_sync(&adapter->phy_info_timer);
1600 
1601 	netif_carrier_off(netdev);
1602 
1603 	/* record the stats before reset*/
1604 	spin_lock(&adapter->stats64_lock);
1605 	igb_update_stats(adapter, &adapter->stats64);
1606 	spin_unlock(&adapter->stats64_lock);
1607 
1608 	adapter->link_speed = 0;
1609 	adapter->link_duplex = 0;
1610 
1611 	if (!pci_channel_offline(adapter->pdev))
1612 		igb_reset(adapter);
1613 	igb_clean_all_tx_rings(adapter);
1614 	igb_clean_all_rx_rings(adapter);
1615 #ifdef CONFIG_IGB_DCA
1616 
1617 	/* since we reset the hardware DCA settings were cleared */
1618 	igb_setup_dca(adapter);
1619 #endif
1620 }
1621 
1622 void igb_reinit_locked(struct igb_adapter *adapter)
1623 {
1624 	WARN_ON(in_interrupt());
1625 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1626 		msleep(1);
1627 	igb_down(adapter);
1628 	igb_up(adapter);
1629 	clear_bit(__IGB_RESETTING, &adapter->state);
1630 }
1631 
1632 void igb_reset(struct igb_adapter *adapter)
1633 {
1634 	struct pci_dev *pdev = adapter->pdev;
1635 	struct e1000_hw *hw = &adapter->hw;
1636 	struct e1000_mac_info *mac = &hw->mac;
1637 	struct e1000_fc_info *fc = &hw->fc;
1638 	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1639 	u16 hwm;
1640 
1641 	/* Repartition PBA for MTUs greater than 9k.
1642 	 * To take effect CTRL.RST is required.
1643 	 */
1644 	switch (mac->type) {
1645 	case e1000_i350:
1646 	case e1000_82580:
1647 		pba = rd32(E1000_RXPBS);
1648 		pba = igb_rxpbs_adjust_82580(pba);
1649 		break;
1650 	case e1000_82576:
1651 		pba = rd32(E1000_RXPBS);
1652 		pba &= E1000_RXPBS_SIZE_MASK_82576;
1653 		break;
1654 	case e1000_82575:
1655 	default:
1656 		pba = E1000_PBA_34K;
1657 		break;
1658 	}
1659 
1660 	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1661 	    (mac->type < e1000_82576)) {
1662 		/* adjust PBA for jumbo frames */
1663 		wr32(E1000_PBA, pba);
1664 
1665 		/* To maintain wire speed transmits, the Tx FIFO should be
1666 		 * large enough to accommodate two full transmit packets,
1667 		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1668 		 * the Rx FIFO should be large enough to accommodate at least
1669 		 * one full receive packet and is similarly rounded up and
1670 		 * expressed in KB. */
1671 		pba = rd32(E1000_PBA);
1672 		/* upper 16 bits has Tx packet buffer allocation size in KB */
1673 		tx_space = pba >> 16;
1674 		/* lower 16 bits has Rx packet buffer allocation size in KB */
1675 		pba &= 0xffff;
1676 		/* the Tx FIFO also stores 16 bytes of information about each Tx
1677 		 * packet; don't include the Ethernet FCS because hardware appends it */
1678 		min_tx_space = (adapter->max_frame_size +
1679 				sizeof(union e1000_adv_tx_desc) -
1680 				ETH_FCS_LEN) * 2;
1681 		min_tx_space = ALIGN(min_tx_space, 1024);
1682 		min_tx_space >>= 10;
1683 		/* software strips receive CRC, so leave room for it */
1684 		min_rx_space = adapter->max_frame_size;
1685 		min_rx_space = ALIGN(min_rx_space, 1024);
1686 		min_rx_space >>= 10;
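		/* For illustration: a 9000-byte MTU gives a max_frame_size
		 * of 9022 bytes, so with the 16-byte Tx descriptor above
		 * min_tx_space = ALIGN((9022 + 16 - 4) * 2, 1024) >> 10 = 18
		 * and min_rx_space = ALIGN(9022, 1024) >> 10 = 9 (in KB) */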
1687 
1688 		/* If current Tx allocation is less than the min Tx FIFO size,
1689 		 * and the min Tx FIFO size is less than the current Rx FIFO
1690 		 * allocation, take space away from current Rx allocation */
1691 		if (tx_space < min_tx_space &&
1692 		    ((min_tx_space - tx_space) < pba)) {
1693 			pba = pba - (min_tx_space - tx_space);
1694 
1695 			/* if short on rx space, rx wins and must trump tx
1696 			 * adjustment */
1697 			if (pba < min_rx_space)
1698 				pba = min_rx_space;
1699 		}
1700 		wr32(E1000_PBA, pba);
1701 	}
1702 
1703 	/* flow control settings */
	/* The high water mark must be low enough to fit two full frames
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus two full frames */
1709 	hwm = min(((pba << 10) * 9 / 10),
1710 			((pba << 10) - 2 * adapter->max_frame_size));
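	/* e.g. with a 34 KB PBA and a 1522-byte max frame this is
	 * min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772) = 31334,
	 * which the 16-byte mask below rounds down to 31328 (0x7A60) */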
1711 
1712 	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1713 	fc->low_water = fc->high_water - 16;
1714 	fc->pause_time = 0xFFFF;
1715 	fc->send_xon = 1;
1716 	fc->current_mode = fc->requested_mode;
1717 
	/* quiesce all VFs and disable their receive/transmit queues */
1719 	if (adapter->vfs_allocated_count) {
1720 		int i;
1721 		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1722 			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1723 
1724 		/* ping all the active vfs to let them know we are going down */
1725 		igb_ping_all_vfs(adapter);
1726 
1727 		/* disable transmits and receives */
1728 		wr32(E1000_VFRE, 0);
1729 		wr32(E1000_VFTE, 0);
1730 	}
1731 
1732 	/* Allow time for pending master requests to run */
1733 	hw->mac.ops.reset_hw(hw);
1734 	wr32(E1000_WUC, 0);
1735 
1736 	if (hw->mac.ops.init_hw(hw))
1737 		dev_err(&pdev->dev, "Hardware Error\n");
1738 
1739 	igb_init_dmac(adapter, pba);
1740 	if (!netif_running(adapter->netdev))
1741 		igb_power_down_link(adapter);
1742 
1743 	igb_update_mng_vlan(adapter);
1744 
1745 	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746 	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747 
1748 	igb_get_phy_info(hw);
1749 }
1750 
1751 static netdev_features_t igb_fix_features(struct net_device *netdev,
1752 	netdev_features_t features)
1753 {
1754 	/*
	 * Since there is no support for separate Rx/Tx VLAN accel
	 * enable/disable, make sure the Tx flag is always in the same
	 * state as the Rx flag.
1757 	 */
1758 	if (features & NETIF_F_HW_VLAN_RX)
1759 		features |= NETIF_F_HW_VLAN_TX;
1760 	else
1761 		features &= ~NETIF_F_HW_VLAN_TX;
1762 
1763 	return features;
1764 }
1765 
1766 static int igb_set_features(struct net_device *netdev,
1767 	netdev_features_t features)
1768 {
1769 	netdev_features_t changed = netdev->features ^ features;
1770 
1771 	if (changed & NETIF_F_HW_VLAN_RX)
1772 		igb_vlan_mode(netdev, features);
1773 
1774 	return 0;
1775 }
1776 
1777 static const struct net_device_ops igb_netdev_ops = {
1778 	.ndo_open		= igb_open,
1779 	.ndo_stop		= igb_close,
1780 	.ndo_start_xmit		= igb_xmit_frame,
1781 	.ndo_get_stats64	= igb_get_stats64,
1782 	.ndo_set_rx_mode	= igb_set_rx_mode,
1783 	.ndo_set_mac_address	= igb_set_mac,
1784 	.ndo_change_mtu		= igb_change_mtu,
1785 	.ndo_do_ioctl		= igb_ioctl,
1786 	.ndo_tx_timeout		= igb_tx_timeout,
1787 	.ndo_validate_addr	= eth_validate_addr,
1788 	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1789 	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1790 	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1791 	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1792 	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1793 	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1794 #ifdef CONFIG_NET_POLL_CONTROLLER
1795 	.ndo_poll_controller	= igb_netpoll,
1796 #endif
1797 	.ndo_fix_features	= igb_fix_features,
1798 	.ndo_set_features	= igb_set_features,
1799 };
1800 
1801 /**
1802  * igb_probe - Device Initialization Routine
1803  * @pdev: PCI device information struct
1804  * @ent: entry in igb_pci_tbl
1805  *
1806  * Returns 0 on success, negative on failure
1807  *
1808  * igb_probe initializes an adapter identified by a pci_dev structure.
1809  * The OS initialization, configuring of the adapter private structure,
1810  * and a hardware reset occur.
1811  **/
1812 static int __devinit igb_probe(struct pci_dev *pdev,
1813 			       const struct pci_device_id *ent)
1814 {
1815 	struct net_device *netdev;
1816 	struct igb_adapter *adapter;
1817 	struct e1000_hw *hw;
1818 	u16 eeprom_data = 0;
1819 	s32 ret_val;
1820 	static int global_quad_port_a; /* global quad port a indication */
1821 	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1822 	unsigned long mmio_start, mmio_len;
1823 	int err, pci_using_dac;
1824 	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1825 	u8 part_str[E1000_PBANUM_LENGTH];
1826 
1827 	/* Catch broken hardware that put the wrong VF device ID in
1828 	 * the PCIe SR-IOV capability.
1829 	 */
1830 	if (pdev->is_virtfn) {
1831 		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1832 		     pci_name(pdev), pdev->vendor, pdev->device);
1833 		return -EINVAL;
1834 	}
1835 
1836 	err = pci_enable_device_mem(pdev);
1837 	if (err)
1838 		return err;
1839 
1840 	pci_using_dac = 0;
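	/* try a 64-bit DMA mask first; fall back to 32-bit if the
	 * platform cannot address the full range */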
1841 	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1842 	if (!err) {
1843 		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1844 		if (!err)
1845 			pci_using_dac = 1;
1846 	} else {
1847 		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1848 		if (err) {
1849 			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1850 			if (err) {
				dev_err(&pdev->dev,
					"No usable DMA configuration, aborting\n");
1853 				goto err_dma;
1854 			}
1855 		}
1856 	}
1857 
1858 	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1859 	                                   IORESOURCE_MEM),
1860 	                                   igb_driver_name);
1861 	if (err)
1862 		goto err_pci_reg;
1863 
1864 	pci_enable_pcie_error_reporting(pdev);
1865 
1866 	pci_set_master(pdev);
1867 	pci_save_state(pdev);
1868 
1869 	err = -ENOMEM;
1870 	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1871 				   IGB_MAX_TX_QUEUES);
1872 	if (!netdev)
1873 		goto err_alloc_etherdev;
1874 
1875 	SET_NETDEV_DEV(netdev, &pdev->dev);
1876 
1877 	pci_set_drvdata(pdev, netdev);
1878 	adapter = netdev_priv(netdev);
1879 	adapter->netdev = netdev;
1880 	adapter->pdev = pdev;
1881 	hw = &adapter->hw;
1882 	hw->back = adapter;
1883 	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1884 
1885 	mmio_start = pci_resource_start(pdev, 0);
1886 	mmio_len = pci_resource_len(pdev, 0);
1887 
1888 	err = -EIO;
1889 	hw->hw_addr = ioremap(mmio_start, mmio_len);
1890 	if (!hw->hw_addr)
1891 		goto err_ioremap;
1892 
1893 	netdev->netdev_ops = &igb_netdev_ops;
1894 	igb_set_ethtool_ops(netdev);
1895 	netdev->watchdog_timeo = 5 * HZ;
1896 
1897 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1898 
1899 	netdev->mem_start = mmio_start;
1900 	netdev->mem_end = mmio_start + mmio_len;
1901 
1902 	/* PCI config space info */
1903 	hw->vendor_id = pdev->vendor;
1904 	hw->device_id = pdev->device;
1905 	hw->revision_id = pdev->revision;
1906 	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1907 	hw->subsystem_device_id = pdev->subsystem_device;
1908 
1909 	/* Copy the default MAC, PHY and NVM function pointers */
1910 	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1911 	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1912 	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1913 	/* Initialize skew-specific constants */
1914 	err = ei->get_invariants(hw);
1915 	if (err)
1916 		goto err_sw_init;
1917 
1918 	/* setup the private structure */
1919 	err = igb_sw_init(adapter);
1920 	if (err)
1921 		goto err_sw_init;
1922 
1923 	igb_get_bus_info_pcie(hw);
1924 
1925 	hw->phy.autoneg_wait_to_complete = false;
1926 
1927 	/* Copper options */
1928 	if (hw->phy.media_type == e1000_media_type_copper) {
1929 		hw->phy.mdix = AUTO_ALL_MODES;
1930 		hw->phy.disable_polarity_correction = false;
1931 		hw->phy.ms_type = e1000_ms_hw_default;
1932 	}
1933 
1934 	if (igb_check_reset_block(hw))
1935 		dev_info(&pdev->dev,
1936 			"PHY reset is blocked due to SOL/IDER session.\n");
1937 
1938 	/*
	 * features is initialized to 0 at allocation; it might have bits
	 * set by igb_sw_init, so we should use an OR instead of an
1941 	 * assignment.
1942 	 */
1943 	netdev->features |= NETIF_F_SG |
1944 			    NETIF_F_IP_CSUM |
1945 			    NETIF_F_IPV6_CSUM |
1946 			    NETIF_F_TSO |
1947 			    NETIF_F_TSO6 |
1948 			    NETIF_F_RXHASH |
1949 			    NETIF_F_RXCSUM |
1950 			    NETIF_F_HW_VLAN_RX |
1951 			    NETIF_F_HW_VLAN_TX;
1952 
1953 	/* copy netdev features into list of user selectable features */
1954 	netdev->hw_features |= netdev->features;
1955 
1956 	/* set this bit last since it cannot be part of hw_features */
1957 	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1958 
1959 	netdev->vlan_features |= NETIF_F_TSO |
1960 				 NETIF_F_TSO6 |
1961 				 NETIF_F_IP_CSUM |
1962 				 NETIF_F_IPV6_CSUM |
1963 				 NETIF_F_SG;
1964 
1965 	if (pci_using_dac) {
1966 		netdev->features |= NETIF_F_HIGHDMA;
1967 		netdev->vlan_features |= NETIF_F_HIGHDMA;
1968 	}
1969 
1970 	if (hw->mac.type >= e1000_82576) {
1971 		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1972 		netdev->features |= NETIF_F_SCTP_CSUM;
1973 	}
1974 
1975 	netdev->priv_flags |= IFF_UNICAST_FLT;
1976 
1977 	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1978 
1979 	/* before reading the NVM, reset the controller to put the device in a
1980 	 * known good starting state */
1981 	hw->mac.ops.reset_hw(hw);
1982 
1983 	/* make sure the NVM is good */
1984 	if (hw->nvm.ops.validate(hw) < 0) {
1985 		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1986 		err = -EIO;
1987 		goto err_eeprom;
1988 	}
1989 
1990 	/* copy the MAC address out of the NVM */
1991 	if (hw->mac.ops.read_mac_addr(hw))
1992 		dev_err(&pdev->dev, "NVM Read Error\n");
1993 
1994 	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1995 	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1996 
1997 	if (!is_valid_ether_addr(netdev->perm_addr)) {
1998 		dev_err(&pdev->dev, "Invalid MAC Address\n");
1999 		err = -EIO;
2000 		goto err_eeprom;
2001 	}
2002 
2003 	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2004 	            (unsigned long) adapter);
2005 	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2006 	            (unsigned long) adapter);
2007 
2008 	INIT_WORK(&adapter->reset_task, igb_reset_task);
2009 	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2010 
2011 	/* Initialize link properties that are user-changeable */
2012 	adapter->fc_autoneg = true;
2013 	hw->mac.autoneg = true;
2014 	hw->phy.autoneg_advertised = 0x2f;
2015 
2016 	hw->fc.requested_mode = e1000_fc_default;
2017 	hw->fc.current_mode = e1000_fc_default;
2018 
2019 	igb_validate_mdi_setting(hw);
2020 
	/* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
2022 	 * enable the ACPI Magic Packet filter
2023 	 */
2024 
2025 	if (hw->bus.func == 0)
2026 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2027 	else if (hw->mac.type >= e1000_82580)
2028 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2029 		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2030 		                 &eeprom_data);
2031 	else if (hw->bus.func == 1)
2032 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2033 
2034 	if (eeprom_data & eeprom_apme_mask)
2035 		adapter->eeprom_wol |= E1000_WUFC_MAG;
2036 
2037 	/* now that we have the eeprom settings, apply the special cases where
2038 	 * the eeprom may be wrong or the board simply won't support wake on
2039 	 * lan on a particular port */
2040 	switch (pdev->device) {
2041 	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2042 		adapter->eeprom_wol = 0;
2043 		break;
2044 	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2045 	case E1000_DEV_ID_82576_FIBER:
2046 	case E1000_DEV_ID_82576_SERDES:
2047 		/* Wake events only supported on port A for dual fiber
2048 		 * regardless of eeprom setting */
2049 		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2050 			adapter->eeprom_wol = 0;
2051 		break;
2052 	case E1000_DEV_ID_82576_QUAD_COPPER:
2053 	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2054 		/* if quad port adapter, disable WoL on all but port A */
2055 		if (global_quad_port_a != 0)
2056 			adapter->eeprom_wol = 0;
2057 		else
2058 			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2059 		/* Reset for multiple quad port adapters */
2060 		if (++global_quad_port_a == 4)
2061 			global_quad_port_a = 0;
2062 		break;
2063 	}
2064 
2065 	/* initialize the wol settings based on the eeprom settings */
2066 	adapter->wol = adapter->eeprom_wol;
2067 	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2068 
2069 	/* reset the hardware with the new settings */
2070 	igb_reset(adapter);
2071 
2072 	/* let the f/w know that the h/w is now under the control of the
2073 	 * driver. */
2074 	igb_get_hw_control(adapter);
2075 
2076 	strcpy(netdev->name, "eth%d");
2077 	err = register_netdev(netdev);
2078 	if (err)
2079 		goto err_register;
2080 
2081 	/* carrier off reporting is important to ethtool even BEFORE open */
2082 	netif_carrier_off(netdev);
2083 
2084 #ifdef CONFIG_IGB_DCA
2085 	if (dca_add_requester(&pdev->dev) == 0) {
2086 		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087 		dev_info(&pdev->dev, "DCA enabled\n");
2088 		igb_setup_dca(adapter);
2089 	}
2090 
2091 #endif
2092 	/* do hw tstamp init after resetting */
2093 	igb_init_hw_timer(adapter);
2094 
2095 	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096 	/* print bus type/speed/width info */
2097 	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098 		 netdev->name,
2099 		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100 		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101 		                                            "unknown"),
2102 		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103 		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104 		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105 		   "unknown"),
2106 		 netdev->dev_addr);
2107 
2108 	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109 	if (ret_val)
2110 		strcpy(part_str, "Unknown");
2111 	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112 	dev_info(&pdev->dev,
2113 		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114 		adapter->msix_entries ? "MSI-X" :
2115 		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116 		adapter->num_rx_queues, adapter->num_tx_queues);
2117 	switch (hw->mac.type) {
2118 	case e1000_i350:
2119 		igb_set_eee_i350(hw);
2120 		break;
2121 	default:
2122 		break;
2123 	}
2124 
2125 	pm_runtime_put_noidle(&pdev->dev);
2126 	return 0;
2127 
2128 err_register:
2129 	igb_release_hw_control(adapter);
2130 err_eeprom:
2131 	if (!igb_check_reset_block(hw))
2132 		igb_reset_phy(hw);
2133 
2134 	if (hw->flash_address)
2135 		iounmap(hw->flash_address);
2136 err_sw_init:
2137 	igb_clear_interrupt_scheme(adapter);
2138 	iounmap(hw->hw_addr);
2139 err_ioremap:
2140 	free_netdev(netdev);
2141 err_alloc_etherdev:
2142 	pci_release_selected_regions(pdev,
2143 	                             pci_select_bars(pdev, IORESOURCE_MEM));
2144 err_pci_reg:
2145 err_dma:
2146 	pci_disable_device(pdev);
2147 	return err;
2148 }
2149 
2150 /**
2151  * igb_remove - Device Removal Routine
2152  * @pdev: PCI device information struct
2153  *
2154  * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
2156  * Hot-Plug event, or because the driver is going to be removed from
2157  * memory.
2158  **/
2159 static void __devexit igb_remove(struct pci_dev *pdev)
2160 {
2161 	struct net_device *netdev = pci_get_drvdata(pdev);
2162 	struct igb_adapter *adapter = netdev_priv(netdev);
2163 	struct e1000_hw *hw = &adapter->hw;
2164 
2165 	pm_runtime_get_noresume(&pdev->dev);
2166 
2167 	/*
	 * The watchdog task may reschedule the timer, so explicitly
	 * prevent the watchdog from being rescheduled.
2170 	 */
2171 	set_bit(__IGB_DOWN, &adapter->state);
2172 	del_timer_sync(&adapter->watchdog_timer);
2173 	del_timer_sync(&adapter->phy_info_timer);
2174 
2175 	cancel_work_sync(&adapter->reset_task);
2176 	cancel_work_sync(&adapter->watchdog_task);
2177 
2178 #ifdef CONFIG_IGB_DCA
2179 	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2180 		dev_info(&pdev->dev, "DCA disabled\n");
2181 		dca_remove_requester(&pdev->dev);
2182 		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2183 		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2184 	}
2185 #endif
2186 
2187 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2188 	 * would have already happened in close and is redundant. */
2189 	igb_release_hw_control(adapter);
2190 
2191 	unregister_netdev(netdev);
2192 
2193 	igb_clear_interrupt_scheme(adapter);
2194 
2195 #ifdef CONFIG_PCI_IOV
2196 	/* reclaim resources allocated to VFs */
2197 	if (adapter->vf_data) {
2198 		/* disable iov and allow time for transactions to clear */
2199 		if (!igb_check_vf_assignment(adapter)) {
2200 			pci_disable_sriov(pdev);
2201 			msleep(500);
2202 		} else {
2203 			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2204 		}
2205 
2206 		kfree(adapter->vf_data);
2207 		adapter->vf_data = NULL;
2208 		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2209 		wrfl();
2210 		msleep(100);
2211 		dev_info(&pdev->dev, "IOV Disabled\n");
2212 	}
2213 #endif
2214 
2215 	iounmap(hw->hw_addr);
2216 	if (hw->flash_address)
2217 		iounmap(hw->flash_address);
2218 	pci_release_selected_regions(pdev,
2219 	                             pci_select_bars(pdev, IORESOURCE_MEM));
2220 
2221 	kfree(adapter->shadow_vfta);
2222 	free_netdev(netdev);
2223 
2224 	pci_disable_pcie_error_reporting(pdev);
2225 
2226 	pci_disable_device(pdev);
2227 }
2228 
2229 /**
2230  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2231  * @adapter: board private structure to initialize
2232  *
2233  * This function initializes the vf specific data storage and then attempts to
2234  * allocate the VFs.  The reason for ordering it this way is because it is much
 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2236  * the memory for the VFs.
2237  **/
static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2239 {
2240 #ifdef CONFIG_PCI_IOV
2241 	struct pci_dev *pdev = adapter->pdev;
2242 	int old_vfs = igb_find_enabled_vfs(adapter);
2243 	int i;
2244 
2245 	if (old_vfs) {
		dev_info(&pdev->dev, "%d pre-allocated VFs found - overriding "
2247 			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2248 		adapter->vfs_allocated_count = old_vfs;
2249 	}
2250 
2251 	if (!adapter->vfs_allocated_count)
2252 		return;
2253 
2254 	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2255 				sizeof(struct vf_data_storage), GFP_KERNEL);
2256 	/* if allocation failed then we do not support SR-IOV */
2257 	if (!adapter->vf_data) {
2258 		adapter->vfs_allocated_count = 0;
		dev_err(&pdev->dev,
			"Unable to allocate memory for VF Data Storage\n");
2261 		goto out;
2262 	}
2263 
2264 	if (!old_vfs) {
2265 		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2266 			goto err_out;
2267 	}
2268 	dev_info(&pdev->dev, "%d VFs allocated\n",
2269 		 adapter->vfs_allocated_count);
2270 	for (i = 0; i < adapter->vfs_allocated_count; i++)
2271 		igb_vf_configure(adapter, i);
2272 
2273 	/* DMA Coalescing is not supported in IOV mode. */
2274 	adapter->flags &= ~IGB_FLAG_DMAC;
2275 	goto out;
2276 err_out:
2277 	kfree(adapter->vf_data);
2278 	adapter->vf_data = NULL;
2279 	adapter->vfs_allocated_count = 0;
2280 out:
2281 	return;
2282 #endif /* CONFIG_PCI_IOV */
2283 }
2284 
2285 /**
2286  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2287  * @adapter: board private structure to initialize
2288  *
2289  * igb_init_hw_timer initializes the function pointer and values for the hw
2290  * timer found in hardware.
2291  **/
2292 static void igb_init_hw_timer(struct igb_adapter *adapter)
2293 {
2294 	struct e1000_hw *hw = &adapter->hw;
2295 
2296 	switch (hw->mac.type) {
2297 	case e1000_i350:
2298 	case e1000_82580:
2299 		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2300 		adapter->cycles.read = igb_read_clock;
2301 		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2302 		adapter->cycles.mult = 1;
2303 		/*
		 * The 82580 timesync updates the system timer in 8ns
		 * increments and the value cannot be shifted.  Instead we
		 * need to shift
2306 		 * the registers to generate a 64bit timer value.  As a result
2307 		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2308 		 * 24 in order to generate a larger value for synchronization.
2309 		 */
2310 		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2311 		/* disable system timer temporarily by setting bit 31 */
2312 		wr32(E1000_TSAUXC, 0x80000000);
2313 		wrfl();
2314 
2315 		/* Set registers so that rollover occurs soon to test this. */
2316 		wr32(E1000_SYSTIMR, 0x00000000);
2317 		wr32(E1000_SYSTIML, 0x80000000);
2318 		wr32(E1000_SYSTIMH, 0x000000FF);
2319 		wrfl();
2320 
2321 		/* enable system timer by clearing bit 31 */
2322 		wr32(E1000_TSAUXC, 0x0);
2323 		wrfl();
2324 
2325 		timecounter_init(&adapter->clock,
2326 				 &adapter->cycles,
2327 				 ktime_to_ns(ktime_get_real()));
2328 		/*
		 * Synchronize our NIC clock against the system wall clock.
		 * Reading a NIC time stamp takes ~3us per sample, and each
		 * sample proved stable even under load, so 10 samples per
		 * offset comparison are sufficient.
2333 		 */
2334 		memset(&adapter->compare, 0, sizeof(adapter->compare));
2335 		adapter->compare.source = &adapter->clock;
2336 		adapter->compare.target = ktime_get_real;
2337 		adapter->compare.num_samples = 10;
2338 		timecompare_update(&adapter->compare, 0);
2339 		break;
2340 	case e1000_82576:
2341 		/*
		 * Initialize the hardware timer: we keep it running just in
		 * case some program needs it later on.
2344 		 */
2345 		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2346 		adapter->cycles.read = igb_read_clock;
2347 		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2348 		adapter->cycles.mult = 1;
		/*
2350 		 * Scale the NIC clock cycle by a large factor so that
2351 		 * relatively small clock corrections can be added or
2352 		 * subtracted at each clock tick. The drawbacks of a large
2353 		 * factor are a) that the clock register overflows more quickly
2354 		 * (not such a big deal) and b) that the increment per tick has
2355 		 * to fit into 24 bits.  As a result we need to use a shift of
2356 		 * 19 so we can fit a value of 16 into the TIMINCA register.
2357 		 */
2358 		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2359 		wr32(E1000_TIMINCA,
2360 		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2361 		                (16 << IGB_82576_TSYNC_SHIFT));
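		/* 16 << 19 = 0x800000, which fits in the 24-bit increment
		 * field noted above */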
2362 
2363 		/* Set registers so that rollover occurs soon to test this. */
2364 		wr32(E1000_SYSTIML, 0x00000000);
2365 		wr32(E1000_SYSTIMH, 0xFF800000);
2366 		wrfl();
2367 
2368 		timecounter_init(&adapter->clock,
2369 				 &adapter->cycles,
2370 				 ktime_to_ns(ktime_get_real()));
2371 		/*
		 * Synchronize our NIC clock against the system wall clock.
		 * Reading a NIC time stamp takes ~3us per sample, and each
		 * sample proved stable even under load, so 10 samples per
		 * offset comparison are sufficient.
2376 		 */
2377 		memset(&adapter->compare, 0, sizeof(adapter->compare));
2378 		adapter->compare.source = &adapter->clock;
2379 		adapter->compare.target = ktime_get_real;
2380 		adapter->compare.num_samples = 10;
2381 		timecompare_update(&adapter->compare, 0);
2382 		break;
2383 	case e1000_82575:
2384 		/* 82575 does not support timesync */
2385 	default:
2386 		break;
2387 	}
2388 
2389 }
2390 
2391 /**
2392  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2393  * @adapter: board private structure to initialize
2394  *
2395  * igb_sw_init initializes the Adapter private data structure.
2396  * Fields are initialized based on PCI device information and
2397  * OS network device settings (MTU size).
2398  **/
2399 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2400 {
2401 	struct e1000_hw *hw = &adapter->hw;
2402 	struct net_device *netdev = adapter->netdev;
2403 	struct pci_dev *pdev = adapter->pdev;
2404 
2405 	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2406 
2407 	/* set default ring sizes */
2408 	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2409 	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2410 
2411 	/* set default ITR values */
2412 	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2413 	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2414 
2415 	/* set default work limits */
2416 	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2417 
2418 	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2419 				  VLAN_HLEN;
2420 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2421 
2422 	adapter->node = -1;
2423 
2424 	spin_lock_init(&adapter->stats64_lock);
2425 #ifdef CONFIG_PCI_IOV
2426 	switch (hw->mac.type) {
2427 	case e1000_82576:
2428 	case e1000_i350:
2429 		if (max_vfs > 7) {
2430 			dev_warn(&pdev->dev,
2431 				 "Maximum of 7 VFs per PF, using max\n");
2432 			adapter->vfs_allocated_count = 7;
2433 		} else
2434 			adapter->vfs_allocated_count = max_vfs;
2435 		break;
2436 	default:
2437 		break;
2438 	}
2439 #endif /* CONFIG_PCI_IOV */
2440 	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2441 	/* i350 cannot do RSS and SR-IOV at the same time */
2442 	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2443 		adapter->rss_queues = 1;
2444 
2445 	/*
2446 	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2447 	 * then we should combine the queues into a queue pair in order to
2448 	 * conserve interrupts due to limited supply
2449 	 */
2450 	if ((adapter->rss_queues > 4) ||
2451 	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2452 		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
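	/* (a queue pair shares one interrupt vector between a Tx and an
	 * Rx queue, e.g. 8 Tx + 8 Rx queues then need 8 vectors, not 16) */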
2453 
2454 	/* Setup and initialize a copy of the hw vlan table array */
	adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE,
				       sizeof(u32), GFP_ATOMIC);
2458 
2459 	/* This call may decrease the number of queues */
2460 	if (igb_init_interrupt_scheme(adapter)) {
2461 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2462 		return -ENOMEM;
2463 	}
2464 
2465 	igb_probe_vfs(adapter);
2466 
2467 	/* Explicitly disable IRQ since the NIC can be in any state. */
2468 	igb_irq_disable(adapter);
2469 
2470 	if (hw->mac.type == e1000_i350)
2471 		adapter->flags &= ~IGB_FLAG_DMAC;
2472 
2473 	set_bit(__IGB_DOWN, &adapter->state);
2474 	return 0;
2475 }
2476 
2477 /**
2478  * igb_open - Called when a network interface is made active
2479  * @netdev: network interface device structure
2480  *
2481  * Returns 0 on success, negative value on failure
2482  *
2483  * The open entry point is called when a network interface is made
2484  * active by the system (IFF_UP).  At this point all resources needed
2485  * for transmit and receive operations are allocated, the interrupt
2486  * handler is registered with the OS, the watchdog timer is started,
2487  * and the stack is notified that the interface is ready.
2488  **/
2489 static int __igb_open(struct net_device *netdev, bool resuming)
2490 {
2491 	struct igb_adapter *adapter = netdev_priv(netdev);
2492 	struct e1000_hw *hw = &adapter->hw;
2493 	struct pci_dev *pdev = adapter->pdev;
2494 	int err;
2495 	int i;
2496 
2497 	/* disallow open during test */
2498 	if (test_bit(__IGB_TESTING, &adapter->state)) {
2499 		WARN_ON(resuming);
2500 		return -EBUSY;
2501 	}
2502 
2503 	if (!resuming)
2504 		pm_runtime_get_sync(&pdev->dev);
2505 
2506 	netif_carrier_off(netdev);
2507 
2508 	/* allocate transmit descriptors */
2509 	err = igb_setup_all_tx_resources(adapter);
2510 	if (err)
2511 		goto err_setup_tx;
2512 
2513 	/* allocate receive descriptors */
2514 	err = igb_setup_all_rx_resources(adapter);
2515 	if (err)
2516 		goto err_setup_rx;
2517 
2518 	igb_power_up_link(adapter);
2519 
2520 	/* before we allocate an interrupt, we must be ready to handle it.
2521 	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call request_irq, so we have to set up our
2523 	 * clean_rx handler before we do so.  */
2524 	igb_configure(adapter);
2525 
2526 	err = igb_request_irq(adapter);
2527 	if (err)
2528 		goto err_req_irq;
2529 
2530 	/* From here on the code is the same as igb_up() */
2531 	clear_bit(__IGB_DOWN, &adapter->state);
2532 
2533 	for (i = 0; i < adapter->num_q_vectors; i++)
2534 		napi_enable(&(adapter->q_vector[i]->napi));
2535 
2536 	/* Clear any pending interrupts. */
2537 	rd32(E1000_ICR);
2538 
2539 	igb_irq_enable(adapter);
2540 
2541 	/* notify VFs that reset has been completed */
2542 	if (adapter->vfs_allocated_count) {
2543 		u32 reg_data = rd32(E1000_CTRL_EXT);
2544 		reg_data |= E1000_CTRL_EXT_PFRSTD;
2545 		wr32(E1000_CTRL_EXT, reg_data);
2546 	}
2547 
2548 	netif_tx_start_all_queues(netdev);
2549 
2550 	if (!resuming)
2551 		pm_runtime_put(&pdev->dev);
2552 
2553 	/* start the watchdog. */
2554 	hw->mac.get_link_status = 1;
2555 	schedule_work(&adapter->watchdog_task);
2556 
2557 	return 0;
2558 
2559 err_req_irq:
2560 	igb_release_hw_control(adapter);
2561 	igb_power_down_link(adapter);
2562 	igb_free_all_rx_resources(adapter);
2563 err_setup_rx:
2564 	igb_free_all_tx_resources(adapter);
2565 err_setup_tx:
2566 	igb_reset(adapter);
2567 	if (!resuming)
2568 		pm_runtime_put(&pdev->dev);
2569 
2570 	return err;
2571 }
2572 
2573 static int igb_open(struct net_device *netdev)
2574 {
2575 	return __igb_open(netdev, false);
2576 }
2577 
2578 /**
2579  * igb_close - Disables a network interface
2580  * @netdev: network interface device structure
2581  *
2582  * Returns 0, this is not allowed to fail
2583  *
2584  * The close entry point is called when an interface is de-activated
2585  * by the OS.  The hardware is still under the driver's control, but
2586  * needs to be disabled.  A global MAC reset is issued to stop the
2587  * hardware, and all transmit and receive resources are freed.
2588  **/
2589 static int __igb_close(struct net_device *netdev, bool suspending)
2590 {
2591 	struct igb_adapter *adapter = netdev_priv(netdev);
2592 	struct pci_dev *pdev = adapter->pdev;
2593 
2594 	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2595 
2596 	if (!suspending)
2597 		pm_runtime_get_sync(&pdev->dev);
2598 
2599 	igb_down(adapter);
2600 	igb_free_irq(adapter);
2601 
2602 	igb_free_all_tx_resources(adapter);
2603 	igb_free_all_rx_resources(adapter);
2604 
2605 	if (!suspending)
2606 		pm_runtime_put_sync(&pdev->dev);
2607 	return 0;
2608 }
2609 
2610 static int igb_close(struct net_device *netdev)
2611 {
2612 	return __igb_close(netdev, false);
2613 }
2614 
2615 /**
2616  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2617  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2618  *
2619  * Return 0 on success, negative on failure
2620  **/
2621 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2622 {
2623 	struct device *dev = tx_ring->dev;
2624 	int orig_node = dev_to_node(dev);
2625 	int size;
2626 
2627 	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
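	/* try to allocate the buffer info array on the ring's preferred
	 * NUMA node first, then fall back to any node */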
2628 	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2629 	if (!tx_ring->tx_buffer_info)
2630 		tx_ring->tx_buffer_info = vzalloc(size);
2631 	if (!tx_ring->tx_buffer_info)
2632 		goto err;
2633 
2634 	/* round up to nearest 4K */
2635 	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2636 	tx_ring->size = ALIGN(tx_ring->size, 4096);
2637 
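	/* temporarily point the device at the ring's NUMA node so the
	 * descriptor memory is allocated there, then restore the node and
	 * retry without the preference if the first attempt fails */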
2638 	set_dev_node(dev, tx_ring->numa_node);
2639 	tx_ring->desc = dma_alloc_coherent(dev,
2640 					   tx_ring->size,
2641 					   &tx_ring->dma,
2642 					   GFP_KERNEL);
2643 	set_dev_node(dev, orig_node);
2644 	if (!tx_ring->desc)
2645 		tx_ring->desc = dma_alloc_coherent(dev,
2646 						   tx_ring->size,
2647 						   &tx_ring->dma,
2648 						   GFP_KERNEL);
2649 
2650 	if (!tx_ring->desc)
2651 		goto err;
2652 
2653 	tx_ring->next_to_use = 0;
2654 	tx_ring->next_to_clean = 0;
2655 
2656 	return 0;
2657 
2658 err:
	vfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;
	dev_err(dev,
		"Unable to allocate memory for the transmit descriptor ring\n");
2662 	return -ENOMEM;
2663 }
2664 
2665 /**
2666  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2667  *				  (Descriptors) for all queues
2668  * @adapter: board private structure
2669  *
2670  * Return 0 on success, negative on failure
2671  **/
2672 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2673 {
2674 	struct pci_dev *pdev = adapter->pdev;
2675 	int i, err = 0;
2676 
2677 	for (i = 0; i < adapter->num_tx_queues; i++) {
2678 		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2679 		if (err) {
2680 			dev_err(&pdev->dev,
2681 				"Allocation for Tx Queue %u failed\n", i);
2682 			for (i--; i >= 0; i--)
2683 				igb_free_tx_resources(adapter->tx_ring[i]);
2684 			break;
2685 		}
2686 	}
2687 
2688 	return err;
2689 }
2690 
2691 /**
2692  * igb_setup_tctl - configure the transmit control registers
2693  * @adapter: Board private structure
2694  **/
2695 void igb_setup_tctl(struct igb_adapter *adapter)
2696 {
2697 	struct e1000_hw *hw = &adapter->hw;
2698 	u32 tctl;
2699 
2700 	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2701 	wr32(E1000_TXDCTL(0), 0);
2702 
2703 	/* Program the Transmit Control Register */
2704 	tctl = rd32(E1000_TCTL);
2705 	tctl &= ~E1000_TCTL_CT;
2706 	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2707 		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2708 
2709 	igb_config_collision_dist(hw);
2710 
2711 	/* Enable transmits */
2712 	tctl |= E1000_TCTL_EN;
2713 
2714 	wr32(E1000_TCTL, tctl);
2715 }
2716 
2717 /**
2718  * igb_configure_tx_ring - Configure transmit ring after Reset
2719  * @adapter: board private structure
2720  * @ring: tx ring to configure
2721  *
2722  * Configure a transmit ring after a reset.
2723  **/
2724 void igb_configure_tx_ring(struct igb_adapter *adapter,
2725                            struct igb_ring *ring)
2726 {
2727 	struct e1000_hw *hw = &adapter->hw;
2728 	u32 txdctl = 0;
2729 	u64 tdba = ring->dma;
2730 	int reg_idx = ring->reg_idx;
2731 
2732 	/* disable the queue */
2733 	wr32(E1000_TXDCTL(reg_idx), 0);
2734 	wrfl();
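	/* brief delay to let any in-flight descriptor fetches drain
	 * before the ring registers are rewritten */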
2735 	mdelay(10);
2736 
2737 	wr32(E1000_TDLEN(reg_idx),
2738 	                ring->count * sizeof(union e1000_adv_tx_desc));
2739 	wr32(E1000_TDBAL(reg_idx),
2740 	                tdba & 0x00000000ffffffffULL);
2741 	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2742 
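	/* cache the tail register address so the transmit hot path can
	 * bump it with a bare writel() */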
2743 	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2744 	wr32(E1000_TDH(reg_idx), 0);
2745 	writel(0, ring->tail);
2746 
2747 	txdctl |= IGB_TX_PTHRESH;
2748 	txdctl |= IGB_TX_HTHRESH << 8;
2749 	txdctl |= IGB_TX_WTHRESH << 16;
2750 
2751 	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2752 	wr32(E1000_TXDCTL(reg_idx), txdctl);
2753 }
2754 
2755 /**
2756  * igb_configure_tx - Configure transmit Unit after Reset
2757  * @adapter: board private structure
2758  *
2759  * Configure the Tx unit of the MAC after a reset.
2760  **/
2761 static void igb_configure_tx(struct igb_adapter *adapter)
2762 {
2763 	int i;
2764 
2765 	for (i = 0; i < adapter->num_tx_queues; i++)
2766 		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2767 }
2768 
2769 /**
2770  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2771  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2772  *
2773  * Returns 0 on success, negative on failure
2774  **/
2775 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2776 {
2777 	struct device *dev = rx_ring->dev;
2778 	int orig_node = dev_to_node(dev);
2779 	int size, desc_len;
2780 
2781 	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2782 	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2783 	if (!rx_ring->rx_buffer_info)
2784 		rx_ring->rx_buffer_info = vzalloc(size);
2785 	if (!rx_ring->rx_buffer_info)
2786 		goto err;
2787 
2788 	desc_len = sizeof(union e1000_adv_rx_desc);
2789 
2790 	/* Round up to nearest 4K */
2791 	rx_ring->size = rx_ring->count * desc_len;
2792 	rx_ring->size = ALIGN(rx_ring->size, 4096);
2793 
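	/* same NUMA-preferred allocation pattern as igb_setup_tx_resources */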
2794 	set_dev_node(dev, rx_ring->numa_node);
2795 	rx_ring->desc = dma_alloc_coherent(dev,
2796 					   rx_ring->size,
2797 					   &rx_ring->dma,
2798 					   GFP_KERNEL);
2799 	set_dev_node(dev, orig_node);
2800 	if (!rx_ring->desc)
2801 		rx_ring->desc = dma_alloc_coherent(dev,
2802 						   rx_ring->size,
2803 						   &rx_ring->dma,
2804 						   GFP_KERNEL);
2805 
2806 	if (!rx_ring->desc)
2807 		goto err;
2808 
2809 	rx_ring->next_to_clean = 0;
2810 	rx_ring->next_to_use = 0;
2811 
2812 	return 0;
2813 
2814 err:
2815 	vfree(rx_ring->rx_buffer_info);
2816 	rx_ring->rx_buffer_info = NULL;
	dev_err(dev,
		"Unable to allocate memory for the receive descriptor ring\n");
2819 	return -ENOMEM;
2820 }
2821 
2822 /**
2823  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2824  *				  (Descriptors) for all queues
2825  * @adapter: board private structure
2826  *
2827  * Return 0 on success, negative on failure
2828  **/
2829 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2830 {
2831 	struct pci_dev *pdev = adapter->pdev;
2832 	int i, err = 0;
2833 
2834 	for (i = 0; i < adapter->num_rx_queues; i++) {
2835 		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2836 		if (err) {
2837 			dev_err(&pdev->dev,
2838 				"Allocation for Rx Queue %u failed\n", i);
2839 			for (i--; i >= 0; i--)
2840 				igb_free_rx_resources(adapter->rx_ring[i]);
2841 			break;
2842 		}
2843 	}
2844 
2845 	return err;
2846 }
2847 
2848 /**
2849  * igb_setup_mrqc - configure the multiple receive queue control registers
2850  * @adapter: Board private structure
2851  **/
2852 static void igb_setup_mrqc(struct igb_adapter *adapter)
2853 {
2854 	struct e1000_hw *hw = &adapter->hw;
2855 	u32 mrqc, rxcsum;
2856 	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2857 	union e1000_reta {
2858 		u32 dword;
2859 		u8  bytes[4];
2860 	} reta;
2861 	static const u8 rsshash[40] = {
2862 		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2863 		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2864 		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2865 		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2866 
2867 	/* Fill out hash function seeds */
2868 	for (j = 0; j < 10; j++) {
2869 		u32 rsskey = rsshash[(j * 4)];
2870 		rsskey |= rsshash[(j * 4) + 1] << 8;
2871 		rsskey |= rsshash[(j * 4) + 2] << 16;
2872 		rsskey |= rsshash[(j * 4) + 3] << 24;
2873 		array_wr32(E1000_RSSRK(0), j, rsskey);
2874 	}
2875 
2876 	num_rx_queues = adapter->rss_queues;
2877 
2878 	if (adapter->vfs_allocated_count) {
		/* 82575 and 82576 support 2 RSS queues for VMDq */
2880 		switch (hw->mac.type) {
2881 		case e1000_i350:
2882 		case e1000_82580:
2883 			num_rx_queues = 1;
2884 			shift = 0;
2885 			break;
2886 		case e1000_82576:
2887 			shift = 3;
2888 			num_rx_queues = 2;
2889 			break;
2890 		case e1000_82575:
2891 			shift = 2;
2892 			shift2 = 6;
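			/* fall through */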
2893 		default:
2894 			break;
2895 		}
2896 	} else {
2897 		if (hw->mac.type == e1000_82575)
2898 			shift = 6;
2899 	}
2900 
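	/* populate the 128-entry redirection table one dword (four
	 * entries) at a time: entry j steers RSS hash bucket j to Rx
	 * queue (j % num_rx_queues), shifted into the field this MAC
	 * type expects */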
2901 	for (j = 0; j < (32 * 4); j++) {
2902 		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2903 		if (shift2)
2904 			reta.bytes[j & 3] |= num_rx_queues << shift2;
2905 		if ((j & 3) == 3)
2906 			wr32(E1000_RETA(j >> 2), reta.dword);
2907 	}
2908 
2909 	/*
2910 	 * Disable raw packet checksumming so that RSS hash is placed in
2911 	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2912 	 * offloads as they are enabled by default
2913 	 */
2914 	rxcsum = rd32(E1000_RXCSUM);
2915 	rxcsum |= E1000_RXCSUM_PCSD;
2916 
2917 	if (adapter->hw.mac.type >= e1000_82576)
2918 		/* Enable Receive Checksum Offload for SCTP */
2919 		rxcsum |= E1000_RXCSUM_CRCOFL;
2920 
2921 	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2922 	wr32(E1000_RXCSUM, rxcsum);
2923 
2924 	/* If VMDq is enabled then we set the appropriate mode for that, else
2925 	 * we default to RSS so that an RSS hash is calculated per packet even
2926 	 * if we are only using one queue */
2927 	if (adapter->vfs_allocated_count) {
2928 		if (hw->mac.type > e1000_82575) {
2929 			/* Set the default pool for the PF's first queue */
2930 			u32 vtctl = rd32(E1000_VT_CTL);
2931 			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2932 				   E1000_VT_CTL_DISABLE_DEF_POOL);
2933 			vtctl |= adapter->vfs_allocated_count <<
2934 				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2935 			wr32(E1000_VT_CTL, vtctl);
2936 		}
2937 		if (adapter->rss_queues > 1)
2938 			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2939 		else
2940 			mrqc = E1000_MRQC_ENABLE_VMDQ;
2941 	} else {
2942 		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2943 	}
2944 	igb_vmm_control(adapter);
2945 
2946 	/*
2947 	 * Generate RSS hash based on TCP port numbers and/or
2948 	 * IPv4/v6 src and dst addresses since UDP cannot be
2949 	 * hashed reliably due to IP fragmentation
2950 	 */
2951 	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2952 		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2953 		E1000_MRQC_RSS_FIELD_IPV6 |
2954 		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2955 		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2956 
2957 	wr32(E1000_MRQC, mrqc);
2958 }
2959 
2960 /**
2961  * igb_setup_rctl - configure the receive control registers
2962  * @adapter: Board private structure
2963  **/
2964 void igb_setup_rctl(struct igb_adapter *adapter)
2965 {
2966 	struct e1000_hw *hw = &adapter->hw;
2967 	u32 rctl;
2968 
2969 	rctl = rd32(E1000_RCTL);
2970 
2971 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2972 	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2973 
2974 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2975 		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2976 
2977 	/*
2978 	 * enable stripping of CRC. It's unlikely this will break BMC
2979 	 * redirection as it did with e1000. Newer features require
2980 	 * that the HW strips the CRC.
2981 	 */
2982 	rctl |= E1000_RCTL_SECRC;
2983 
2984 	/* disable store bad packets and clear size bits. */
2985 	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2986 
2987 	/* enable LPE to prevent packets larger than max_frame_size */
2988 	rctl |= E1000_RCTL_LPE;
2989 
2990 	/* disable queue 0 to prevent tail write w/o re-config */
2991 	wr32(E1000_RXDCTL(0), 0);
2992 
2993 	/* Attention!!!  For SR-IOV PF driver operations you must enable
2994 	 * queue drop for all VF and PF queues to prevent head of line blocking
2995 	 * if an un-trusted VF does not provide descriptors to hardware.
2996 	 */
2997 	if (adapter->vfs_allocated_count) {
2998 		/* set all queue drop enable bits */
2999 		wr32(E1000_QDE, ALL_QUEUES);
3000 	}
3001 
3002 	wr32(E1000_RCTL, rctl);
3003 }
3004 
3005 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3006                                    int vfn)
3007 {
3008 	struct e1000_hw *hw = &adapter->hw;
3009 	u32 vmolr;
3010 
	/* if this is a VF rather than the PF, check whether the VF has
	 * VLANs enabled and, if so, increase the size to allow for the
	 * vlan tag */
3013 	if (vfn < adapter->vfs_allocated_count &&
3014 	    adapter->vf_data[vfn].vlans_enabled)
3015 		size += VLAN_TAG_SIZE;
3016 
3017 	vmolr = rd32(E1000_VMOLR(vfn));
3018 	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3019 	vmolr |= size | E1000_VMOLR_LPE;
3020 	wr32(E1000_VMOLR(vfn), vmolr);
3021 
3022 	return 0;
3023 }
3024 
3025 /**
3026  * igb_rlpml_set - set maximum receive packet size
3027  * @adapter: board private structure
3028  *
3029  * Configure maximum receivable packet size.
3030  **/
3031 static void igb_rlpml_set(struct igb_adapter *adapter)
3032 {
3033 	u32 max_frame_size = adapter->max_frame_size;
3034 	struct e1000_hw *hw = &adapter->hw;
3035 	u16 pf_id = adapter->vfs_allocated_count;
3036 
3037 	if (pf_id) {
3038 		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3039 		/*
3040 		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3041 		 * to our max jumbo frame size, in case we need to enable
3042 		 * jumbo frames on one of the rings later.
3043 		 * This will not pass over-length frames into the default
3044 		 * queue because it's gated by the VMOLR.RLPML.
3045 		 */
3046 		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3047 	}
3048 
3049 	wr32(E1000_RLPML, max_frame_size);
3050 }
3051 
3052 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3053 				 int vfn, bool aupe)
3054 {
3055 	struct e1000_hw *hw = &adapter->hw;
3056 	u32 vmolr;
3057 
3058 	/*
	 * The VMOLR register exists only on 82576 and newer, so on older
	 * parts exit and do nothing
3061 	 */
3062 	if (hw->mac.type < e1000_82576)
3063 		return;
3064 
3065 	vmolr = rd32(E1000_VMOLR(vfn));
3066 	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3067 	if (aupe)
3068 		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3069 	else
3070 		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3071 
3072 	/* clear all bits that might not be set */
3073 	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3074 
3075 	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3076 		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3077 	/*
	 * in VMDq modes only allow the VFs and the PF's default pool to
	 * accept broadcast packets
3080 	 */
3081 	if (vfn <= adapter->vfs_allocated_count)
3082 		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3083 
3084 	wr32(E1000_VMOLR(vfn), vmolr);
3085 }
3086 
3087 /**
3088  * igb_configure_rx_ring - Configure a receive ring after Reset
3089  * @adapter: board private structure
3090  * @ring: receive ring to be configured
3091  *
3092  * Configure the Rx unit of the MAC after a reset.
3093  **/
3094 void igb_configure_rx_ring(struct igb_adapter *adapter,
3095                            struct igb_ring *ring)
3096 {
3097 	struct e1000_hw *hw = &adapter->hw;
3098 	u64 rdba = ring->dma;
3099 	int reg_idx = ring->reg_idx;
3100 	u32 srrctl = 0, rxdctl = 0;
3101 
3102 	/* disable the queue */
3103 	wr32(E1000_RXDCTL(reg_idx), 0);
3104 
3105 	/* Set DMA base address registers */
3106 	wr32(E1000_RDBAL(reg_idx),
3107 	     rdba & 0x00000000ffffffffULL);
3108 	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3109 	wr32(E1000_RDLEN(reg_idx),
3110 	               ring->count * sizeof(union e1000_adv_rx_desc));
3111 
3112 	/* initialize head and tail */
3113 	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3114 	wr32(E1000_RDH(reg_idx), 0);
3115 	writel(0, ring->tail);
3116 
3117 	/* set descriptor configuration */
3118 	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
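	/* packet split: headers land in a small buffer sized
	 * IGB_RX_HDR_LEN, payload in half a page capped at the 16KB
	 * hardware maximum */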
3119 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3120 	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3121 #else
3122 	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3123 #endif
3124 	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3125 	if (hw->mac.type >= e1000_82580)
3126 		srrctl |= E1000_SRRCTL_TIMESTAMP;
3127 	/* Only set Drop Enable if we are supporting multiple queues */
3128 	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3129 		srrctl |= E1000_SRRCTL_DROP_EN;
3130 
3131 	wr32(E1000_SRRCTL(reg_idx), srrctl);
3132 
3133 	/* set filtering for VMDQ pools */
3134 	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3135 
3136 	rxdctl |= IGB_RX_PTHRESH;
3137 	rxdctl |= IGB_RX_HTHRESH << 8;
3138 	rxdctl |= IGB_RX_WTHRESH << 16;
3139 
3140 	/* enable receive descriptor fetching */
3141 	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3142 	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3143 }
3144 
3145 /**
3146  * igb_configure_rx - Configure receive Unit after Reset
3147  * @adapter: board private structure
3148  *
3149  * Configure the Rx unit of the MAC after a reset.
3150  **/
3151 static void igb_configure_rx(struct igb_adapter *adapter)
3152 {
3153 	int i;
3154 
3155 	/* set UTA to appropriate mode */
3156 	igb_set_uta(adapter);
3157 
3158 	/* set the correct pool for the PF default MAC address in entry 0 */
3159 	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3160 	                 adapter->vfs_allocated_count);
3161 
3162 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3163 	 * the Base and Length of the Rx Descriptor Ring */
3164 	for (i = 0; i < adapter->num_rx_queues; i++)
3165 		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3166 }
3167 
3168 /**
3169  * igb_free_tx_resources - Free Tx Resources per Queue
3170  * @tx_ring: Tx descriptor ring for a specific queue
3171  *
3172  * Free all transmit software resources
3173  **/
3174 void igb_free_tx_resources(struct igb_ring *tx_ring)
3175 {
3176 	igb_clean_tx_ring(tx_ring);
3177 
3178 	vfree(tx_ring->tx_buffer_info);
3179 	tx_ring->tx_buffer_info = NULL;
3180 
3181 	/* if not set, then don't free */
3182 	if (!tx_ring->desc)
3183 		return;
3184 
3185 	dma_free_coherent(tx_ring->dev, tx_ring->size,
3186 			  tx_ring->desc, tx_ring->dma);
3187 
3188 	tx_ring->desc = NULL;
3189 }
3190 
3191 /**
3192  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3193  * @adapter: board private structure
3194  *
3195  * Free all transmit software resources
3196  **/
3197 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3198 {
3199 	int i;
3200 
3201 	for (i = 0; i < adapter->num_tx_queues; i++)
3202 		igb_free_tx_resources(adapter->tx_ring[i]);
3203 }
3204 
3205 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3206 				    struct igb_tx_buffer *tx_buffer)
3207 {
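	/* the first buffer of a frame owns the skb and a single mapping;
	 * later buffers in the frame carry page fragments */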
3208 	if (tx_buffer->skb) {
3209 		dev_kfree_skb_any(tx_buffer->skb);
3210 		if (tx_buffer->dma)
3211 			dma_unmap_single(ring->dev,
3212 					 tx_buffer->dma,
3213 					 tx_buffer->length,
3214 					 DMA_TO_DEVICE);
3215 	} else if (tx_buffer->dma) {
3216 		dma_unmap_page(ring->dev,
3217 			       tx_buffer->dma,
3218 			       tx_buffer->length,
3219 			       DMA_TO_DEVICE);
3220 	}
3221 	tx_buffer->next_to_watch = NULL;
3222 	tx_buffer->skb = NULL;
3223 	tx_buffer->dma = 0;
3224 	/* buffer_info must be completely set up in the transmit path */
3225 }
3226 
3227 /**
3228  * igb_clean_tx_ring - Free Tx Buffers
3229  * @tx_ring: ring to be cleaned
3230  **/
3231 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3232 {
3233 	struct igb_tx_buffer *buffer_info;
3234 	unsigned long size;
3235 	u16 i;
3236 
3237 	if (!tx_ring->tx_buffer_info)
3238 		return;
3239 	/* Free all the Tx ring sk_buffs */
3240 
3241 	for (i = 0; i < tx_ring->count; i++) {
3242 		buffer_info = &tx_ring->tx_buffer_info[i];
3243 		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3244 	}
3245 	netdev_tx_reset_queue(txring_txq(tx_ring));
3246 
3247 	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3248 	memset(tx_ring->tx_buffer_info, 0, size);
3249 
3250 	/* Zero out the descriptor ring */
3251 	memset(tx_ring->desc, 0, tx_ring->size);
3252 
3253 	tx_ring->next_to_use = 0;
3254 	tx_ring->next_to_clean = 0;
3255 }
3256 
3257 /**
3258  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3259  * @adapter: board private structure
3260  **/
3261 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3262 {
3263 	int i;
3264 
3265 	for (i = 0; i < adapter->num_tx_queues; i++)
3266 		igb_clean_tx_ring(adapter->tx_ring[i]);
3267 }
3268 
3269 /**
3270  * igb_free_rx_resources - Free Rx Resources
3271  * @rx_ring: ring to clean the resources from
3272  *
3273  * Free all receive software resources
3274  **/
3275 void igb_free_rx_resources(struct igb_ring *rx_ring)
3276 {
3277 	igb_clean_rx_ring(rx_ring);
3278 
3279 	vfree(rx_ring->rx_buffer_info);
3280 	rx_ring->rx_buffer_info = NULL;
3281 
3282 	/* if not set, then don't free */
3283 	if (!rx_ring->desc)
3284 		return;
3285 
3286 	dma_free_coherent(rx_ring->dev, rx_ring->size,
3287 			  rx_ring->desc, rx_ring->dma);
3288 
3289 	rx_ring->desc = NULL;
3290 }
3291 
3292 /**
3293  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3294  * @adapter: board private structure
3295  *
3296  * Free all receive software resources
3297  **/
3298 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3299 {
3300 	int i;
3301 
3302 	for (i = 0; i < adapter->num_rx_queues; i++)
3303 		igb_free_rx_resources(adapter->rx_ring[i]);
3304 }
3305 
3306 /**
3307  * igb_clean_rx_ring - Free Rx Buffers per Queue
3308  * @rx_ring: ring to free buffers from
3309  **/
3310 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3311 {
3312 	unsigned long size;
3313 	u16 i;
3314 
3315 	if (!rx_ring->rx_buffer_info)
3316 		return;
3317 
3318 	/* Free all the Rx ring sk_buffs */
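	/* each buffer may hold both a header mapping and a half-page
	 * data mapping from packet split, so release each piece separately */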
3319 	for (i = 0; i < rx_ring->count; i++) {
3320 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3321 		if (buffer_info->dma) {
3322 			dma_unmap_single(rx_ring->dev,
3323 			                 buffer_info->dma,
3324 					 IGB_RX_HDR_LEN,
3325 					 DMA_FROM_DEVICE);
3326 			buffer_info->dma = 0;
3327 		}
3328 
3329 		if (buffer_info->skb) {
3330 			dev_kfree_skb(buffer_info->skb);
3331 			buffer_info->skb = NULL;
3332 		}
3333 		if (buffer_info->page_dma) {
3334 			dma_unmap_page(rx_ring->dev,
3335 			               buffer_info->page_dma,
3336 				       PAGE_SIZE / 2,
3337 				       DMA_FROM_DEVICE);
3338 			buffer_info->page_dma = 0;
3339 		}
3340 		if (buffer_info->page) {
3341 			put_page(buffer_info->page);
3342 			buffer_info->page = NULL;
3343 			buffer_info->page_offset = 0;
3344 		}
3345 	}
3346 
3347 	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3348 	memset(rx_ring->rx_buffer_info, 0, size);
3349 
3350 	/* Zero out the descriptor ring */
3351 	memset(rx_ring->desc, 0, rx_ring->size);
3352 
3353 	rx_ring->next_to_clean = 0;
3354 	rx_ring->next_to_use = 0;
3355 }
3356 
3357 /**
3358  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3359  * @adapter: board private structure
3360  **/
3361 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3362 {
3363 	int i;
3364 
3365 	for (i = 0; i < adapter->num_rx_queues; i++)
3366 		igb_clean_rx_ring(adapter->rx_ring[i]);
3367 }
3368 
3369 /**
3370  * igb_set_mac - Change the Ethernet Address of the NIC
3371  * @netdev: network interface device structure
3372  * @p: pointer to an address structure
3373  *
3374  * Returns 0 on success, negative on failure
3375  **/
3376 static int igb_set_mac(struct net_device *netdev, void *p)
3377 {
3378 	struct igb_adapter *adapter = netdev_priv(netdev);
3379 	struct e1000_hw *hw = &adapter->hw;
3380 	struct sockaddr *addr = p;
3381 
3382 	if (!is_valid_ether_addr(addr->sa_data))
3383 		return -EADDRNOTAVAIL;
3384 
3385 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3386 	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3387 
3388 	/* set the correct pool for the new PF MAC address in entry 0 */
3389 	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3390 	                 adapter->vfs_allocated_count);
3391 
3392 	return 0;
3393 }
3394 
3395 /**
3396  * igb_write_mc_addr_list - write multicast addresses to MTA
3397  * @netdev: network interface device structure
3398  *
3399  * Writes multicast address list to the MTA hash table.
3400  * Returns: -ENOMEM on failure
 *          0 on no addresses written
 *          X on writing X addresses to MTA
3403  **/
3404 static int igb_write_mc_addr_list(struct net_device *netdev)
3405 {
3406 	struct igb_adapter *adapter = netdev_priv(netdev);
3407 	struct e1000_hw *hw = &adapter->hw;
3408 	struct netdev_hw_addr *ha;
3409 	u8  *mta_list;
3410 	int i;
3411 
3412 	if (netdev_mc_empty(netdev)) {
3413 		/* nothing to program, so clear mc list */
3414 		igb_update_mc_addr_list(hw, NULL, 0);
3415 		igb_restore_vf_multicasts(adapter);
3416 		return 0;
3417 	}
3418 
	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3420 	if (!mta_list)
3421 		return -ENOMEM;
3422 
3423 	/* The shared function expects a packed array of only addresses. */
3424 	i = 0;
3425 	netdev_for_each_mc_addr(ha, netdev)
3426 		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3427 
3428 	igb_update_mc_addr_list(hw, mta_list, i);
3429 	kfree(mta_list);
3430 
3431 	return netdev_mc_count(netdev);
3432 }
3433 
3434 /**
3435  * igb_write_uc_addr_list - write unicast addresses to RAR table
3436  * @netdev: network interface device structure
3437  *
3438  * Writes unicast address list to the RAR table.
3439  * Returns: -ENOMEM on failure/insufficient address space
3440  *                0 on no addresses written
3441  *                X on writing X addresses to the RAR table
3442  **/
3443 static int igb_write_uc_addr_list(struct net_device *netdev)
3444 {
3445 	struct igb_adapter *adapter = netdev_priv(netdev);
3446 	struct e1000_hw *hw = &adapter->hw;
3447 	unsigned int vfn = adapter->vfs_allocated_count;
3448 	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3449 	int count = 0;
3450 
3451 	/* return ENOMEM indicating insufficient memory for addresses */
3452 	if (netdev_uc_count(netdev) > rar_entries)
3453 		return -ENOMEM;
3454 
3455 	if (!netdev_uc_empty(netdev) && rar_entries) {
3456 		struct netdev_hw_addr *ha;
3457 
3458 		netdev_for_each_uc_addr(ha, netdev) {
3459 			if (!rar_entries)
3460 				break;
3461 			igb_rar_set_qsel(adapter, ha->addr,
3462 			                 rar_entries--,
3463 			                 vfn);
3464 			count++;
3465 		}
3466 	}
3467 	/* write the addresses in reverse order to avoid write combining */
	for (; rar_entries > 0; rar_entries--) {
3469 		wr32(E1000_RAH(rar_entries), 0);
3470 		wr32(E1000_RAL(rar_entries), 0);
3471 	}
3472 	wrfl();
3473 
3474 	return count;
3475 }
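
/* Example budget for the RAR table above (illustrative only; the real
 * rar_entry_count comes from the MAC-specific setup code): assuming a
 * 24-entry RAR table and 7 allocated VFs, rar_entries = 24 - (7 + 1) = 16.
 * Entry 0 holds the PF MAC (see igb_set_mac), the top 7 entries hold the
 * VF MAC addresses, and the 16 entries in between can hold additional
 * unicast filters before the caller has to fall back to unicast
 * promiscuous mode.
 */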
3476 
3477 /**
3478  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3479  * @netdev: network interface device structure
3480  *
3481  * The set_rx_mode entry point is called whenever the unicast or multicast
3482  * address lists or the network interface flags are updated.  This routine is
3483  * responsible for configuring the hardware for proper unicast, multicast,
3484  * promiscuous mode, and all-multi behavior.
3485  **/
3486 static void igb_set_rx_mode(struct net_device *netdev)
3487 {
3488 	struct igb_adapter *adapter = netdev_priv(netdev);
3489 	struct e1000_hw *hw = &adapter->hw;
3490 	unsigned int vfn = adapter->vfs_allocated_count;
3491 	u32 rctl, vmolr = 0;
3492 	int count;
3493 
3494 	/* Check for Promiscuous and All Multicast modes */
3495 	rctl = rd32(E1000_RCTL);
3496 
	/* clear the affected bits */
3498 	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3499 
3500 	if (netdev->flags & IFF_PROMISC) {
3501 		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3502 		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3503 	} else {
3504 		if (netdev->flags & IFF_ALLMULTI) {
3505 			rctl |= E1000_RCTL_MPE;
3506 			vmolr |= E1000_VMOLR_MPME;
3507 		} else {
3508 			/*
3509 			 * Write addresses to the MTA, if the attempt fails
3510 			 * then we should just turn on promiscuous mode so
3511 			 * that we can at least receive multicast traffic
3512 			 */
3513 			count = igb_write_mc_addr_list(netdev);
3514 			if (count < 0) {
3515 				rctl |= E1000_RCTL_MPE;
3516 				vmolr |= E1000_VMOLR_MPME;
3517 			} else if (count) {
3518 				vmolr |= E1000_VMOLR_ROMPE;
3519 			}
3520 		}
3521 		/*
3522 		 * Write addresses to available RAR registers, if there is not
3523 		 * sufficient space to store all the addresses then enable
3524 		 * unicast promiscuous mode
3525 		 */
3526 		count = igb_write_uc_addr_list(netdev);
3527 		if (count < 0) {
3528 			rctl |= E1000_RCTL_UPE;
3529 			vmolr |= E1000_VMOLR_ROPE;
3530 		}
3531 		rctl |= E1000_RCTL_VFE;
3532 	}
3533 	wr32(E1000_RCTL, rctl);
3534 
3535 	/*
3536 	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3537 	 * the VMOLR to enable the appropriate modes.  Without this workaround
3538 	 * we will have issues with VLAN tag stripping not being done for frames
3539 	 * that are only arriving because we are the default pool
3540 	 */
3541 	if (hw->mac.type < e1000_82576)
3542 		return;
3543 
3544 	vmolr |= rd32(E1000_VMOLR(vfn)) &
3545 	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3546 	wr32(E1000_VMOLR(vfn), vmolr);
3547 	igb_restore_vf_multicasts(adapter);
3548 }
3549 
3550 static void igb_check_wvbr(struct igb_adapter *adapter)
3551 {
3552 	struct e1000_hw *hw = &adapter->hw;
3553 	u32 wvbr = 0;
3554 
3555 	switch (hw->mac.type) {
3556 	case e1000_82576:
3557 	case e1000_i350:
		wvbr = rd32(E1000_WVBR);
		if (!wvbr)
			return;
		break;
3561 	default:
3562 		break;
3563 	}
3564 
3565 	adapter->wvbr |= wvbr;
3566 }
3567 
3568 #define IGB_STAGGERED_QUEUE_OFFSET 8
3569 
3570 static void igb_spoof_check(struct igb_adapter *adapter)
3571 {
3572 	int j;
3573 
3574 	if (!adapter->wvbr)
3575 		return;
3576 
	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3578 		if (adapter->wvbr & (1 << j) ||
3579 		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3580 			dev_warn(&adapter->pdev->dev,
3581 				"Spoof event(s) detected on VF %d\n", j);
3582 			adapter->wvbr &=
3583 				~((1 << j) |
3584 				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3585 		}
3586 	}
3587 }
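
/* Note on the WVBR decoding above: each VF appears to be reported twice
 * in the Wrong VM Behavior register, once in the low byte and once
 * offset by IGB_STAGGERED_QUEUE_OFFSET (8), which is presumably why both
 * bit j and bit (j + 8) are tested and cleared for VF j.  For example, a
 * spoof event on VF 2 sets bit 2 and/or bit 10, and both are cleared
 * once the warning has been logged.
 */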
3588 
3589 /* Need to wait a few seconds after link up to get diagnostic information from
3590  * the phy */
3591 static void igb_update_phy_info(unsigned long data)
3592 {
3593 	struct igb_adapter *adapter = (struct igb_adapter *) data;
3594 	igb_get_phy_info(&adapter->hw);
3595 }
3596 
3597 /**
3598  * igb_has_link - check shared code for link and determine up/down
3599  * @adapter: pointer to driver private info
3600  **/
3601 bool igb_has_link(struct igb_adapter *adapter)
3602 {
3603 	struct e1000_hw *hw = &adapter->hw;
3604 	bool link_active = false;
3605 	s32 ret_val = 0;
3606 
	/* get_link_status is set on LSC (link status) interrupt or
	 * rx sequence error interrupt.  get_link_status will remain
	 * set until e1000_check_for_link establishes link
	 * for copper adapters ONLY
	 */
3612 	switch (hw->phy.media_type) {
3613 	case e1000_media_type_copper:
3614 		if (hw->mac.get_link_status) {
3615 			ret_val = hw->mac.ops.check_for_link(hw);
3616 			link_active = !hw->mac.get_link_status;
3617 		} else {
3618 			link_active = true;
3619 		}
3620 		break;
3621 	case e1000_media_type_internal_serdes:
3622 		ret_val = hw->mac.ops.check_for_link(hw);
3623 		link_active = hw->mac.serdes_has_link;
3624 		break;
3625 	default:
3626 	case e1000_media_type_unknown:
3627 		break;
3628 	}
3629 
3630 	return link_active;
3631 }
3632 
3633 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3634 {
3635 	bool ret = false;
3636 	u32 ctrl_ext, thstat;
3637 
3638 	/* check for thermal sensor event on i350, copper only */
3639 	if (hw->mac.type == e1000_i350) {
3640 		thstat = rd32(E1000_THSTAT);
3641 		ctrl_ext = rd32(E1000_CTRL_EXT);
3642 
3643 		if ((hw->phy.media_type == e1000_media_type_copper) &&
3644 		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3645 			ret = !!(thstat & event);
3646 		}
3647 	}
3648 
3649 	return ret;
3650 }
3651 
3652 /**
3653  * igb_watchdog - Timer Call-back
3654  * @data: pointer to adapter cast into an unsigned long
3655  **/
3656 static void igb_watchdog(unsigned long data)
3657 {
3658 	struct igb_adapter *adapter = (struct igb_adapter *)data;
3659 	/* Do the rest outside of interrupt context */
3660 	schedule_work(&adapter->watchdog_task);
3661 }
3662 
3663 static void igb_watchdog_task(struct work_struct *work)
3664 {
3665 	struct igb_adapter *adapter = container_of(work,
3666 	                                           struct igb_adapter,
3667                                                    watchdog_task);
3668 	struct e1000_hw *hw = &adapter->hw;
3669 	struct net_device *netdev = adapter->netdev;
3670 	u32 link;
3671 	int i;
3672 
3673 	link = igb_has_link(adapter);
3674 	if (link) {
3675 		/* Cancel scheduled suspend requests. */
3676 		pm_runtime_resume(netdev->dev.parent);
3677 
3678 		if (!netif_carrier_ok(netdev)) {
3679 			u32 ctrl;
3680 			hw->mac.ops.get_speed_and_duplex(hw,
3681 			                                 &adapter->link_speed,
3682 			                                 &adapter->link_duplex);
3683 
3684 			ctrl = rd32(E1000_CTRL);
			/* Link status message must follow this format */
3686 			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3687 			       "Duplex, Flow Control: %s\n",
3688 			       netdev->name,
3689 			       adapter->link_speed,
3690 			       adapter->link_duplex == FULL_DUPLEX ?
3691 			       "Full" : "Half",
3692 			       (ctrl & E1000_CTRL_TFCE) &&
3693 			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3694 			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3695 			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3696 
3697 			/* check for thermal sensor event */
3698 			if (igb_thermal_sensor_event(hw,
3699 			    E1000_THSTAT_LINK_THROTTLE)) {
3700 				netdev_info(netdev, "The network adapter link "
3701 					    "speed was downshifted because it "
3702 					    "overheated\n");
3703 			}
3704 
3705 			/* adjust timeout factor according to speed/duplex */
3706 			adapter->tx_timeout_factor = 1;
3707 			switch (adapter->link_speed) {
3708 			case SPEED_10:
3709 				adapter->tx_timeout_factor = 14;
3710 				break;
3711 			case SPEED_100:
3712 				/* maybe add some timeout factor ? */
3713 				break;
3714 			}
3715 
3716 			netif_carrier_on(netdev);
3717 
3718 			igb_ping_all_vfs(adapter);
3719 			igb_check_vf_rate_limit(adapter);
3720 
3721 			/* link state has changed, schedule phy info update */
3722 			if (!test_bit(__IGB_DOWN, &adapter->state))
3723 				mod_timer(&adapter->phy_info_timer,
3724 					  round_jiffies(jiffies + 2 * HZ));
3725 		}
3726 	} else {
3727 		if (netif_carrier_ok(netdev)) {
3728 			adapter->link_speed = 0;
3729 			adapter->link_duplex = 0;
3730 
3731 			/* check for thermal sensor event */
3732 			if (igb_thermal_sensor_event(hw,
3733 			    E1000_THSTAT_PWR_DOWN)) {
3734 				netdev_err(netdev, "The network adapter was "
3735 					   "stopped because it overheated\n");
3736 			}
3737 
			/* Link status message must follow this format */
3739 			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3740 			       netdev->name);
3741 			netif_carrier_off(netdev);
3742 
3743 			igb_ping_all_vfs(adapter);
3744 
3745 			/* link state has changed, schedule phy info update */
3746 			if (!test_bit(__IGB_DOWN, &adapter->state))
3747 				mod_timer(&adapter->phy_info_timer,
3748 					  round_jiffies(jiffies + 2 * HZ));
3749 
3750 			pm_schedule_suspend(netdev->dev.parent,
3751 					    MSEC_PER_SEC * 5);
3752 		}
3753 	}
3754 
3755 	spin_lock(&adapter->stats64_lock);
3756 	igb_update_stats(adapter, &adapter->stats64);
3757 	spin_unlock(&adapter->stats64_lock);
3758 
3759 	for (i = 0; i < adapter->num_tx_queues; i++) {
3760 		struct igb_ring *tx_ring = adapter->tx_ring[i];
3761 		if (!netif_carrier_ok(netdev)) {
3762 			/* We've lost link, so the controller stops DMA,
3763 			 * but we've got queued Tx work that's never going
3764 			 * to get done, so reset controller to flush Tx.
3765 			 * (Do the reset outside of interrupt context). */
3766 			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3767 				adapter->tx_timeout_count++;
3768 				schedule_work(&adapter->reset_task);
3769 				/* return immediately since reset is imminent */
3770 				return;
3771 			}
3772 		}
3773 
3774 		/* Force detection of hung controller every watchdog period */
3775 		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3776 	}
3777 
3778 	/* Cause software interrupt to ensure rx ring is cleaned */
3779 	if (adapter->msix_entries) {
3780 		u32 eics = 0;
3781 		for (i = 0; i < adapter->num_q_vectors; i++)
3782 			eics |= adapter->q_vector[i]->eims_value;
3783 		wr32(E1000_EICS, eics);
3784 	} else {
3785 		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3786 	}
3787 
3788 	igb_spoof_check(adapter);
3789 
3790 	/* Reset the timer */
3791 	if (!test_bit(__IGB_DOWN, &adapter->state))
3792 		mod_timer(&adapter->watchdog_timer,
3793 			  round_jiffies(jiffies + 2 * HZ));
3794 }
3795 
3796 enum latency_range {
3797 	lowest_latency = 0,
3798 	low_latency = 1,
3799 	bulk_latency = 2,
3800 	latency_invalid = 255
3801 };
3802 
/**
 * igb_update_ring_itr - update the dynamic ITR value based on packet size
 * @q_vector: pointer to q_vector
 *
 *      Stores a new ITR value based strictly on packet size.  This
 *      algorithm is less sophisticated than that used in igb_update_itr,
 *      due to the difficulty of synchronizing statistics across multiple
 *      receive rings.  The divisors and thresholds used by this function
 *      were determined based on theoretical maximum wire speed and testing
 *      data, in order to minimize response time while increasing bulk
 *      throughput.
 *      This functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  This function is called only when operating in a multiqueue
 *             receive environment.
 **/
3819 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3820 {
3821 	int new_val = q_vector->itr_val;
3822 	int avg_wire_size = 0;
3823 	struct igb_adapter *adapter = q_vector->adapter;
3824 	unsigned int packets;
3825 
3826 	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3827 	 * ints/sec - ITR timer value of 120 ticks.
3828 	 */
3829 	if (adapter->link_speed != SPEED_1000) {
3830 		new_val = IGB_4K_ITR;
3831 		goto set_itr_val;
3832 	}
3833 
3834 	packets = q_vector->rx.total_packets;
3835 	if (packets)
3836 		avg_wire_size = q_vector->rx.total_bytes / packets;
3837 
3838 	packets = q_vector->tx.total_packets;
3839 	if (packets)
3840 		avg_wire_size = max_t(u32, avg_wire_size,
3841 				      q_vector->tx.total_bytes / packets);
3842 
3843 	/* if avg_wire_size isn't set no work was done */
3844 	if (!avg_wire_size)
3845 		goto clear_counts;
3846 
3847 	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3848 	avg_wire_size += 24;
3849 
3850 	/* Don't starve jumbo frames */
3851 	avg_wire_size = min(avg_wire_size, 3000);
3852 
3853 	/* Give a little boost to mid-size frames */
3854 	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3855 		new_val = avg_wire_size / 3;
3856 	else
3857 		new_val = avg_wire_size / 2;
3858 
3859 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3860 	if (new_val < IGB_20K_ITR &&
3861 	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3862 	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3863 		new_val = IGB_20K_ITR;
3864 
3865 set_itr_val:
3866 	if (new_val != q_vector->itr_val) {
3867 		q_vector->itr_val = new_val;
3868 		q_vector->set_itr = 1;
3869 	}
3870 clear_counts:
3871 	q_vector->rx.total_bytes = 0;
3872 	q_vector->rx.total_packets = 0;
3873 	q_vector->tx.total_bytes = 0;
3874 	q_vector->tx.total_packets = 0;
3875 }
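
/* Worked example for the sizing heuristic above (illustrative numbers):
 * with 1000-byte average frames, avg_wire_size = 1000 + 24 = 1024, which
 * falls in the mid-size "boost" window (300..1200), so
 * new_val = 1024 / 3 = 341.  With 1600-byte averages,
 * avg_wire_size = 1624 and new_val = 1624 / 2 = 812, a slower interrupt
 * rate for bulk traffic.  The IGB_*_ITR constants appear to be EITR
 * interval values defined in igb.h, so a larger new_val means fewer
 * interrupts per second.
 */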
3876 
/**
 * igb_update_itr - update the dynamic ITR value based on statistics
 * @q_vector: pointer to q_vector
 * @ring_container: ring info to update the itr for
 *
 *      Stores a new ITR value based on packets and byte
 *      counts during the last interrupt.  The advantage of per interrupt
 *      computation is faster updates and more accurate ITR for the current
 *      traffic pattern.  Constants in this function were computed
 *      based on theoretical maximum wire speed and thresholds were set based
 *      on testing data as well as attempting to minimize response time
 *      while increasing bulk throughput.
 *      This functionality is controlled by the InterruptThrottleRate module
 *      parameter (see igb_param.c)
 *      NOTE:  These calculations are only valid when operating in a single-
 *             queue environment.
 **/
3893 static void igb_update_itr(struct igb_q_vector *q_vector,
3894 			   struct igb_ring_container *ring_container)
3895 {
3896 	unsigned int packets = ring_container->total_packets;
3897 	unsigned int bytes = ring_container->total_bytes;
3898 	u8 itrval = ring_container->itr;
3899 
3900 	/* no packets, exit with status unchanged */
3901 	if (packets == 0)
3902 		return;
3903 
3904 	switch (itrval) {
3905 	case lowest_latency:
3906 		/* handle TSO and jumbo frames */
3907 		if (bytes/packets > 8000)
3908 			itrval = bulk_latency;
3909 		else if ((packets < 5) && (bytes > 512))
3910 			itrval = low_latency;
3911 		break;
3912 	case low_latency:  /* 50 usec aka 20000 ints/s */
3913 		if (bytes > 10000) {
3914 			/* this if handles the TSO accounting */
3915 			if (bytes/packets > 8000) {
3916 				itrval = bulk_latency;
3917 			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3918 				itrval = bulk_latency;
			} else if (packets > 35) {
3920 				itrval = lowest_latency;
3921 			}
3922 		} else if (bytes/packets > 2000) {
3923 			itrval = bulk_latency;
3924 		} else if (packets <= 2 && bytes < 512) {
3925 			itrval = lowest_latency;
3926 		}
3927 		break;
3928 	case bulk_latency: /* 250 usec aka 4000 ints/s */
3929 		if (bytes > 25000) {
3930 			if (packets > 35)
3931 				itrval = low_latency;
3932 		} else if (bytes < 1500) {
3933 			itrval = low_latency;
3934 		}
3935 		break;
3936 	}
3937 
3938 	/* clear work counters since we have the values we need */
3939 	ring_container->total_bytes = 0;
3940 	ring_container->total_packets = 0;
3941 
3942 	/* write updated itr to ring container */
3943 	ring_container->itr = itrval;
3944 }
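
/* Example transition through the state machine above: a ring currently
 * in low_latency that saw 50 packets / 12000 bytes since the last
 * interrupt has bytes > 10000 and only 240 bytes/packet, so none of the
 * TSO/bulk checks fire and packets > 35 moves it to lowest_latency
 * (a higher interrupt rate).  A ring in lowest_latency seeing
 * 2 packets / 18000 bytes (18000 / 2 = 9000 > 8000, i.e. TSO or jumbo
 * traffic) moves straight to bulk_latency instead.
 */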
3945 
3946 static void igb_set_itr(struct igb_q_vector *q_vector)
3947 {
3948 	struct igb_adapter *adapter = q_vector->adapter;
3949 	u32 new_itr = q_vector->itr_val;
3950 	u8 current_itr = 0;
3951 
	/* for non-gigabit speeds, just fix the interrupt rate at 4000 ints/sec */
3953 	if (adapter->link_speed != SPEED_1000) {
3954 		current_itr = 0;
3955 		new_itr = IGB_4K_ITR;
3956 		goto set_itr_now;
3957 	}
3958 
3959 	igb_update_itr(q_vector, &q_vector->tx);
3960 	igb_update_itr(q_vector, &q_vector->rx);
3961 
3962 	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3963 
3964 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3965 	if (current_itr == lowest_latency &&
3966 	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3967 	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3968 		current_itr = low_latency;
3969 
3970 	switch (current_itr) {
3971 	/* counts and packets in update_itr are dependent on these numbers */
3972 	case lowest_latency:
3973 		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3974 		break;
3975 	case low_latency:
3976 		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3977 		break;
3978 	case bulk_latency:
3979 		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3980 		break;
3981 	default:
3982 		break;
3983 	}
3984 
3985 set_itr_now:
3986 	if (new_itr != q_vector->itr_val) {
3987 		/* this attempts to bias the interrupt rate towards Bulk
3988 		 * by adding intermediate steps when interrupt rate is
3989 		 * increasing */
3990 		new_itr = new_itr > q_vector->itr_val ?
3991 		             max((new_itr * q_vector->itr_val) /
3992 		                 (new_itr + (q_vector->itr_val >> 2)),
3993 				 new_itr) :
3994 			     new_itr;
3995 		/* Don't write the value here; it resets the adapter's
3996 		 * internal timer, and causes us to delay far longer than
3997 		 * we should between interrupts.  Instead, we write the ITR
3998 		 * value at the beginning of the next interrupt so the timing
3999 		 * ends up being correct.
4000 		 */
4001 		q_vector->itr_val = new_itr;
4002 		q_vector->set_itr = 1;
4003 	}
4004 }
4005 
4006 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4007 		     u32 type_tucmd, u32 mss_l4len_idx)
4008 {
4009 	struct e1000_adv_tx_context_desc *context_desc;
4010 	u16 i = tx_ring->next_to_use;
4011 
4012 	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4013 
4014 	i++;
4015 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4016 
4017 	/* set bits to identify this as an advanced context descriptor */
4018 	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4019 
4020 	/* For 82575, context index must be unique per ring. */
4021 	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4022 		mss_l4len_idx |= tx_ring->reg_idx << 4;
4023 
4024 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4025 	context_desc->seqnum_seed	= 0;
4026 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4027 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4028 }
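
/* Field packing example for the context descriptor above (shift values
 * are illustrative, per the E1000_ADVTXD_* definitions): for an untagged
 * IPv4/TCP frame the callers build vlan_macip_lens as
 * iplen | (maclen << E1000_ADVTXD_MACLEN_SHIFT), e.g. 20 | (14 << 9)
 * for a 14-byte Ethernet header and a 20-byte IP header, with any
 * 802.1q tag OR-ed into the upper 16 bits.
 */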
4029 
4030 static int igb_tso(struct igb_ring *tx_ring,
4031 		   struct igb_tx_buffer *first,
4032 		   u8 *hdr_len)
4033 {
4034 	struct sk_buff *skb = first->skb;
4035 	u32 vlan_macip_lens, type_tucmd;
4036 	u32 mss_l4len_idx, l4len;
4037 
4038 	if (!skb_is_gso(skb))
4039 		return 0;
4040 
4041 	if (skb_header_cloned(skb)) {
4042 		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4043 		if (err)
4044 			return err;
4045 	}
4046 
4047 	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4048 	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4049 
4050 	if (first->protocol == __constant_htons(ETH_P_IP)) {
4051 		struct iphdr *iph = ip_hdr(skb);
4052 		iph->tot_len = 0;
4053 		iph->check = 0;
4054 		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4055 							 iph->daddr, 0,
4056 							 IPPROTO_TCP,
4057 							 0);
4058 		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4059 		first->tx_flags |= IGB_TX_FLAGS_TSO |
4060 				   IGB_TX_FLAGS_CSUM |
4061 				   IGB_TX_FLAGS_IPV4;
4062 	} else if (skb_is_gso_v6(skb)) {
4063 		ipv6_hdr(skb)->payload_len = 0;
4064 		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4065 						       &ipv6_hdr(skb)->daddr,
4066 						       0, IPPROTO_TCP, 0);
4067 		first->tx_flags |= IGB_TX_FLAGS_TSO |
4068 				   IGB_TX_FLAGS_CSUM;
4069 	}
4070 
4071 	/* compute header lengths */
4072 	l4len = tcp_hdrlen(skb);
4073 	*hdr_len = skb_transport_offset(skb) + l4len;
4074 
4075 	/* update gso size and bytecount with header size */
4076 	first->gso_segs = skb_shinfo(skb)->gso_segs;
4077 	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4078 
4079 	/* MSS L4LEN IDX */
4080 	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4081 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4082 
4083 	/* VLAN MACLEN IPLEN */
4084 	vlan_macip_lens = skb_network_header_len(skb);
4085 	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4086 	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4087 
4088 	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4089 
4090 	return 1;
4091 }
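
/* Accounting example for igb_tso() above: for a TCP skb with a 66-byte
 * header (hdr_len), 9000 bytes of payload and gso_size 1448, the
 * hardware emits DIV_ROUND_UP(9000, 1448) = 7 segments, so bytecount
 * grows by (7 - 1) * 66 = 396 bytes of replicated headers, matching
 * what actually goes on the wire (7 * 66 + 9000 bytes).
 */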
4092 
4093 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4094 {
4095 	struct sk_buff *skb = first->skb;
4096 	u32 vlan_macip_lens = 0;
4097 	u32 mss_l4len_idx = 0;
4098 	u32 type_tucmd = 0;
4099 
4100 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4101 		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4102 			return;
4103 	} else {
4104 		u8 l4_hdr = 0;
4105 		switch (first->protocol) {
4106 		case __constant_htons(ETH_P_IP):
4107 			vlan_macip_lens |= skb_network_header_len(skb);
4108 			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4109 			l4_hdr = ip_hdr(skb)->protocol;
4110 			break;
4111 		case __constant_htons(ETH_P_IPV6):
4112 			vlan_macip_lens |= skb_network_header_len(skb);
4113 			l4_hdr = ipv6_hdr(skb)->nexthdr;
4114 			break;
4115 		default:
4116 			if (unlikely(net_ratelimit())) {
4117 				dev_warn(tx_ring->dev,
					 "partial checksum but proto=%x!\n",
					 first->protocol);
4120 			}
4121 			break;
4122 		}
4123 
4124 		switch (l4_hdr) {
4125 		case IPPROTO_TCP:
4126 			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4127 			mss_l4len_idx = tcp_hdrlen(skb) <<
4128 					E1000_ADVTXD_L4LEN_SHIFT;
4129 			break;
4130 		case IPPROTO_SCTP:
4131 			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4132 			mss_l4len_idx = sizeof(struct sctphdr) <<
4133 					E1000_ADVTXD_L4LEN_SHIFT;
4134 			break;
4135 		case IPPROTO_UDP:
4136 			mss_l4len_idx = sizeof(struct udphdr) <<
4137 					E1000_ADVTXD_L4LEN_SHIFT;
4138 			break;
4139 		default:
4140 			if (unlikely(net_ratelimit())) {
4141 				dev_warn(tx_ring->dev,
					 "partial checksum but l4 proto=%x!\n",
					 l4_hdr);
4144 			}
4145 			break;
4146 		}
4147 
4148 		/* update TX checksum flag */
4149 		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4150 	}
4151 
4152 	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4153 	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4154 
4155 	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4156 }
4157 
4158 static __le32 igb_tx_cmd_type(u32 tx_flags)
4159 {
4160 	/* set type for advanced descriptor with frame checksum insertion */
4161 	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4162 				      E1000_ADVTXD_DCMD_IFCS |
4163 				      E1000_ADVTXD_DCMD_DEXT);
4164 
4165 	/* set HW vlan bit if vlan is present */
4166 	if (tx_flags & IGB_TX_FLAGS_VLAN)
4167 		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4168 
4169 	/* set timestamp bit if present */
4170 	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4171 		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4172 
4173 	/* set segmentation bits for TSO */
4174 	if (tx_flags & IGB_TX_FLAGS_TSO)
4175 		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4176 
4177 	return cmd_type;
4178 }
4179 
4180 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4181 				 union e1000_adv_tx_desc *tx_desc,
4182 				 u32 tx_flags, unsigned int paylen)
4183 {
4184 	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4185 
4186 	/* 82575 requires a unique index per ring if any offload is enabled */
4187 	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4188 	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4189 		olinfo_status |= tx_ring->reg_idx << 4;
4190 
4191 	/* insert L4 checksum */
4192 	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4193 		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4194 
4195 		/* insert IPv4 checksum */
4196 		if (tx_flags & IGB_TX_FLAGS_IPV4)
4197 			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4198 	}
4199 
4200 	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4201 }
4202 
4203 /*
4204  * The largest size we can write to the descriptor is 65535.  In order to
4205  * maintain a power of two alignment we have to limit ourselves to 32K.
4206  */
4207 #define IGB_MAX_TXD_PWR	15
4208 #define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
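
/* Splitting example for the limit above: a single 40000-byte fragment
 * does not fit in one descriptor, so igb_tx_map() below emits one
 * descriptor of IGB_MAX_DATA_PER_TXD (32768) bytes followed by one of
 * the remaining 7232 bytes, advancing the DMA address by 32K between
 * the two.
 */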
4209 
4210 static void igb_tx_map(struct igb_ring *tx_ring,
4211 		       struct igb_tx_buffer *first,
4212 		       const u8 hdr_len)
4213 {
4214 	struct sk_buff *skb = first->skb;
4215 	struct igb_tx_buffer *tx_buffer_info;
4216 	union e1000_adv_tx_desc *tx_desc;
4217 	dma_addr_t dma;
4218 	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4219 	unsigned int data_len = skb->data_len;
4220 	unsigned int size = skb_headlen(skb);
4221 	unsigned int paylen = skb->len - hdr_len;
4222 	__le32 cmd_type;
4223 	u32 tx_flags = first->tx_flags;
4224 	u16 i = tx_ring->next_to_use;
4225 
4226 	tx_desc = IGB_TX_DESC(tx_ring, i);
4227 
4228 	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4229 	cmd_type = igb_tx_cmd_type(tx_flags);
4230 
4231 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4232 	if (dma_mapping_error(tx_ring->dev, dma))
4233 		goto dma_error;
4234 
4235 	/* record length, and DMA address */
4236 	first->length = size;
4237 	first->dma = dma;
4238 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4239 
4240 	for (;;) {
4241 		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4242 			tx_desc->read.cmd_type_len =
4243 				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4244 
4245 			i++;
4246 			tx_desc++;
4247 			if (i == tx_ring->count) {
4248 				tx_desc = IGB_TX_DESC(tx_ring, 0);
4249 				i = 0;
4250 			}
4251 
4252 			dma += IGB_MAX_DATA_PER_TXD;
4253 			size -= IGB_MAX_DATA_PER_TXD;
4254 
4255 			tx_desc->read.olinfo_status = 0;
4256 			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4257 		}
4258 
4259 		if (likely(!data_len))
4260 			break;
4261 
4262 		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4263 
4264 		i++;
4265 		tx_desc++;
4266 		if (i == tx_ring->count) {
4267 			tx_desc = IGB_TX_DESC(tx_ring, 0);
4268 			i = 0;
4269 		}
4270 
4271 		size = skb_frag_size(frag);
4272 		data_len -= size;
4273 
4274 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4275 				   size, DMA_TO_DEVICE);
4276 		if (dma_mapping_error(tx_ring->dev, dma))
4277 			goto dma_error;
4278 
4279 		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4280 		tx_buffer_info->length = size;
4281 		tx_buffer_info->dma = dma;
4282 
4283 		tx_desc->read.olinfo_status = 0;
4284 		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4285 
4286 		frag++;
4287 	}
4288 
4289 	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4290 
4291 	/* write last descriptor with RS and EOP bits */
4292 	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4293 	tx_desc->read.cmd_type_len = cmd_type;
4294 
4295 	/* set the timestamp */
4296 	first->time_stamp = jiffies;
4297 
4298 	/*
4299 	 * Force memory writes to complete before letting h/w know there
4300 	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4301 	 * memory model archs, such as IA-64).
4302 	 *
4303 	 * We also need this memory barrier to make certain all of the
4304 	 * status bits have been updated before next_to_watch is written.
4305 	 */
4306 	wmb();
4307 
4308 	/* set next_to_watch value indicating a packet is present */
4309 	first->next_to_watch = tx_desc;
4310 
4311 	i++;
4312 	if (i == tx_ring->count)
4313 		i = 0;
4314 
4315 	tx_ring->next_to_use = i;
4316 
4317 	writel(i, tx_ring->tail);
4318 
	/* we need this if more than one processor can write to our tail
	 * at a time; it synchronizes IO on IA64/Altix systems */
4321 	mmiowb();
4322 
4323 	return;
4324 
4325 dma_error:
4326 	dev_err(tx_ring->dev, "TX DMA map failed\n");
4327 
4328 	/* clear dma mappings for failed tx_buffer_info map */
4329 	for (;;) {
4330 		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4331 		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4332 		if (tx_buffer_info == first)
4333 			break;
4334 		if (i == 0)
4335 			i = tx_ring->count;
4336 		i--;
4337 	}
4338 
4339 	tx_ring->next_to_use = i;
4340 }
4341 
4342 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4343 {
4344 	struct net_device *netdev = tx_ring->netdev;
4345 
4346 	netif_stop_subqueue(netdev, tx_ring->queue_index);
4347 
4348 	/* Herbert's original patch had:
4349 	 *  smp_mb__after_netif_stop_queue();
4350 	 * but since that doesn't exist yet, just open code it. */
4351 	smp_mb();
4352 
	/* We need to check again in case another CPU has just
4354 	 * made room available. */
4355 	if (igb_desc_unused(tx_ring) < size)
4356 		return -EBUSY;
4357 
4358 	/* A reprieve! */
4359 	netif_wake_subqueue(netdev, tx_ring->queue_index);
4360 
4361 	u64_stats_update_begin(&tx_ring->tx_syncp2);
4362 	tx_ring->tx_stats.restart_queue2++;
4363 	u64_stats_update_end(&tx_ring->tx_syncp2);
4364 
4365 	return 0;
4366 }
4367 
4368 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4369 {
4370 	if (igb_desc_unused(tx_ring) >= size)
4371 		return 0;
4372 	return __igb_maybe_stop_tx(tx_ring, size);
4373 }
4374 
4375 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4376 				struct igb_ring *tx_ring)
4377 {
4378 	struct igb_tx_buffer *first;
4379 	int tso;
4380 	u32 tx_flags = 0;
4381 	__be16 protocol = vlan_get_protocol(skb);
4382 	u8 hdr_len = 0;
4383 
4384 	/* need: 1 descriptor per page,
4385 	 *       + 2 desc gap to keep tail from touching head,
4386 	 *       + 1 desc for skb->data,
4387 	 *       + 1 desc for context descriptor,
4388 	 * otherwise try next time */
4389 	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4390 		/* this is a hard error */
4391 		return NETDEV_TX_BUSY;
4392 	}
4393 
4394 	/* record the location of the first descriptor for this packet */
4395 	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4396 	first->skb = skb;
4397 	first->bytecount = skb->len;
4398 	first->gso_segs = 1;
4399 
4400 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4401 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4402 		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4403 	}
4404 
4405 	if (vlan_tx_tag_present(skb)) {
4406 		tx_flags |= IGB_TX_FLAGS_VLAN;
4407 		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4408 	}
4409 
4410 	/* record initial flags and protocol */
4411 	first->tx_flags = tx_flags;
4412 	first->protocol = protocol;
4413 
4414 	tso = igb_tso(tx_ring, first, &hdr_len);
4415 	if (tso < 0)
4416 		goto out_drop;
4417 	else if (!tso)
4418 		igb_tx_csum(tx_ring, first);
4419 
4420 	igb_tx_map(tx_ring, first, hdr_len);
4421 
4422 	/* Make sure there is space in the ring for the next send. */
4423 	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4424 
4425 	return NETDEV_TX_OK;
4426 
4427 out_drop:
4428 	igb_unmap_and_free_tx_resource(tx_ring, first);
4429 
4430 	return NETDEV_TX_OK;
4431 }
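
/* Descriptor budgeting example for the check at the top of
 * igb_xmit_frame_ring(): an skb with linear data plus 3 page fragments
 * reserves nr_frags + 4 = 7 descriptors: 1 for skb->data, 3 for the
 * fragments, 1 for a possible context descriptor and 2 of gap to keep
 * tail from touching head.  Note that fragments larger than
 * IGB_MAX_DATA_PER_TXD consume extra descriptors beyond this estimate.
 */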
4432 
4433 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4434 						    struct sk_buff *skb)
4435 {
4436 	unsigned int r_idx = skb->queue_mapping;
4437 
4438 	if (r_idx >= adapter->num_tx_queues)
4439 		r_idx = r_idx % adapter->num_tx_queues;
4440 
4441 	return adapter->tx_ring[r_idx];
4442 }
4443 
4444 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4445 				  struct net_device *netdev)
4446 {
4447 	struct igb_adapter *adapter = netdev_priv(netdev);
4448 
4449 	if (test_bit(__IGB_DOWN, &adapter->state)) {
4450 		dev_kfree_skb_any(skb);
4451 		return NETDEV_TX_OK;
4452 	}
4453 
4454 	if (skb->len <= 0) {
4455 		dev_kfree_skb_any(skb);
4456 		return NETDEV_TX_OK;
4457 	}
4458 
4459 	/*
4460 	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4461 	 * in order to meet this minimum size requirement.
4462 	 */
4463 	if (skb->len < 17) {
4464 		if (skb_padto(skb, 17))
4465 			return NETDEV_TX_OK;
4466 		skb->len = 17;
4467 	}
4468 
4469 	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4470 }
4471 
4472 /**
4473  * igb_tx_timeout - Respond to a Tx Hang
4474  * @netdev: network interface device structure
4475  **/
4476 static void igb_tx_timeout(struct net_device *netdev)
4477 {
4478 	struct igb_adapter *adapter = netdev_priv(netdev);
4479 	struct e1000_hw *hw = &adapter->hw;
4480 
4481 	/* Do the reset outside of interrupt context */
4482 	adapter->tx_timeout_count++;
4483 
4484 	if (hw->mac.type >= e1000_82580)
4485 		hw->dev_spec._82575.global_device_reset = true;
4486 
4487 	schedule_work(&adapter->reset_task);
4488 	wr32(E1000_EICS,
4489 	     (adapter->eims_enable_mask & ~adapter->eims_other));
4490 }
4491 
4492 static void igb_reset_task(struct work_struct *work)
4493 {
4494 	struct igb_adapter *adapter;
4495 	adapter = container_of(work, struct igb_adapter, reset_task);
4496 
4497 	igb_dump(adapter);
4498 	netdev_err(adapter->netdev, "Reset adapter\n");
4499 	igb_reinit_locked(adapter);
4500 }
4501 
4502 /**
4503  * igb_get_stats64 - Get System Network Statistics
4504  * @netdev: network interface device structure
4505  * @stats: rtnl_link_stats64 pointer
4506  *
4507  **/
4508 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4509 						 struct rtnl_link_stats64 *stats)
4510 {
4511 	struct igb_adapter *adapter = netdev_priv(netdev);
4512 
4513 	spin_lock(&adapter->stats64_lock);
4514 	igb_update_stats(adapter, &adapter->stats64);
4515 	memcpy(stats, &adapter->stats64, sizeof(*stats));
4516 	spin_unlock(&adapter->stats64_lock);
4517 
4518 	return stats;
4519 }
4520 
4521 /**
4522  * igb_change_mtu - Change the Maximum Transfer Unit
4523  * @netdev: network interface device structure
4524  * @new_mtu: new value for maximum frame size
4525  *
4526  * Returns 0 on success, negative on failure
4527  **/
4528 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4529 {
4530 	struct igb_adapter *adapter = netdev_priv(netdev);
4531 	struct pci_dev *pdev = adapter->pdev;
4532 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4533 
4534 	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4535 		dev_err(&pdev->dev, "Invalid MTU setting\n");
4536 		return -EINVAL;
4537 	}
4538 
4539 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4540 	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4541 		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4542 		return -EINVAL;
4543 	}
4544 
4545 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4546 		msleep(1);
4547 
4548 	/* igb_down has a dependency on max_frame_size */
4549 	adapter->max_frame_size = max_frame;
4550 
4551 	if (netif_running(netdev))
4552 		igb_down(adapter);
4553 
4554 	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4555 		 netdev->mtu, new_mtu);
4556 	netdev->mtu = new_mtu;
4557 
4558 	if (netif_running(netdev))
4559 		igb_up(adapter);
4560 	else
4561 		igb_reset(adapter);
4562 
4563 	clear_bit(__IGB_RESETTING, &adapter->state);
4564 
4565 	return 0;
4566 }
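
/* Frame size arithmetic for the checks above: max_frame adds the
 * Ethernet header (14), FCS (4) and one VLAN tag (4) to the MTU, so
 * e.g. new_mtu = 9000 gives max_frame = 9022, well under
 * MAX_STD_JUMBO_FRAME_SIZE, while new_mtu = 9217 gives 9239 and is
 * rejected (hence the "MTU > 9216" wording in the error message).
 */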
4567 
4568 /**
4569  * igb_update_stats - Update the board statistics counters
4570  * @adapter: board private structure
4571  **/
4572 
4573 void igb_update_stats(struct igb_adapter *adapter,
4574 		      struct rtnl_link_stats64 *net_stats)
4575 {
4576 	struct e1000_hw *hw = &adapter->hw;
4577 	struct pci_dev *pdev = adapter->pdev;
4578 	u32 reg, mpc;
4579 	u16 phy_tmp;
4580 	int i;
4581 	u64 bytes, packets;
4582 	unsigned int start;
4583 	u64 _bytes, _packets;
4584 
4585 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4586 
4587 	/*
4588 	 * Prevent stats update while adapter is being reset, or if the pci
4589 	 * connection is down.
4590 	 */
4591 	if (adapter->link_speed == 0)
4592 		return;
4593 	if (pci_channel_offline(pdev))
4594 		return;
4595 
4596 	bytes = 0;
4597 	packets = 0;
4598 	for (i = 0; i < adapter->num_rx_queues; i++) {
4599 		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4600 		struct igb_ring *ring = adapter->rx_ring[i];
4601 
4602 		ring->rx_stats.drops += rqdpc_tmp;
4603 		net_stats->rx_fifo_errors += rqdpc_tmp;
4604 
4605 		do {
4606 			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4607 			_bytes = ring->rx_stats.bytes;
4608 			_packets = ring->rx_stats.packets;
4609 		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4610 		bytes += _bytes;
4611 		packets += _packets;
4612 	}
4613 
4614 	net_stats->rx_bytes = bytes;
4615 	net_stats->rx_packets = packets;
4616 
4617 	bytes = 0;
4618 	packets = 0;
4619 	for (i = 0; i < adapter->num_tx_queues; i++) {
4620 		struct igb_ring *ring = adapter->tx_ring[i];
4621 		do {
4622 			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4623 			_bytes = ring->tx_stats.bytes;
4624 			_packets = ring->tx_stats.packets;
4625 		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4626 		bytes += _bytes;
4627 		packets += _packets;
4628 	}
4629 	net_stats->tx_bytes = bytes;
4630 	net_stats->tx_packets = packets;
4631 
4632 	/* read stats registers */
4633 	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4634 	adapter->stats.gprc += rd32(E1000_GPRC);
4635 	adapter->stats.gorc += rd32(E1000_GORCL);
4636 	rd32(E1000_GORCH); /* clear GORCL */
4637 	adapter->stats.bprc += rd32(E1000_BPRC);
4638 	adapter->stats.mprc += rd32(E1000_MPRC);
4639 	adapter->stats.roc += rd32(E1000_ROC);
4640 
4641 	adapter->stats.prc64 += rd32(E1000_PRC64);
4642 	adapter->stats.prc127 += rd32(E1000_PRC127);
4643 	adapter->stats.prc255 += rd32(E1000_PRC255);
4644 	adapter->stats.prc511 += rd32(E1000_PRC511);
4645 	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4646 	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4647 	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4648 	adapter->stats.sec += rd32(E1000_SEC);
4649 
4650 	mpc = rd32(E1000_MPC);
4651 	adapter->stats.mpc += mpc;
4652 	net_stats->rx_fifo_errors += mpc;
4653 	adapter->stats.scc += rd32(E1000_SCC);
4654 	adapter->stats.ecol += rd32(E1000_ECOL);
4655 	adapter->stats.mcc += rd32(E1000_MCC);
4656 	adapter->stats.latecol += rd32(E1000_LATECOL);
4657 	adapter->stats.dc += rd32(E1000_DC);
4658 	adapter->stats.rlec += rd32(E1000_RLEC);
4659 	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4660 	adapter->stats.xontxc += rd32(E1000_XONTXC);
4661 	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4662 	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4663 	adapter->stats.fcruc += rd32(E1000_FCRUC);
4664 	adapter->stats.gptc += rd32(E1000_GPTC);
4665 	adapter->stats.gotc += rd32(E1000_GOTCL);
4666 	rd32(E1000_GOTCH); /* clear GOTCL */
4667 	adapter->stats.rnbc += rd32(E1000_RNBC);
4668 	adapter->stats.ruc += rd32(E1000_RUC);
4669 	adapter->stats.rfc += rd32(E1000_RFC);
4670 	adapter->stats.rjc += rd32(E1000_RJC);
4671 	adapter->stats.tor += rd32(E1000_TORH);
4672 	adapter->stats.tot += rd32(E1000_TOTH);
4673 	adapter->stats.tpr += rd32(E1000_TPR);
4674 
4675 	adapter->stats.ptc64 += rd32(E1000_PTC64);
4676 	adapter->stats.ptc127 += rd32(E1000_PTC127);
4677 	adapter->stats.ptc255 += rd32(E1000_PTC255);
4678 	adapter->stats.ptc511 += rd32(E1000_PTC511);
4679 	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4680 	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4681 
4682 	adapter->stats.mptc += rd32(E1000_MPTC);
4683 	adapter->stats.bptc += rd32(E1000_BPTC);
4684 
4685 	adapter->stats.tpt += rd32(E1000_TPT);
4686 	adapter->stats.colc += rd32(E1000_COLC);
4687 
4688 	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4689 	/* read internal phy specific stats */
4690 	reg = rd32(E1000_CTRL_EXT);
4691 	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4692 		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4693 		adapter->stats.tncrs += rd32(E1000_TNCRS);
4694 	}
4695 
4696 	adapter->stats.tsctc += rd32(E1000_TSCTC);
4697 	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4698 
4699 	adapter->stats.iac += rd32(E1000_IAC);
4700 	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4701 	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4702 	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4703 	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4704 	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4705 	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4706 	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4707 	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4708 
4709 	/* Fill out the OS statistics structure */
4710 	net_stats->multicast = adapter->stats.mprc;
4711 	net_stats->collisions = adapter->stats.colc;
4712 
4713 	/* Rx Errors */
4714 
4715 	/* RLEC on some newer hardware can be incorrect so build
4716 	 * our own version based on RUC and ROC */
4717 	net_stats->rx_errors = adapter->stats.rxerrc +
4718 		adapter->stats.crcerrs + adapter->stats.algnerrc +
4719 		adapter->stats.ruc + adapter->stats.roc +
4720 		adapter->stats.cexterr;
4721 	net_stats->rx_length_errors = adapter->stats.ruc +
4722 				      adapter->stats.roc;
4723 	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4724 	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4725 	net_stats->rx_missed_errors = adapter->stats.mpc;
4726 
4727 	/* Tx Errors */
4728 	net_stats->tx_errors = adapter->stats.ecol +
4729 			       adapter->stats.latecol;
4730 	net_stats->tx_aborted_errors = adapter->stats.ecol;
4731 	net_stats->tx_window_errors = adapter->stats.latecol;
4732 	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4733 
4734 	/* Tx Dropped needs to be maintained elsewhere */
4735 
4736 	/* Phy Stats */
4737 	if (hw->phy.media_type == e1000_media_type_copper) {
4738 		if ((adapter->link_speed == SPEED_1000) &&
4739 		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4740 			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4741 			adapter->phy_stats.idle_errors += phy_tmp;
4742 		}
4743 	}
4744 
4745 	/* Management Stats */
4746 	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4747 	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4748 	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4749 
4750 	/* OS2BMC Stats */
4751 	reg = rd32(E1000_MANC);
4752 	if (reg & E1000_MANC_EN_BMC2OS) {
4753 		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4754 		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4755 		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4756 		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4757 	}
4758 }
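
/* The do/while loops in igb_update_stats() above use the u64_stats
 * seqcount API: u64_stats_fetch_begin_bh() snapshots a sequence number,
 * the 64-bit byte/packet counters are copied, and
 * u64_stats_fetch_retry_bh() repeats the copy if a writer updated the
 * ring statistics in between, so 32-bit machines get a torn-read-free
 * snapshot without taking a lock.
 */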
4759 
4760 static irqreturn_t igb_msix_other(int irq, void *data)
4761 {
4762 	struct igb_adapter *adapter = data;
4763 	struct e1000_hw *hw = &adapter->hw;
4764 	u32 icr = rd32(E1000_ICR);
4765 	/* reading ICR causes bit 31 of EICR to be cleared */
4766 
4767 	if (icr & E1000_ICR_DRSTA)
4768 		schedule_work(&adapter->reset_task);
4769 
4770 	if (icr & E1000_ICR_DOUTSYNC) {
4771 		/* HW is reporting DMA is out of sync */
4772 		adapter->stats.doosync++;
		/* The DMA Out of Sync is also an indication of a spoof event
4774 		 * in IOV mode. Check the Wrong VM Behavior register to
4775 		 * see if it is really a spoof event. */
4776 		igb_check_wvbr(adapter);
4777 	}
4778 
4779 	/* Check for a mailbox event */
4780 	if (icr & E1000_ICR_VMMB)
4781 		igb_msg_task(adapter);
4782 
4783 	if (icr & E1000_ICR_LSC) {
4784 		hw->mac.get_link_status = 1;
4785 		/* guard against interrupt when we're going down */
4786 		if (!test_bit(__IGB_DOWN, &adapter->state))
4787 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4788 	}
4789 
4790 	wr32(E1000_EIMS, adapter->eims_other);
4791 
4792 	return IRQ_HANDLED;
4793 }
4794 
4795 static void igb_write_itr(struct igb_q_vector *q_vector)
4796 {
4797 	struct igb_adapter *adapter = q_vector->adapter;
4798 	u32 itr_val = q_vector->itr_val & 0x7FFC;
4799 
4800 	if (!q_vector->set_itr)
4801 		return;
4802 
4803 	if (!itr_val)
4804 		itr_val = 0x4;
4805 
4806 	if (adapter->hw.mac.type == e1000_82575)
4807 		itr_val |= itr_val << 16;
4808 	else
4809 		itr_val |= E1000_EITR_CNT_IGNR;
4810 
4811 	writel(itr_val, q_vector->itr_register);
4812 	q_vector->set_itr = 0;
4813 }
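
/* Example of the EITR encoding above: a calculated itr_val of 196 (the
 * IGB_20K_ITR value from igb.h, roughly 20000 ints/sec) is masked to a
 * multiple of 4 and, on 82576 and later, written together with
 * E1000_EITR_CNT_IGNR (counter ignore) so the update is presumably not
 * deferred by the running interval counter; the 82575 instead expects
 * the interval replicated in the upper 16 bits.
 */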
4814 
4815 static irqreturn_t igb_msix_ring(int irq, void *data)
4816 {
4817 	struct igb_q_vector *q_vector = data;
4818 
4819 	/* Write the ITR value calculated from the previous interrupt. */
4820 	igb_write_itr(q_vector);
4821 
4822 	napi_schedule(&q_vector->napi);
4823 
4824 	return IRQ_HANDLED;
4825 }
4826 
4827 #ifdef CONFIG_IGB_DCA
4828 static void igb_update_dca(struct igb_q_vector *q_vector)
4829 {
4830 	struct igb_adapter *adapter = q_vector->adapter;
4831 	struct e1000_hw *hw = &adapter->hw;
4832 	int cpu = get_cpu();
4833 
4834 	if (q_vector->cpu == cpu)
4835 		goto out_no_update;
4836 
4837 	if (q_vector->tx.ring) {
4838 		int q = q_vector->tx.ring->reg_idx;
4839 		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4840 		if (hw->mac.type == e1000_82575) {
4841 			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4842 			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4843 		} else {
4844 			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4845 			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4846 			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4847 		}
4848 		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4849 		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4850 	}
4851 	if (q_vector->rx.ring) {
4852 		int q = q_vector->rx.ring->reg_idx;
4853 		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4854 		if (hw->mac.type == e1000_82575) {
4855 			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4856 			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4857 		} else {
4858 			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4859 			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4860 			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4861 		}
4862 		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4863 		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4864 		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4865 		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4866 	}
4867 	q_vector->cpu = cpu;
4868 out_no_update:
4869 	put_cpu();
4870 }
4871 
4872 static void igb_setup_dca(struct igb_adapter *adapter)
4873 {
4874 	struct e1000_hw *hw = &adapter->hw;
4875 	int i;
4876 
4877 	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4878 		return;
4879 
4880 	/* Always use CB2 mode, difference is masked in the CB driver. */
4881 	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4882 
4883 	for (i = 0; i < adapter->num_q_vectors; i++) {
4884 		adapter->q_vector[i]->cpu = -1;
4885 		igb_update_dca(adapter->q_vector[i]);
4886 	}
4887 }
4888 
4889 static int __igb_notify_dca(struct device *dev, void *data)
4890 {
4891 	struct net_device *netdev = dev_get_drvdata(dev);
4892 	struct igb_adapter *adapter = netdev_priv(netdev);
4893 	struct pci_dev *pdev = adapter->pdev;
4894 	struct e1000_hw *hw = &adapter->hw;
4895 	unsigned long event = *(unsigned long *)data;
4896 
4897 	switch (event) {
4898 	case DCA_PROVIDER_ADD:
4899 		/* if already enabled, don't do it again */
4900 		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4901 			break;
4902 		if (dca_add_requester(dev) == 0) {
4903 			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4904 			dev_info(&pdev->dev, "DCA enabled\n");
4905 			igb_setup_dca(adapter);
4906 			break;
4907 		}
4908 		/* Fall Through since DCA is disabled. */
4909 	case DCA_PROVIDER_REMOVE:
4910 		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4911 			/* without this a class_device is left
4912 			 * hanging around in the sysfs model */
4913 			dca_remove_requester(dev);
4914 			dev_info(&pdev->dev, "DCA disabled\n");
4915 			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4916 			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4917 		}
4918 		break;
4919 	}
4920 
4921 	return 0;
4922 }
4923 
4924 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4925                           void *p)
4926 {
4927 	int ret_val;
4928 
4929 	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4930 	                                 __igb_notify_dca);
4931 
4932 	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4933 }
4934 #endif /* CONFIG_IGB_DCA */
4935 
4936 #ifdef CONFIG_PCI_IOV
4937 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4938 {
4939 	unsigned char mac_addr[ETH_ALEN];
4940 	struct pci_dev *pdev = adapter->pdev;
4941 	struct e1000_hw *hw = &adapter->hw;
4942 	struct pci_dev *pvfdev;
4943 	unsigned int device_id;
4944 	u16 thisvf_devfn;
4945 
4946 	random_ether_addr(mac_addr);
4947 	igb_set_vf_mac(adapter, vf, mac_addr);
4948 
4949 	switch (adapter->hw.mac.type) {
4950 	case e1000_82576:
4951 		device_id = IGB_82576_VF_DEV_ID;
4952 		/* VF Stride for 82576 is 2 */
4953 		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4954 			(pdev->devfn & 1);
4955 		break;
4956 	case e1000_i350:
4957 		device_id = IGB_I350_VF_DEV_ID;
4958 		/* VF Stride for I350 is 4 */
4959 		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4960 				(pdev->devfn & 3);
4961 		break;
4962 	default:
4963 		device_id = 0;
4964 		thisvf_devfn = 0;
4965 		break;
4966 	}
4967 
4968 	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4969 	while (pvfdev) {
4970 		if (pvfdev->devfn == thisvf_devfn)
4971 			break;
4972 		pvfdev = pci_get_device(hw->vendor_id,
4973 					device_id, pvfdev);
4974 	}
4975 
4976 	if (pvfdev)
4977 		adapter->vf_data[vf].vfdev = pvfdev;
4978 	else
4979 		dev_err(&pdev->dev,
4980 			"Couldn't find pci dev ptr for VF %4.4x\n",
4981 			thisvf_devfn);
4982 	return pvfdev != NULL;
4983 }
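
/* Function-number example for the VF lookup above: on an 82576 with the
 * PF at devfn 0x00, VF n is expected at devfn = 0x80 + 2 * n, so vf 3
 * lands at 0x86; on an i350 the stride is 4, putting vf 3 at 0x8c.
 * The OR with the low PF devfn bits presumably keeps odd-function PFs
 * on their own stride.
 */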
4984 
4985 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4986 {
4987 	struct e1000_hw *hw = &adapter->hw;
4988 	struct pci_dev *pdev = adapter->pdev;
4989 	struct pci_dev *pvfdev;
4990 	u16 vf_devfn = 0;
4991 	u16 vf_stride;
4992 	unsigned int device_id;
4993 	int vfs_found = 0;
4994 
4995 	switch (adapter->hw.mac.type) {
4996 	case e1000_82576:
4997 		device_id = IGB_82576_VF_DEV_ID;
4998 		/* VF Stride for 82576 is 2 */
4999 		vf_stride = 2;
5000 		break;
5001 	case e1000_i350:
5002 		device_id = IGB_I350_VF_DEV_ID;
5003 		/* VF Stride for I350 is 4 */
5004 		vf_stride = 4;
5005 		break;
5006 	default:
5007 		device_id = 0;
5008 		vf_stride = 0;
5009 		break;
5010 	}
5011 
5012 	vf_devfn = pdev->devfn + 0x80;
5013 	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5014 	while (pvfdev) {
5015 		if (pvfdev->devfn == vf_devfn)
5016 			vfs_found++;
5017 		vf_devfn += vf_stride;
5018 		pvfdev = pci_get_device(hw->vendor_id,
5019 					device_id, pvfdev);
5020 	}
5021 
5022 	return vfs_found;
5023 }
5024 
5025 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5026 {
5027 	int i;
5028 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5029 		if (adapter->vf_data[i].vfdev) {
5030 			if (adapter->vf_data[i].vfdev->dev_flags &
5031 			    PCI_DEV_FLAGS_ASSIGNED)
5032 				return true;
5033 		}
5034 	}
5035 	return false;
5036 }
5037 
5038 #endif
5039 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5040 {
5041 	struct e1000_hw *hw = &adapter->hw;
5042 	u32 ping;
5043 	int i;
5044 
	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5046 		ping = E1000_PF_CONTROL_MSG;
5047 		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5048 			ping |= E1000_VT_MSGTYPE_CTS;
5049 		igb_write_mbx(hw, &ping, 1, i);
5050 	}
5051 }
5052 
5053 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5054 {
5055 	struct e1000_hw *hw = &adapter->hw;
5056 	u32 vmolr = rd32(E1000_VMOLR(vf));
5057 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5058 
5059 	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5060 	                    IGB_VF_FLAG_MULTI_PROMISC);
5061 	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5062 
5063 	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5064 		vmolr |= E1000_VMOLR_MPME;
5065 		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5066 		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5067 	} else {
5068 		/*
5069 		 * if we have hashes and we are clearing a multicast promisc
5070 		 * flag we need to write the hashes to the MTA as this step
5071 		 * was previously skipped
5072 		 */
5073 		if (vf_data->num_vf_mc_hashes > 30) {
5074 			vmolr |= E1000_VMOLR_MPME;
5075 		} else if (vf_data->num_vf_mc_hashes) {
5076 			int j;
5077 			vmolr |= E1000_VMOLR_ROMPE;
5078 			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5079 				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5080 		}
5081 	}
5082 
5083 	wr32(E1000_VMOLR(vf), vmolr);
5084 
	/* if any flags are left unprocessed, they are likely not supported */
5086 	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5087 		return -EINVAL;
5088 
	return 0;
}
5092 
5093 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5094 				  u32 *msgbuf, u32 vf)
5095 {
5096 	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5097 	u16 *hash_list = (u16 *)&msgbuf[1];
5098 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099 	int i;
5100 
5101 	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multicast
5103 	 * list changes
5104 	 */
5105 	vf_data->num_vf_mc_hashes = n;
5106 
5107 	/* only up to 30 hash values supported */
5108 	if (n > 30)
5109 		n = 30;
5110 
5111 	/* store the hashes for later use */
5112 	for (i = 0; i < n; i++)
5113 		vf_data->vf_mc_hashes[i] = hash_list[i];
5114 
5115 	/* Flush and reset the mta with the new values */
5116 	igb_set_rx_mode(adapter->netdev);
5117 
5118 	return 0;
5119 }
5120 
5121 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5122 {
5123 	struct e1000_hw *hw = &adapter->hw;
5124 	struct vf_data_storage *vf_data;
5125 	int i, j;
5126 
5127 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5128 		u32 vmolr = rd32(E1000_VMOLR(i));
5129 		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5130 
5131 		vf_data = &adapter->vf_data[i];
5132 
5133 		if ((vf_data->num_vf_mc_hashes > 30) ||
5134 		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5135 			vmolr |= E1000_VMOLR_MPME;
5136 		} else if (vf_data->num_vf_mc_hashes) {
5137 			vmolr |= E1000_VMOLR_ROMPE;
5138 			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5139 				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5140 		}
5141 		wr32(E1000_VMOLR(i), vmolr);
5142 	}
5143 }
5144 
5145 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5146 {
5147 	struct e1000_hw *hw = &adapter->hw;
5148 	u32 pool_mask, reg, vid;
5149 	int i;
5150 
5151 	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5152 
5153 	/* Find the vlan filter for this id */
5154 	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5155 		reg = rd32(E1000_VLVF(i));
5156 
5157 		/* remove the vf from the pool */
5158 		reg &= ~pool_mask;
5159 
5160 		/* if pool is empty then remove entry from vfta */
5161 		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5162 		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			vid = reg & E1000_VLVF_VLANID_MASK;
			reg = 0;
5165 			igb_vfta_set(hw, vid, false);
5166 		}
5167 
5168 		wr32(E1000_VLVF(i), reg);
5169 	}
5170 
5171 	adapter->vf_data[vf].vlans_enabled = 0;
5172 }
5173 
5174 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5175 {
5176 	struct e1000_hw *hw = &adapter->hw;
5177 	u32 reg, i;
5178 
5179 	/* The vlvf table only exists on 82576 hardware and newer */
5180 	if (hw->mac.type < e1000_82576)
5181 		return -1;
5182 
5183 	/* we only need to do this if VMDq is enabled */
5184 	if (!adapter->vfs_allocated_count)
5185 		return -1;
5186 
5187 	/* Find the vlan filter for this id */
5188 	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5189 		reg = rd32(E1000_VLVF(i));
5190 		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5191 		    vid == (reg & E1000_VLVF_VLANID_MASK))
5192 			break;
5193 	}
5194 
5195 	if (add) {
5196 		if (i == E1000_VLVF_ARRAY_SIZE) {
5197 			/* Did not find a matching VLAN ID entry that was
5198 			 * enabled.  Search for a free filter entry, i.e.
5199 			 * one without the enable bit set
5200 			 */
5201 			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5202 				reg = rd32(E1000_VLVF(i));
5203 				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5204 					break;
5205 			}
5206 		}
5207 		if (i < E1000_VLVF_ARRAY_SIZE) {
5208 			/* Found an enabled/available entry */
5209 			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5210 
5211 			/* if !enabled we need to set this up in vfta */
5212 			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5213 				/* add VID to filter table */
5214 				igb_vfta_set(hw, vid, true);
5215 				reg |= E1000_VLVF_VLANID_ENABLE;
5216 			}
5217 			reg &= ~E1000_VLVF_VLANID_MASK;
5218 			reg |= vid;
5219 			wr32(E1000_VLVF(i), reg);
5220 
5221 			/* do not modify RLPML for PF devices */
5222 			if (vf >= adapter->vfs_allocated_count)
5223 				return 0;
5224 
5225 			if (!adapter->vf_data[vf].vlans_enabled) {
5226 				u32 size;
5227 				reg = rd32(E1000_VMOLR(vf));
5228 				size = reg & E1000_VMOLR_RLPML_MASK;
5229 				size += 4;
5230 				reg &= ~E1000_VMOLR_RLPML_MASK;
5231 				reg |= size;
5232 				wr32(E1000_VMOLR(vf), reg);
5233 			}
5234 
5235 			adapter->vf_data[vf].vlans_enabled++;
5236 		}
5237 	} else {
5238 		if (i < E1000_VLVF_ARRAY_SIZE) {
5239 			/* remove vf from the pool */
5240 			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5241 			/* if pool is empty then remove entry from vfta */
5242 			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5243 				reg = 0;
5244 				igb_vfta_set(hw, vid, false);
5245 			}
5246 			wr32(E1000_VLVF(i), reg);
5247 
5248 			/* do not modify RLPML for PF devices */
5249 			if (vf >= adapter->vfs_allocated_count)
5250 				return 0;
5251 
5252 			adapter->vf_data[vf].vlans_enabled--;
5253 			if (!adapter->vf_data[vf].vlans_enabled) {
5254 				u32 size;
5255 				reg = rd32(E1000_VMOLR(vf));
5256 				size = reg & E1000_VMOLR_RLPML_MASK;
5257 				size -= 4;
5258 				reg &= ~E1000_VMOLR_RLPML_MASK;
5259 				reg |= size;
5260 				wr32(E1000_VMOLR(vf), reg);
5261 			}
5262 		}
5263 	}
5264 	return 0;
5265 }
5266 
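/**
 * igb_set_vmvir - program the default VLAN tag inserted for a VF
 * @adapter: board private structure
 * @vid: VLAN id (with priority bits) to insert by default, 0 to disable
 * @vf: VF index
 **/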
5267 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5268 {
5269 	struct e1000_hw *hw = &adapter->hw;
5270 
5271 	if (vid)
5272 		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5273 	else
5274 		wr32(E1000_VMVIR(vf), 0);
5275 }
5276 
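/**
 * igb_ndo_set_vf_vlan - ndo callback to assign a port VLAN and QoS to a VF
 * @netdev: network interface device structure
 * @vf: VF index
 * @vlan: VLAN id to assign
 * @qos: 802.1p priority to insert in the tag
 *
 * Passing a zero @vlan and @qos clears the administratively set VLAN.
 **/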
5277 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5278 			       int vf, u16 vlan, u8 qos)
5279 {
5280 	int err = 0;
5281 	struct igb_adapter *adapter = netdev_priv(netdev);
5282 
5283 	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5284 		return -EINVAL;
5285 	if (vlan || qos) {
5286 		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5287 		if (err)
5288 			goto out;
5289 		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5290 		igb_set_vmolr(adapter, vf, !vlan);
5291 		adapter->vf_data[vf].pf_vlan = vlan;
5292 		adapter->vf_data[vf].pf_qos = qos;
5293 		dev_info(&adapter->pdev->dev,
5294 			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5295 		if (test_bit(__IGB_DOWN, &adapter->state)) {
5296 			dev_warn(&adapter->pdev->dev,
5297 				 "The VF VLAN has been set,"
5298 				 " but the PF device is not up.\n");
5299 			dev_warn(&adapter->pdev->dev,
5300 				 "Bring the PF device up before"
5301 				 " attempting to use the VF device.\n");
5302 		}
5303 	} else {
5304 		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5305 				   false, vf);
5306 		igb_set_vmvir(adapter, vlan, vf);
5307 		igb_set_vmolr(adapter, vf, true);
5308 		adapter->vf_data[vf].pf_vlan = 0;
5309 		adapter->vf_data[vf].pf_qos = 0;
5310 	}
5311 out:
5312 	return err;
5313 }
5314 
5315 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5316 {
5317 	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5318 	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5319 
5320 	return igb_vlvf_set(adapter, vid, add, vf);
5321 }
5322 
5323 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5324 {
5325 	/* clear flags - except flag that indicates PF has set the MAC */
5326 	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5327 	adapter->vf_data[vf].last_nack = jiffies;
5328 
5329 	/* reset offloads to defaults */
5330 	igb_set_vmolr(adapter, vf, true);
5331 
5332 	/* reset vlans for device */
5333 	igb_clear_vf_vfta(adapter, vf);
5334 	if (adapter->vf_data[vf].pf_vlan)
5335 		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5336 				    adapter->vf_data[vf].pf_vlan,
5337 				    adapter->vf_data[vf].pf_qos);
5340 
5341 	/* reset multicast table array for vf */
5342 	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5343 
5344 	/* Flush and reset the mta with the new values */
5345 	igb_set_rx_mode(adapter->netdev);
5346 }
5347 
5348 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5349 {
5350 	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5351 
5352 	/* generate a new mac address as we were hotplug removed/added */
5353 	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5354 		random_ether_addr(vf_mac);
5355 
5356 	/* process remaining reset events */
5357 	igb_vf_reset(adapter, vf);
5358 }
5359 
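/**
 * igb_vf_reset_msg - handle a VF reset request received over the mailbox
 * @adapter: board private structure
 * @vf: VF index
 *
 * Resets the VF state, programs the VF MAC address into a RAR entry,
 * enables the VF's transmit and receive queues, and replies with an ACK
 * that carries the MAC address.
 **/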
5360 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5361 {
5362 	struct e1000_hw *hw = &adapter->hw;
5363 	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5364 	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5365 	u32 reg, msgbuf[3];
5366 	u8 *addr = (u8 *)(&msgbuf[1]);
5367 
5368 	/* process all the same items cleared in a function level reset */
5369 	igb_vf_reset(adapter, vf);
5370 
5371 	/* set vf mac address */
5372 	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5373 
5374 	/* enable transmit and receive for vf */
5375 	reg = rd32(E1000_VFTE);
5376 	wr32(E1000_VFTE, reg | (1 << vf));
5377 	reg = rd32(E1000_VFRE);
5378 	wr32(E1000_VFRE, reg | (1 << vf));
5379 
5380 	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5381 
5382 	/* reply to reset with ack and vf mac address */
5383 	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5384 	memcpy(addr, vf_mac, ETH_ALEN);
5385 	igb_write_mbx(hw, msgbuf, 3, vf);
5386 }
5387 
5388 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5389 {
5390 	/*
5391 	 * The VF MAC Address is stored in a packed array of bytes
5392 	 * starting at the second 32 bit word of the msg array
5393 	 */
5394 	unsigned char *addr = (unsigned char *)&msg[1];
5395 	int err = -1;
5396 
5397 	if (is_valid_ether_addr(addr))
5398 		err = igb_set_vf_mac(adapter, vf, addr);
5399 
5400 	return err;
5401 }
5402 
5403 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5404 {
5405 	struct e1000_hw *hw = &adapter->hw;
5406 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5407 	u32 msg = E1000_VT_MSGTYPE_NACK;
5408 
5409 	/* if device isn't clear to send it shouldn't be reading either */
5410 	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5411 	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5412 		igb_write_mbx(hw, &msg, 1, vf);
5413 		vf_data->last_nack = jiffies;
5414 	}
5415 }
5416 
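/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index
 *
 * A VF that has not yet completed a reset (CTS flag clear) is NACKed;
 * otherwise the message is dispatched to the matching handler and the
 * result is reported back to the VF as an ACK or NACK.
 **/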
5417 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5418 {
5419 	struct pci_dev *pdev = adapter->pdev;
5420 	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5421 	struct e1000_hw *hw = &adapter->hw;
5422 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5423 	s32 retval;
5424 
5425 	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5426 
5427 	if (retval) {
5428 		/* if receive failed revoke VF CTS status and restart init */
5429 		dev_err(&pdev->dev, "Error receiving message from VF\n");
5430 		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5431 		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5432 			return;
5433 		goto out;
5434 	}
5435 
5436 	/* this is a message we already processed, do nothing */
5437 	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5438 		return;
5439 
5440 	/*
5441 	 * until the vf completes a reset it should not be
5442 	 * allowed to start any configuration.
5443 	 */
5444 
5445 	if (msgbuf[0] == E1000_VF_RESET) {
5446 		igb_vf_reset_msg(adapter, vf);
5447 		return;
5448 	}
5449 
5450 	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5451 		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5452 			return;
5453 		retval = -1;
5454 		goto out;
5455 	}
5456 
5457 	switch (msgbuf[0] & 0xFFFF) {
5458 	case E1000_VF_SET_MAC_ADDR:
5459 		retval = -EINVAL;
5460 		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5461 			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5462 		else
5463 			dev_warn(&pdev->dev,
5464 				 "VF %d attempted to override administratively "
5465 				 "set MAC address\nReload the VF driver to "
5466 				 "resume operations\n", vf);
5467 		break;
5468 	case E1000_VF_SET_PROMISC:
5469 		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5470 		break;
5471 	case E1000_VF_SET_MULTICAST:
5472 		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5473 		break;
5474 	case E1000_VF_SET_LPE:
5475 		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5476 		break;
5477 	case E1000_VF_SET_VLAN:
5478 		retval = -1;
5479 		if (vf_data->pf_vlan)
5480 			dev_warn(&pdev->dev,
5481 				 "VF %d attempted to override administratively "
5482 				 "set VLAN tag\nReload the VF driver to "
5483 				 "resume operations\n", vf);
5484 		else
5485 			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5486 		break;
5487 	default:
5488 		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5489 		retval = -1;
5490 		break;
5491 	}
5492 
5493 	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5494 out:
5495 	/* notify the VF of the results of what it sent us */
5496 	if (retval)
5497 		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5498 	else
5499 		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5500 
5501 	igb_write_mbx(hw, msgbuf, 1, vf);
5502 }
5503 
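/**
 * igb_msg_task - service pending mailbox events for every allocated VF
 * @adapter: board private structure
 *
 * Checks each VF in turn for reset requests, pending messages and acks.
 **/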
5504 static void igb_msg_task(struct igb_adapter *adapter)
5505 {
5506 	struct e1000_hw *hw = &adapter->hw;
5507 	u32 vf;
5508 
5509 	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5510 		/* process any reset requests */
5511 		if (!igb_check_for_rst(hw, vf))
5512 			igb_vf_reset_event(adapter, vf);
5513 
5514 		/* process any messages pending */
5515 		if (!igb_check_for_msg(hw, vf))
5516 			igb_rcv_msg_from_vf(adapter, vf);
5517 
5518 		/* process any acks */
5519 		if (!igb_check_for_ack(hw, vf))
5520 			igb_rcv_ack_from_vf(adapter, vf);
5521 	}
5522 }
5523 
5524 /**
5525  *  igb_set_uta - Set unicast filter table address
5526  *  @adapter: board private structure
5527  *
5528  *  The unicast table address is a register array of 32-bit registers.
5529  *  The table is meant to be used in a way similar to how the MTA is used;
5530  *  however, due to certain limitations in the hardware it is necessary to
5531  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5532  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5533  **/
5534 static void igb_set_uta(struct igb_adapter *adapter)
5535 {
5536 	struct e1000_hw *hw = &adapter->hw;
5537 	int i;
5538 
5539 	/* The UTA table only exists on 82576 hardware and newer */
5540 	if (hw->mac.type < e1000_82576)
5541 		return;
5542 
5543 	/* we only need to do this if VMDq is enabled */
5544 	if (!adapter->vfs_allocated_count)
5545 		return;
5546 
5547 	for (i = 0; i < hw->mac.uta_reg_count; i++)
5548 		array_wr32(E1000_UTA, i, ~0);
5549 }
5550 
5551 /**
5552  * igb_intr_msi - Interrupt Handler
5553  * @irq: interrupt number
5554  * @data: pointer to a network interface device structure
5555  **/
5556 static irqreturn_t igb_intr_msi(int irq, void *data)
5557 {
5558 	struct igb_adapter *adapter = data;
5559 	struct igb_q_vector *q_vector = adapter->q_vector[0];
5560 	struct e1000_hw *hw = &adapter->hw;
5561 	/* read ICR disables interrupts using IAM */
5562 	u32 icr = rd32(E1000_ICR);
5563 
5564 	igb_write_itr(q_vector);
5565 
5566 	if (icr & E1000_ICR_DRSTA)
5567 		schedule_work(&adapter->reset_task);
5568 
5569 	if (icr & E1000_ICR_DOUTSYNC) {
5570 		/* HW is reporting DMA is out of sync */
5571 		adapter->stats.doosync++;
5572 	}
5573 
5574 	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5575 		hw->mac.get_link_status = 1;
5576 		if (!test_bit(__IGB_DOWN, &adapter->state))
5577 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5578 	}
5579 
5580 	napi_schedule(&q_vector->napi);
5581 
5582 	return IRQ_HANDLED;
5583 }
5584 
5585 /**
5586  * igb_intr - Legacy Interrupt Handler
5587  * @irq: interrupt number
5588  * @data: pointer to a network interface device structure
5589  **/
5590 static irqreturn_t igb_intr(int irq, void *data)
5591 {
5592 	struct igb_adapter *adapter = data;
5593 	struct igb_q_vector *q_vector = adapter->q_vector[0];
5594 	struct e1000_hw *hw = &adapter->hw;
5595 	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5596 	 * need for the IMC write */
5597 	u32 icr = rd32(E1000_ICR);
5598 
5599 	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5600 	 * not set, then the adapter didn't send an interrupt */
5601 	if (!(icr & E1000_ICR_INT_ASSERTED))
5602 		return IRQ_NONE;
5603 
5604 	igb_write_itr(q_vector);
5605 
5606 	if (icr & E1000_ICR_DRSTA)
5607 		schedule_work(&adapter->reset_task);
5608 
5609 	if (icr & E1000_ICR_DOUTSYNC) {
5610 		/* HW is reporting DMA is out of sync */
5611 		adapter->stats.doosync++;
5612 	}
5613 
5614 	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5615 		hw->mac.get_link_status = 1;
5616 		/* guard against interrupt when we're going down */
5617 		if (!test_bit(__IGB_DOWN, &adapter->state))
5618 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5619 	}
5620 
5621 	napi_schedule(&q_vector->napi);
5622 
5623 	return IRQ_HANDLED;
5624 }
5625 
5626 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5627 {
5628 	struct igb_adapter *adapter = q_vector->adapter;
5629 	struct e1000_hw *hw = &adapter->hw;
5630 
5631 	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5632 	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5633 		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5634 			igb_set_itr(q_vector);
5635 		else
5636 			igb_update_ring_itr(q_vector);
5637 	}
5638 
5639 	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5640 		if (adapter->msix_entries)
5641 			wr32(E1000_EIMS, q_vector->eims_value);
5642 		else
5643 			igb_irq_enable(adapter);
5644 	}
5645 }
5646 
5647 /**
5648  * igb_poll - NAPI Rx polling callback
5649  * @napi: napi polling structure
5650  * @budget: count of how many packets we should handle
5651  **/
5652 static int igb_poll(struct napi_struct *napi, int budget)
5653 {
5654 	struct igb_q_vector *q_vector = container_of(napi,
5655 	                                             struct igb_q_vector,
5656 	                                             napi);
5657 	bool clean_complete = true;
5658 
5659 #ifdef CONFIG_IGB_DCA
5660 	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5661 		igb_update_dca(q_vector);
5662 #endif
5663 	if (q_vector->tx.ring)
5664 		clean_complete = igb_clean_tx_irq(q_vector);
5665 
5666 	if (q_vector->rx.ring)
5667 		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5668 
5669 	/* If all work not completed, return budget and keep polling */
5670 	if (!clean_complete)
5671 		return budget;
5672 
5673 	/* If not enough Rx work done, exit the polling mode */
5674 	napi_complete(napi);
5675 	igb_ring_irq_enable(q_vector);
5676 
5677 	return 0;
5678 }
5679 
5680 /**
5681  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5682  * @adapter: board private structure
5683  * @shhwtstamps: timestamp structure to update
5684  * @regval: unsigned 64bit system time value.
5685  *
5686  * We need to convert the system time value stored in the RX/TXSTMP registers
5687  * into a hwtstamp which can be used by the upper level timestamping functions
5688  */
5689 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5690                                    struct skb_shared_hwtstamps *shhwtstamps,
5691                                    u64 regval)
5692 {
5693 	u64 ns;
5694 
5695 	/*
5696 	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5697 	 * 24 to match the clock shift we set up earlier.
5698 	 */
5699 	if (adapter->hw.mac.type >= e1000_82580)
5700 		regval <<= IGB_82580_TSYNC_SHIFT;
5701 
5702 	ns = timecounter_cyc2time(&adapter->clock, regval);
5703 	timecompare_update(&adapter->compare, ns);
5704 	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5705 	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5706 	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5707 }
5708 
5709 /**
5710  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5711  * @q_vector: pointer to q_vector containing needed info
5712  * @buffer_info: pointer to igb_tx_buffer structure
5713  *
5714  * If we were asked to do hardware stamping and such a time stamp is
5715  * available, then it must have been for this skb here because we
5716  * allow only one such packet into the queue.
5717  */
5718 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5719 			    struct igb_tx_buffer *buffer_info)
5720 {
5721 	struct igb_adapter *adapter = q_vector->adapter;
5722 	struct e1000_hw *hw = &adapter->hw;
5723 	struct skb_shared_hwtstamps shhwtstamps;
5724 	u64 regval;
5725 
5726 	/* if skb does not support hw timestamp or TX stamp not valid exit */
5727 	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5728 	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5729 		return;
5730 
5731 	regval = rd32(E1000_TXSTMPL);
5732 	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5733 
5734 	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5735 	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5736 }
5737 
5738 /**
5739  * igb_clean_tx_irq - Reclaim resources after transmit completes
5740  * @q_vector: pointer to q_vector containing needed info
5741  * returns true if ring is completely cleaned
5742  **/
5743 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5744 {
5745 	struct igb_adapter *adapter = q_vector->adapter;
5746 	struct igb_ring *tx_ring = q_vector->tx.ring;
5747 	struct igb_tx_buffer *tx_buffer;
5748 	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5749 	unsigned int total_bytes = 0, total_packets = 0;
5750 	unsigned int budget = q_vector->tx.work_limit;
5751 	unsigned int i = tx_ring->next_to_clean;
5752 
5753 	if (test_bit(__IGB_DOWN, &adapter->state))
5754 		return true;
5755 
5756 	tx_buffer = &tx_ring->tx_buffer_info[i];
5757 	tx_desc = IGB_TX_DESC(tx_ring, i);
5758 	i -= tx_ring->count;
5759 
5760 	for (; budget; budget--) {
5761 		eop_desc = tx_buffer->next_to_watch;
5762 
5763 		/* prevent any other reads prior to eop_desc */
5764 		rmb();
5765 
5766 		/* if next_to_watch is not set then there is no work pending */
5767 		if (!eop_desc)
5768 			break;
5769 
5770 		/* if DD is not set pending work has not been completed */
5771 		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5772 			break;
5773 
5774 		/* clear next_to_watch to prevent false hangs */
5775 		tx_buffer->next_to_watch = NULL;
5776 
5777 		/* update the statistics for this packet */
5778 		total_bytes += tx_buffer->bytecount;
5779 		total_packets += tx_buffer->gso_segs;
5780 
5781 		/* retrieve hardware timestamp */
5782 		igb_tx_hwtstamp(q_vector, tx_buffer);
5783 
5784 		/* free the skb */
5785 		dev_kfree_skb_any(tx_buffer->skb);
5786 		tx_buffer->skb = NULL;
5787 
5788 		/* unmap skb header data */
5789 		dma_unmap_single(tx_ring->dev,
5790 				 tx_buffer->dma,
5791 				 tx_buffer->length,
5792 				 DMA_TO_DEVICE);
5793 
5794 		/* clear last DMA location and unmap remaining buffers */
5795 		while (tx_desc != eop_desc) {
5796 			tx_buffer->dma = 0;
5797 
5798 			tx_buffer++;
5799 			tx_desc++;
5800 			i++;
5801 			if (unlikely(!i)) {
5802 				i -= tx_ring->count;
5803 				tx_buffer = tx_ring->tx_buffer_info;
5804 				tx_desc = IGB_TX_DESC(tx_ring, 0);
5805 			}
5806 
5807 			/* unmap any remaining paged data */
5808 			if (tx_buffer->dma) {
5809 				dma_unmap_page(tx_ring->dev,
5810 					       tx_buffer->dma,
5811 					       tx_buffer->length,
5812 					       DMA_TO_DEVICE);
5813 			}
5814 		}
5815 
5816 		/* clear last DMA location */
5817 		tx_buffer->dma = 0;
5818 
5819 		/* move us one more past the eop_desc for start of next pkt */
5820 		tx_buffer++;
5821 		tx_desc++;
5822 		i++;
5823 		if (unlikely(!i)) {
5824 			i -= tx_ring->count;
5825 			tx_buffer = tx_ring->tx_buffer_info;
5826 			tx_desc = IGB_TX_DESC(tx_ring, 0);
5827 		}
5828 	}
5829 
5830 	netdev_tx_completed_queue(txring_txq(tx_ring),
5831 				  total_packets, total_bytes);
5832 	i += tx_ring->count;
5833 	tx_ring->next_to_clean = i;
5834 	u64_stats_update_begin(&tx_ring->tx_syncp);
5835 	tx_ring->tx_stats.bytes += total_bytes;
5836 	tx_ring->tx_stats.packets += total_packets;
5837 	u64_stats_update_end(&tx_ring->tx_syncp);
5838 	q_vector->tx.total_bytes += total_bytes;
5839 	q_vector->tx.total_packets += total_packets;
5840 
5841 	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5842 		struct e1000_hw *hw = &adapter->hw;
5843 
5844 		eop_desc = tx_buffer->next_to_watch;
5845 
5846 		/* Detect a transmit hang in hardware, this serializes the
5847 		 * check with the clearing of time_stamp and movement of i */
5848 		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5849 		if (eop_desc &&
5850 		    time_after(jiffies, tx_buffer->time_stamp +
5851 			       (adapter->tx_timeout_factor * HZ)) &&
5852 		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5853 
5854 			/* detected Tx unit hang */
5855 			dev_err(tx_ring->dev,
5856 				"Detected Tx Unit Hang\n"
5857 				"  Tx Queue             <%d>\n"
5858 				"  TDH                  <%x>\n"
5859 				"  TDT                  <%x>\n"
5860 				"  next_to_use          <%x>\n"
5861 				"  next_to_clean        <%x>\n"
5862 				"buffer_info[next_to_clean]\n"
5863 				"  time_stamp           <%lx>\n"
5864 				"  next_to_watch        <%p>\n"
5865 				"  jiffies              <%lx>\n"
5866 				"  desc.status          <%x>\n",
5867 				tx_ring->queue_index,
5868 				rd32(E1000_TDH(tx_ring->reg_idx)),
5869 				readl(tx_ring->tail),
5870 				tx_ring->next_to_use,
5871 				tx_ring->next_to_clean,
5872 				tx_buffer->time_stamp,
5873 				eop_desc,
5874 				jiffies,
5875 				eop_desc->wb.status);
5876 			netif_stop_subqueue(tx_ring->netdev,
5877 					    tx_ring->queue_index);
5878 
5879 			/* we are about to reset, no point in enabling stuff */
5880 			return true;
5881 		}
5882 	}
5883 
5884 	if (unlikely(total_packets &&
5885 		     netif_carrier_ok(tx_ring->netdev) &&
5886 		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5887 		/* Make sure that anybody stopping the queue after this
5888 		 * sees the new next_to_clean.
5889 		 */
5890 		smp_mb();
5891 		if (__netif_subqueue_stopped(tx_ring->netdev,
5892 					     tx_ring->queue_index) &&
5893 		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5894 			netif_wake_subqueue(tx_ring->netdev,
5895 					    tx_ring->queue_index);
5896 
5897 			u64_stats_update_begin(&tx_ring->tx_syncp);
5898 			tx_ring->tx_stats.restart_queue++;
5899 			u64_stats_update_end(&tx_ring->tx_syncp);
5900 		}
5901 	}
5902 
5903 	return !!budget;
5904 }
5905 
5906 static inline void igb_rx_checksum(struct igb_ring *ring,
5907 				   union e1000_adv_rx_desc *rx_desc,
5908 				   struct sk_buff *skb)
5909 {
5910 	skb_checksum_none_assert(skb);
5911 
5912 	/* Ignore Checksum bit is set */
5913 	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5914 		return;
5915 
5916 	/* Rx checksum disabled via ethtool */
5917 	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5918 		return;
5919 
5920 	/* TCP/UDP checksum error bit is set */
5921 	if (igb_test_staterr(rx_desc,
5922 			     E1000_RXDEXT_STATERR_TCPE |
5923 			     E1000_RXDEXT_STATERR_IPE)) {
5924 		/*
5925 		 * work around errata with sctp packets where the TCPE aka
5926 		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5927 		 * packets (i.e. let the stack check the crc32c)
5928 		 */
5929 		if (!((skb->len == 60) &&
5930 		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5931 			u64_stats_update_begin(&ring->rx_syncp);
5932 			ring->rx_stats.csum_err++;
5933 			u64_stats_update_end(&ring->rx_syncp);
5934 		}
5935 		/* let the stack verify checksum errors */
5936 		return;
5937 	}
5938 	/* It must be a TCP or UDP packet with a valid checksum */
5939 	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5940 				      E1000_RXD_STAT_UDPCS))
5941 		skb->ip_summed = CHECKSUM_UNNECESSARY;
5942 
5943 	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5944 		le32_to_cpu(rx_desc->wb.upper.status_error));
5945 }
5946 
5947 static inline void igb_rx_hash(struct igb_ring *ring,
5948 			       union e1000_adv_rx_desc *rx_desc,
5949 			       struct sk_buff *skb)
5950 {
5951 	if (ring->netdev->features & NETIF_F_RXHASH)
5952 		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5953 }
5954 
5955 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5956 			    union e1000_adv_rx_desc *rx_desc,
5957 			    struct sk_buff *skb)
5958 {
5959 	struct igb_adapter *adapter = q_vector->adapter;
5960 	struct e1000_hw *hw = &adapter->hw;
5961 	u64 regval;
5962 
5963 	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5964 				       E1000_RXDADV_STAT_TS))
5965 		return;
5966 
5967 	/*
5968 	 * If this bit is set, then the RX registers contain the time stamp. No
5969 	 * other packet will be time stamped until we read these registers, so
5970 	 * read the registers to make them available again. Because only one
5971 	 * packet can be time stamped at a time, we know that the register
5972 	 * values must belong to this one here and therefore we don't need to
5973 	 * compare any of the additional attributes stored for it.
5974 	 *
5975 	 * If nothing went wrong, then it should have a shared tx_flags that we
5976 	 * can turn into a skb_shared_hwtstamps.
5977 	 */
5978 	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5979 		u32 *stamp = (u32 *)skb->data;
5980 		regval = le32_to_cpu(*(stamp + 2));
5981 		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5982 		skb_pull(skb, IGB_TS_HDR_LEN);
5983 	} else {
5984 		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5985 			return;
5986 
5987 		regval = rd32(E1000_RXSTMPL);
5988 		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5989 	}
5990 
5991 	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5992 }
5993 
5994 static void igb_rx_vlan(struct igb_ring *ring,
5995 			union e1000_adv_rx_desc *rx_desc,
5996 			struct sk_buff *skb)
5997 {
5998 	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5999 		u16 vid;
6000 		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6001 		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6002 			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6003 		else
6004 			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6005 
6006 		__vlan_hwaccel_put_tag(skb, vid);
6007 	}
6008 }
6009 
6010 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6011 {
6012 	/* HW will not DMA in data larger than the given buffer, even if it
6013 	 * parses the (NFS, of course) header to be larger.  In that case, it
6014 	 * fills the header buffer and spills the rest into the page.
6015 	 */
6016 	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6017 	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6018 	if (hlen > IGB_RX_HDR_LEN)
6019 		hlen = IGB_RX_HDR_LEN;
6020 	return hlen;
6021 }
6022 
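/**
 * igb_clean_rx_irq - clean completed receive descriptors from a ring
 * @q_vector: q_vector that owns the ring
 * @budget: NAPI budget, i.e. how many packets may be processed
 *
 * Returns true if the budget was not exhausted and the ring is clean.
 **/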
6023 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6024 {
6025 	struct igb_ring *rx_ring = q_vector->rx.ring;
6026 	union e1000_adv_rx_desc *rx_desc;
6027 	const int current_node = numa_node_id();
6028 	unsigned int total_bytes = 0, total_packets = 0;
6029 	u16 cleaned_count = igb_desc_unused(rx_ring);
6030 	u16 i = rx_ring->next_to_clean;
6031 
6032 	rx_desc = IGB_RX_DESC(rx_ring, i);
6033 
6034 	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6035 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6036 		struct sk_buff *skb = buffer_info->skb;
6037 		union e1000_adv_rx_desc *next_rxd;
6038 
6039 		buffer_info->skb = NULL;
6040 		prefetch(skb->data);
6041 
6042 		i++;
6043 		if (i == rx_ring->count)
6044 			i = 0;
6045 
6046 		next_rxd = IGB_RX_DESC(rx_ring, i);
6047 		prefetch(next_rxd);
6048 
6049 		/*
6050 		 * This memory barrier is needed to keep us from reading
6051 		 * any other fields out of the rx_desc until we know the
6052 		 * RXD_STAT_DD bit is set
6053 		 */
6054 		rmb();
6055 
6056 		if (!skb_is_nonlinear(skb)) {
6057 			__skb_put(skb, igb_get_hlen(rx_desc));
6058 			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6059 					 IGB_RX_HDR_LEN,
6060 					 DMA_FROM_DEVICE);
6061 			buffer_info->dma = 0;
6062 		}
6063 
6064 		if (rx_desc->wb.upper.length) {
6065 			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6066 
6067 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6068 						buffer_info->page,
6069 						buffer_info->page_offset,
6070 						length);
6071 
6072 			skb->len += length;
6073 			skb->data_len += length;
6074 			skb->truesize += PAGE_SIZE / 2;
6075 
6076 			if ((page_count(buffer_info->page) != 1) ||
6077 			    (page_to_nid(buffer_info->page) != current_node))
6078 				buffer_info->page = NULL;
6079 			else
6080 				get_page(buffer_info->page);
6081 
6082 			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6083 				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6084 			buffer_info->page_dma = 0;
6085 		}
6086 
6087 		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6088 			struct igb_rx_buffer *next_buffer;
6089 			next_buffer = &rx_ring->rx_buffer_info[i];
6090 			buffer_info->skb = next_buffer->skb;
6091 			buffer_info->dma = next_buffer->dma;
6092 			next_buffer->skb = skb;
6093 			next_buffer->dma = 0;
6094 			goto next_desc;
6095 		}
6096 
6097 		if (igb_test_staterr(rx_desc,
6098 				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6099 			dev_kfree_skb_any(skb);
6100 			goto next_desc;
6101 		}
6102 
6103 		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6104 		igb_rx_hash(rx_ring, rx_desc, skb);
6105 		igb_rx_checksum(rx_ring, rx_desc, skb);
6106 		igb_rx_vlan(rx_ring, rx_desc, skb);
6107 
6108 		total_bytes += skb->len;
6109 		total_packets++;
6110 
6111 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6112 
6113 		napi_gro_receive(&q_vector->napi, skb);
6114 
6115 		budget--;
6116 next_desc:
6117 		if (!budget)
6118 			break;
6119 
6120 		cleaned_count++;
6121 		/* return some buffers to hardware, one at a time is too slow */
6122 		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6123 			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6124 			cleaned_count = 0;
6125 		}
6126 
6127 		/* use prefetched values */
6128 		rx_desc = next_rxd;
6129 	}
6130 
6131 	rx_ring->next_to_clean = i;
6132 	u64_stats_update_begin(&rx_ring->rx_syncp);
6133 	rx_ring->rx_stats.packets += total_packets;
6134 	rx_ring->rx_stats.bytes += total_bytes;
6135 	u64_stats_update_end(&rx_ring->rx_syncp);
6136 	q_vector->rx.total_packets += total_packets;
6137 	q_vector->rx.total_bytes += total_bytes;
6138 
6139 	if (cleaned_count)
6140 		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6141 
6142 	return !!budget;
6143 }
6144 
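/**
 * igb_alloc_mapped_skb - ensure an rx buffer has a DMA-mapped header skb
 * @rx_ring: ring the buffer belongs to
 * @bi: buffer info entry to populate
 *
 * Returns false, and counts an alloc_failed, if the skb allocation or the
 * DMA mapping fails.
 **/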
6145 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6146 				 struct igb_rx_buffer *bi)
6147 {
6148 	struct sk_buff *skb = bi->skb;
6149 	dma_addr_t dma = bi->dma;
6150 
6151 	if (dma)
6152 		return true;
6153 
6154 	if (likely(!skb)) {
6155 		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6156 						IGB_RX_HDR_LEN);
6157 		bi->skb = skb;
6158 		if (!skb) {
6159 			rx_ring->rx_stats.alloc_failed++;
6160 			return false;
6161 		}
6162 
6163 		/* initialize skb for ring */
6164 		skb_record_rx_queue(skb, rx_ring->queue_index);
6165 	}
6166 
6167 	dma = dma_map_single(rx_ring->dev, skb->data,
6168 			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6169 
6170 	if (dma_mapping_error(rx_ring->dev, dma)) {
6171 		rx_ring->rx_stats.alloc_failed++;
6172 		return false;
6173 	}
6174 
6175 	bi->dma = dma;
6176 	return true;
6177 }
6178 
6179 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6180 				  struct igb_rx_buffer *bi)
6181 {
6182 	struct page *page = bi->page;
6183 	dma_addr_t page_dma = bi->page_dma;
6184 	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6185 
6186 	if (page_dma)
6187 		return true;
6188 
6189 	if (!page) {
6190 		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6191 		bi->page = page;
6192 		if (unlikely(!page)) {
6193 			rx_ring->rx_stats.alloc_failed++;
6194 			return false;
6195 		}
6196 	}
6197 
6198 	page_dma = dma_map_page(rx_ring->dev, page,
6199 				page_offset, PAGE_SIZE / 2,
6200 				DMA_FROM_DEVICE);
6201 
6202 	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6203 		rx_ring->rx_stats.alloc_failed++;
6204 		return false;
6205 	}
6206 
6207 	bi->page_dma = page_dma;
6208 	bi->page_offset = page_offset;
6209 	return true;
6210 }
6211 
6212 /**
6213  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6214  * @rx_ring: rx descriptor ring to place buffers on
 * @cleaned_count: number of buffers to replace
6215  **/
6216 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6217 {
6218 	union e1000_adv_rx_desc *rx_desc;
6219 	struct igb_rx_buffer *bi;
6220 	u16 i = rx_ring->next_to_use;
6221 
6222 	rx_desc = IGB_RX_DESC(rx_ring, i);
6223 	bi = &rx_ring->rx_buffer_info[i];
6224 	i -= rx_ring->count;
6225 
6226 	while (cleaned_count--) {
6227 		if (!igb_alloc_mapped_skb(rx_ring, bi))
6228 			break;
6229 
6230 		/* Refresh the desc even if buffer_addrs didn't change
6231 		 * because each write-back erases this info. */
6232 		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6233 
6234 		if (!igb_alloc_mapped_page(rx_ring, bi))
6235 			break;
6236 
6237 		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6238 
6239 		rx_desc++;
6240 		bi++;
6241 		i++;
6242 		if (unlikely(!i)) {
6243 			rx_desc = IGB_RX_DESC(rx_ring, 0);
6244 			bi = rx_ring->rx_buffer_info;
6245 			i -= rx_ring->count;
6246 		}
6247 
6248 		/* clear the hdr_addr for the next_to_use descriptor */
6249 		rx_desc->read.hdr_addr = 0;
6250 	}
6251 
6252 	i += rx_ring->count;
6253 
6254 	if (rx_ring->next_to_use != i) {
6255 		rx_ring->next_to_use = i;
6256 
6257 		/* Force memory writes to complete before letting h/w
6258 		 * know there are new descriptors to fetch.  (Only
6259 		 * applicable for weak-ordered memory model archs,
6260 		 * such as IA-64). */
6261 		wmb();
6262 		writel(i, rx_ring->tail);
6263 	}
6264 }
6265 
6266 /**
6267  * igb_mii_ioctl - handle MII ioctls for copper PHY register access
6268  * @netdev: network interface device structure
6269  * @ifr: pointer to interface request structure
6270  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6271  **/
6272 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6273 {
6274 	struct igb_adapter *adapter = netdev_priv(netdev);
6275 	struct mii_ioctl_data *data = if_mii(ifr);
6276 
6277 	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6278 		return -EOPNOTSUPP;
6279 
6280 	switch (cmd) {
6281 	case SIOCGMIIPHY:
6282 		data->phy_id = adapter->hw.phy.addr;
6283 		break;
6284 	case SIOCGMIIREG:
6285 		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6286 		                     &data->val_out))
6287 			return -EIO;
6288 		break;
6289 	case SIOCSMIIREG:
6290 	default:
6291 		return -EOPNOTSUPP;
6292 	}
6293 	return 0;
6294 }
6295 
6296 /**
6297  * igb_hwtstamp_ioctl - control hardware time stamping
6298  * @netdev: network interface device structure
6299  * @ifr: interface request carrying the hwtstamp_config to apply
6300  * @cmd: ioctl command (SIOCSHWTSTAMP)
6301  *
6302  * Outgoing time stamping can be enabled and disabled. Play nice and
6303  * disable it when requested, although it shouldn't cause any overhead
6304  * when no packet needs it. At most one packet in the queue may be
6305  * marked for time stamping, otherwise it would be impossible to tell
6306  * for sure to which packet the hardware time stamp belongs.
6307  *
6308  * Incoming time stamping has to be configured via the hardware
6309  * filters. Not all combinations are supported, in particular event
6310  * type has to be specified. Matching the kind of event packet is
6311  * not supported, with the exception of "all V2 events regardless of
6312  * level 2 or 4".
6313  *
6314  **/
6315 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6316 			      struct ifreq *ifr, int cmd)
6317 {
6318 	struct igb_adapter *adapter = netdev_priv(netdev);
6319 	struct e1000_hw *hw = &adapter->hw;
6320 	struct hwtstamp_config config;
6321 	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6322 	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6323 	u32 tsync_rx_cfg = 0;
6324 	bool is_l4 = false;
6325 	bool is_l2 = false;
6326 	u32 regval;
6327 
6328 	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6329 		return -EFAULT;
6330 
6331 	/* reserved for future extensions */
6332 	if (config.flags)
6333 		return -EINVAL;
6334 
6335 	switch (config.tx_type) {
6336 	case HWTSTAMP_TX_OFF:
6337 		tsync_tx_ctl = 0;
		/* fall through */
6338 	case HWTSTAMP_TX_ON:
6339 		break;
6340 	default:
6341 		return -ERANGE;
6342 	}
6343 
6344 	switch (config.rx_filter) {
6345 	case HWTSTAMP_FILTER_NONE:
6346 		tsync_rx_ctl = 0;
6347 		break;
6348 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6349 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6350 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6351 	case HWTSTAMP_FILTER_ALL:
6352 		/*
6353 		 * register TSYNCRXCFG must be set, therefore it is not
6354 		 * possible to time stamp both Sync and Delay_Req messages
6355 		 * => fall back to time stamping all packets
6356 		 */
6357 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6358 		config.rx_filter = HWTSTAMP_FILTER_ALL;
6359 		break;
6360 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6361 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6362 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6363 		is_l4 = true;
6364 		break;
6365 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6366 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6367 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6368 		is_l4 = true;
6369 		break;
6370 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6371 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6372 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6373 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6374 		is_l2 = true;
6375 		is_l4 = true;
6376 		config.rx_filter = HWTSTAMP_FILTER_SOME;
6377 		break;
6378 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6379 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6380 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6381 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6382 		is_l2 = true;
6383 		is_l4 = true;
6384 		config.rx_filter = HWTSTAMP_FILTER_SOME;
6385 		break;
6386 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6387 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6388 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6389 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6390 		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6391 		is_l2 = true;
6392 		is_l4 = true;
6393 		break;
6394 	default:
6395 		return -ERANGE;
6396 	}
6397 
6398 	if (hw->mac.type == e1000_82575) {
6399 		if (tsync_rx_ctl || tsync_tx_ctl)
6400 			return -EINVAL;
6401 		return 0;
6402 	}
6403 
6404 	/*
6405 	 * Per-packet timestamping only works if all packets are
6406 	 * timestamped, so enable timestamping in all packets as
6407 	 * long as one rx filter was configured.
6408 	 */
6409 	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6410 		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6411 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6412 	}
6413 
6414 	/* enable/disable TX */
6415 	regval = rd32(E1000_TSYNCTXCTL);
6416 	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6417 	regval |= tsync_tx_ctl;
6418 	wr32(E1000_TSYNCTXCTL, regval);
6419 
6420 	/* enable/disable RX */
6421 	regval = rd32(E1000_TSYNCRXCTL);
6422 	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6423 	regval |= tsync_rx_ctl;
6424 	wr32(E1000_TSYNCRXCTL, regval);
6425 
6426 	/* define which PTP packets are time stamped */
6427 	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6428 
6429 	/* define ethertype filter for timestamped packets */
6430 	if (is_l2)
6431 		wr32(E1000_ETQF(3),
6432 		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6433 		                 E1000_ETQF_1588 | /* enable timestamping */
6434 		                 ETH_P_1588));     /* 1588 eth protocol type */
6435 	else
6436 		wr32(E1000_ETQF(3), 0);
6437 
6438 #define PTP_PORT 319
6439 	/* L4 Queue Filter[3]: filter by destination port and protocol */
6440 	if (is_l4) {
6441 		u32 ftqf = (IPPROTO_UDP /* UDP */
6442 			| E1000_FTQF_VF_BP /* VF not compared */
6443 			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6444 			| E1000_FTQF_MASK); /* mask all inputs */
6445 		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6446 
6447 		wr32(E1000_IMIR(3), htons(PTP_PORT));
6448 		wr32(E1000_IMIREXT(3),
6449 		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6450 		if (hw->mac.type == e1000_82576) {
6451 			/* enable source port check */
6452 			wr32(E1000_SPQF(3), htons(PTP_PORT));
6453 			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6454 		}
6455 		wr32(E1000_FTQF(3), ftqf);
6456 	} else {
6457 		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6458 	}
6459 	wrfl();
6460 
6461 	adapter->hwtstamp_config = config;
6462 
6463 	/* clear TX/RX time stamp registers, just to be sure */
6464 	regval = rd32(E1000_TXSTMPH);
6465 	regval = rd32(E1000_RXSTMPH);
6466 
6467 	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6468 		-EFAULT : 0;
6469 }
6470 
6471 /**
6472  * igb_ioctl - dispatch device-specific ioctls
6473  * @netdev: network interface device structure
6474  * @ifr: pointer to interface request structure
6475  * @cmd: ioctl command
6476  **/
6477 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6478 {
6479 	switch (cmd) {
6480 	case SIOCGMIIPHY:
6481 	case SIOCGMIIREG:
6482 	case SIOCSMIIREG:
6483 		return igb_mii_ioctl(netdev, ifr, cmd);
6484 	case SIOCSHWTSTAMP:
6485 		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6486 	default:
6487 		return -EOPNOTSUPP;
6488 	}
6489 }
6490 
6491 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6492 {
6493 	struct igb_adapter *adapter = hw->back;
6494 	u16 cap_offset;
6495 
6496 	cap_offset = adapter->pdev->pcie_cap;
6497 	if (!cap_offset)
6498 		return -E1000_ERR_CONFIG;
6499 
6500 	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6501 
6502 	return 0;
6503 }
6504 
6505 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6506 {
6507 	struct igb_adapter *adapter = hw->back;
6508 	u16 cap_offset;
6509 
6510 	cap_offset = adapter->pdev->pcie_cap;
6511 	if (!cap_offset)
6512 		return -E1000_ERR_CONFIG;
6513 
6514 	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6515 
6516 	return 0;
6517 }
6518 
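/**
 * igb_vlan_mode - enable or disable hardware VLAN tag insert/strip
 * @netdev: network interface device structure
 * @features: netdev features word; NETIF_F_HW_VLAN_RX selects stripping
 **/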
6519 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6520 {
6521 	struct igb_adapter *adapter = netdev_priv(netdev);
6522 	struct e1000_hw *hw = &adapter->hw;
6523 	u32 ctrl, rctl;
6524 	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6525 
6526 	if (enable) {
6527 		/* enable VLAN tag insert/strip */
6528 		ctrl = rd32(E1000_CTRL);
6529 		ctrl |= E1000_CTRL_VME;
6530 		wr32(E1000_CTRL, ctrl);
6531 
6532 		/* Disable CFI check */
6533 		rctl = rd32(E1000_RCTL);
6534 		rctl &= ~E1000_RCTL_CFIEN;
6535 		wr32(E1000_RCTL, rctl);
6536 	} else {
6537 		/* disable VLAN tag insert/strip */
6538 		ctrl = rd32(E1000_CTRL);
6539 		ctrl &= ~E1000_CTRL_VME;
6540 		wr32(E1000_CTRL, ctrl);
6541 	}
6542 
6543 	igb_rlpml_set(adapter);
6544 }
6545 
6546 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6547 {
6548 	struct igb_adapter *adapter = netdev_priv(netdev);
6549 	struct e1000_hw *hw = &adapter->hw;
6550 	int pf_id = adapter->vfs_allocated_count;
6551 
6552 	/* attempt to add filter to vlvf array */
6553 	igb_vlvf_set(adapter, vid, true, pf_id);
6554 
6555 	/* add the filter since PF can receive vlans w/o entry in vlvf */
6556 	igb_vfta_set(hw, vid, true);
6557 
6558 	set_bit(vid, adapter->active_vlans);
6559 
6560 	return 0;
6561 }
6562 
6563 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6564 {
6565 	struct igb_adapter *adapter = netdev_priv(netdev);
6566 	struct e1000_hw *hw = &adapter->hw;
6567 	int pf_id = adapter->vfs_allocated_count;
6568 	s32 err;
6569 
6570 	/* remove vlan from VLVF table array */
6571 	err = igb_vlvf_set(adapter, vid, false, pf_id);
6572 
6573 	/* if vid was not present in VLVF just remove it from table */
6574 	if (err)
6575 		igb_vfta_set(hw, vid, false);
6576 
6577 	clear_bit(vid, adapter->active_vlans);
6578 
6579 	return 0;
6580 }
6581 
6582 static void igb_restore_vlan(struct igb_adapter *adapter)
6583 {
6584 	u16 vid;
6585 
6586 	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6587 
6588 	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6589 		igb_vlan_rx_add_vid(adapter->netdev, vid);
6590 }
6591 
6592 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6593 {
6594 	struct pci_dev *pdev = adapter->pdev;
6595 	struct e1000_mac_info *mac = &adapter->hw.mac;
6596 
6597 	mac->autoneg = 0;
6598 
6599 	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6600 	 * for the switch() below to work */
6601 	if ((spd & 1) || (dplx & ~1))
6602 		goto err_inval;
6603 
6604 	/* Fiber NICs only allow 1000 Mbps full duplex */
6605 	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6606 	    (spd != SPEED_1000 ||
6607 	     dplx != DUPLEX_FULL))
6608 		goto err_inval;
6609 
6610 	switch (spd + dplx) {
6611 	case SPEED_10 + DUPLEX_HALF:
6612 		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6613 		break;
6614 	case SPEED_10 + DUPLEX_FULL:
6615 		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6616 		break;
6617 	case SPEED_100 + DUPLEX_HALF:
6618 		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6619 		break;
6620 	case SPEED_100 + DUPLEX_FULL:
6621 		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6622 		break;
6623 	case SPEED_1000 + DUPLEX_FULL:
6624 		mac->autoneg = 1;
6625 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6626 		break;
6627 	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6628 	default:
6629 		goto err_inval;
6630 	}
6631 	return 0;
6632 
6633 err_inval:
6634 	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6635 	return -EINVAL;
6636 }
6637 
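/**
 * __igb_shutdown - common suspend/shutdown path
 * @pdev: PCI device being suspended
 * @enable_wake: set true if wake-up (WoL or manageability) remains armed
 * @runtime: true for runtime suspend, which only arms link-change wake
 **/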
6638 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6639 			  bool runtime)
6640 {
6641 	struct net_device *netdev = pci_get_drvdata(pdev);
6642 	struct igb_adapter *adapter = netdev_priv(netdev);
6643 	struct e1000_hw *hw = &adapter->hw;
6644 	u32 ctrl, rctl, status;
6645 	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6646 #ifdef CONFIG_PM
6647 	int retval = 0;
6648 #endif
6649 
6650 	netif_device_detach(netdev);
6651 
6652 	if (netif_running(netdev))
6653 		__igb_close(netdev, true);
6654 
6655 	igb_clear_interrupt_scheme(adapter);
6656 
6657 #ifdef CONFIG_PM
6658 	retval = pci_save_state(pdev);
6659 	if (retval)
6660 		return retval;
6661 #endif
6662 
6663 	status = rd32(E1000_STATUS);
6664 	if (status & E1000_STATUS_LU)
6665 		wufc &= ~E1000_WUFC_LNKC;
6666 
6667 	if (wufc) {
6668 		igb_setup_rctl(adapter);
6669 		igb_set_rx_mode(netdev);
6670 
6671 		/* turn on all-multi mode if wake on multicast is enabled */
6672 		if (wufc & E1000_WUFC_MC) {
6673 			rctl = rd32(E1000_RCTL);
6674 			rctl |= E1000_RCTL_MPE;
6675 			wr32(E1000_RCTL, rctl);
6676 		}
6677 
6678 		ctrl = rd32(E1000_CTRL);
6679 		/* advertise wake from D3Cold */
6680 		#define E1000_CTRL_ADVD3WUC 0x00100000
6681 		/* phy power management enable */
6682 		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6683 		ctrl |= E1000_CTRL_ADVD3WUC;
6684 		wr32(E1000_CTRL, ctrl);
6685 
6686 		/* Allow time for pending master requests to run */
6687 		igb_disable_pcie_master(hw);
6688 
6689 		wr32(E1000_WUC, E1000_WUC_PME_EN);
6690 		wr32(E1000_WUFC, wufc);
6691 	} else {
6692 		wr32(E1000_WUC, 0);
6693 		wr32(E1000_WUFC, 0);
6694 	}
6695 
6696 	*enable_wake = wufc || adapter->en_mng_pt;
6697 	if (!*enable_wake)
6698 		igb_power_down_link(adapter);
6699 	else
6700 		igb_power_up_link(adapter);
6701 
6702 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6703 	 * would have already happened in close and is redundant. */
6704 	igb_release_hw_control(adapter);
6705 
6706 	pci_disable_device(pdev);
6707 
6708 	return 0;
6709 }
6710 
6711 #ifdef CONFIG_PM
6712 static int igb_suspend(struct device *dev)
6713 {
6714 	int retval;
6715 	bool wake;
6716 	struct pci_dev *pdev = to_pci_dev(dev);
6717 
6718 	retval = __igb_shutdown(pdev, &wake, 0);
6719 	if (retval)
6720 		return retval;
6721 
6722 	if (wake) {
6723 		pci_prepare_to_sleep(pdev);
6724 	} else {
6725 		pci_wake_from_d3(pdev, false);
6726 		pci_set_power_state(pdev, PCI_D3hot);
6727 	}
6728 
6729 	return 0;
6730 }
6731 
6732 static int igb_resume(struct device *dev)
6733 {
6734 	struct pci_dev *pdev = to_pci_dev(dev);
6735 	struct net_device *netdev = pci_get_drvdata(pdev);
6736 	struct igb_adapter *adapter = netdev_priv(netdev);
6737 	struct e1000_hw *hw = &adapter->hw;
6738 	u32 err;
6739 
6740 	pci_set_power_state(pdev, PCI_D0);
6741 	pci_restore_state(pdev);
6742 	pci_save_state(pdev);
6743 
6744 	err = pci_enable_device_mem(pdev);
6745 	if (err) {
6746 		dev_err(&pdev->dev,
6747 			"igb: Cannot enable PCI device from suspend\n");
6748 		return err;
6749 	}
6750 	pci_set_master(pdev);
6751 
6752 	pci_enable_wake(pdev, PCI_D3hot, 0);
6753 	pci_enable_wake(pdev, PCI_D3cold, 0);
6754 
6755 	if (!rtnl_is_locked()) {
6756 		/*
6757 		 * shut up ASSERT_RTNL() warning in
6758 		 * netif_set_real_num_tx/rx_queues.
6759 		 */
6760 		rtnl_lock();
6761 		err = igb_init_interrupt_scheme(adapter);
6762 		rtnl_unlock();
6763 	} else {
6764 		err = igb_init_interrupt_scheme(adapter);
6765 	}
6766 	if (err) {
6767 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6768 		return -ENOMEM;
6769 	}
6770 
6771 	igb_reset(adapter);
6772 
6773 	/* let the f/w know that the h/w is now under the control of the
6774 	 * driver. */
6775 	igb_get_hw_control(adapter);
6776 
6777 	wr32(E1000_WUS, ~0);
6778 
6779 	if (netdev->flags & IFF_UP) {
6780 		err = __igb_open(netdev, true);
6781 		if (err)
6782 			return err;
6783 	}
6784 
6785 	netif_device_attach(netdev);
6786 	return 0;
6787 }
6788 
6789 #ifdef CONFIG_PM_RUNTIME
6790 static int igb_runtime_idle(struct device *dev)
6791 {
6792 	struct pci_dev *pdev = to_pci_dev(dev);
6793 	struct net_device *netdev = pci_get_drvdata(pdev);
6794 	struct igb_adapter *adapter = netdev_priv(netdev);
6795 
6796 	if (!igb_has_link(adapter))
6797 		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6798 
6799 	return -EBUSY;
6800 }
6801 
6802 static int igb_runtime_suspend(struct device *dev)
6803 {
6804 	struct pci_dev *pdev = to_pci_dev(dev);
6805 	int retval;
6806 	bool wake;
6807 
6808 	retval = __igb_shutdown(pdev, &wake, 1);
6809 	if (retval)
6810 		return retval;
6811 
6812 	if (wake) {
6813 		pci_prepare_to_sleep(pdev);
6814 	} else {
6815 		pci_wake_from_d3(pdev, false);
6816 		pci_set_power_state(pdev, PCI_D3hot);
6817 	}
6818 
6819 	return 0;
6820 }
6821 
6822 static int igb_runtime_resume(struct device *dev)
6823 {
6824 	return igb_resume(dev);
6825 }
6826 #endif /* CONFIG_PM_RUNTIME */
6827 #endif
6828 
6829 static void igb_shutdown(struct pci_dev *pdev)
6830 {
6831 	bool wake;
6832 
6833 	__igb_shutdown(pdev, &wake, 0);
6834 
6835 	if (system_state == SYSTEM_POWER_OFF) {
6836 		pci_wake_from_d3(pdev, wake);
6837 		pci_set_power_state(pdev, PCI_D3hot);
6838 	}
6839 }
6840 
6841 #ifdef CONFIG_NET_POLL_CONTROLLER
6842 /*
6843  * Polling 'interrupt' - used by things like netconsole to send skbs
6844  * without having to re-enable interrupts. It's not called while
6845  * the interrupt routine is executing.
6846  */
6847 static void igb_netpoll(struct net_device *netdev)
6848 {
6849 	struct igb_adapter *adapter = netdev_priv(netdev);
6850 	struct e1000_hw *hw = &adapter->hw;
6851 	struct igb_q_vector *q_vector;
6852 	int i;
6853 
6854 	for (i = 0; i < adapter->num_q_vectors; i++) {
6855 		q_vector = adapter->q_vector[i];
6856 		if (adapter->msix_entries)
6857 			wr32(E1000_EIMC, q_vector->eims_value);
6858 		else
6859 			igb_irq_disable(adapter);
6860 		napi_schedule(&q_vector->napi);
6861 	}
6862 }
6863 #endif /* CONFIG_NET_POLL_CONTROLLER */
6864 
6865 /**
6866  * igb_io_error_detected - called when PCI error is detected
6867  * @pdev: Pointer to PCI device
6868  * @state: The current pci connection state
6869  *
6870  * This function is called after a PCI bus error affecting
6871  * this device has been detected.
6872  */
6873 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6874 					      pci_channel_state_t state)
6875 {
6876 	struct net_device *netdev = pci_get_drvdata(pdev);
6877 	struct igb_adapter *adapter = netdev_priv(netdev);
6878 
6879 	netif_device_detach(netdev);
6880 
6881 	if (state == pci_channel_io_perm_failure)
6882 		return PCI_ERS_RESULT_DISCONNECT;
6883 
6884 	if (netif_running(netdev))
6885 		igb_down(adapter);
6886 	pci_disable_device(pdev);
6887 
6888 	/* Request a slot reset. */
6889 	return PCI_ERS_RESULT_NEED_RESET;
6890 }
6891 
6892 /**
6893  * igb_io_slot_reset - called after the pci bus has been reset.
6894  * @pdev: Pointer to PCI device
6895  *
6896  * Restart the card from scratch, as if from a cold-boot. Implementation
6897  * resembles the first-half of the igb_resume routine.
6898  */
6899 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6900 {
6901 	struct net_device *netdev = pci_get_drvdata(pdev);
6902 	struct igb_adapter *adapter = netdev_priv(netdev);
6903 	struct e1000_hw *hw = &adapter->hw;
6904 	pci_ers_result_t result;
6905 	int err;
6906 
6907 	if (pci_enable_device_mem(pdev)) {
6908 		dev_err(&pdev->dev,
6909 			"Cannot re-enable PCI device after reset.\n");
6910 		result = PCI_ERS_RESULT_DISCONNECT;
6911 	} else {
6912 		pci_set_master(pdev);
6913 		pci_restore_state(pdev);
6914 		pci_save_state(pdev);
6915 
6916 		pci_enable_wake(pdev, PCI_D3hot, 0);
6917 		pci_enable_wake(pdev, PCI_D3cold, 0);
6918 
6919 		igb_reset(adapter);
6920 		wr32(E1000_WUS, ~0);
6921 		result = PCI_ERS_RESULT_RECOVERED;
6922 	}
6923 
6924 	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6925 	if (err) {
6926 		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6927 		        "failed 0x%0x\n", err);
6928 		/* non-fatal, continue */
6929 	}
6930 
6931 	return result;
6932 }
6933 
6934 /**
6935  * igb_io_resume - called when traffic can start flowing again.
6936  * @pdev: Pointer to PCI device
6937  *
6938  * This callback is called when the error recovery driver tells us that
6939  * its OK to resume normal operation. Implementation resembles the
6940  * second-half of the igb_resume routine.
6941  */
6942 static void igb_io_resume(struct pci_dev *pdev)
6943 {
6944 	struct net_device *netdev = pci_get_drvdata(pdev);
6945 	struct igb_adapter *adapter = netdev_priv(netdev);
6946 
6947 	if (netif_running(netdev)) {
6948 		if (igb_up(adapter)) {
6949 			dev_err(&pdev->dev, "igb_up failed after reset\n");
6950 			return;
6951 		}
6952 	}
6953 
6954 	netif_device_attach(netdev);
6955 
6956 	/* let the f/w know that the h/w is now under the control of the
6957 	 * driver. */
6958 	igb_get_hw_control(adapter);
6959 }
6960 
6961 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6962                              u8 qsel)
6963 {
6964 	u32 rar_low, rar_high;
6965 	struct e1000_hw *hw = &adapter->hw;
6966 
6967 	/* HW expects these in little endian so we reverse the byte order
6968 	 * from network order (big endian) to little endian
6969 	 */
6970 	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6971 	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6972 	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6973 
6974 	/* Indicate to hardware the Address is Valid. */
6975 	rar_high |= E1000_RAH_AV;
6976 
6977 	if (hw->mac.type == e1000_82575)
6978 		rar_high |= E1000_RAH_POOL_1 * qsel;
6979 	else
6980 		rar_high |= E1000_RAH_POOL_1 << qsel;
6981 
6982 	wr32(E1000_RAL(index), rar_low);
6983 	wrfl();
6984 	wr32(E1000_RAH(index), rar_high);
6985 	wrfl();
6986 }
6987 
6988 static int igb_set_vf_mac(struct igb_adapter *adapter,
6989                           int vf, unsigned char *mac_addr)
6990 {
6991 	struct e1000_hw *hw = &adapter->hw;
6992 	/* VF MAC addresses start at the end of the receive addresses and move
6993 	 * towards the first; as a result a collision should not be possible */
6994 	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6995 
6996 	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6997 
6998 	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6999 
7000 	return 0;
7001 }
7002 
7003 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7004 {
7005 	struct igb_adapter *adapter = netdev_priv(netdev);
7006 	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7007 		return -EINVAL;
7008 	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7009 	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7010 	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7011 				      " change effective.\n");
7012 	if (test_bit(__IGB_DOWN, &adapter->state)) {
7013 		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7014 			 " but the PF device is not up.\n");
7015 		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7016 			 " attempting to use the VF device.\n");
7017 	}
7018 	return igb_set_vf_mac(adapter, vf, mac);
7019 }
7020 
7021 static int igb_link_mbps(int internal_link_speed)
7022 {
7023 	switch (internal_link_speed) {
7024 	case SPEED_100:
7025 		return 100;
7026 	case SPEED_1000:
7027 		return 1000;
7028 	default:
7029 		return 0;
7030 	}
7031 }
7032 
7033 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7034 				  int link_speed)
7035 {
7036 	int rf_dec, rf_int;
7037 	u32 bcnrc_val;
7038 
7039 	if (tx_rate != 0) {
7040 		/* Calculate the rate factor values to set */
7041 		rf_int = link_speed / tx_rate;
7042 		rf_dec = (link_speed - (rf_int * tx_rate));
7043 		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
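		/* Worked example with illustrative rates: for
		 * link_speed = 1000 and tx_rate = 300, rf_int = 3 and
		 * rf_dec = (100 << E1000_RTTBCNRC_RF_INT_SHIFT) / 300
		 * (5461 for a 14-bit shift), i.e. a fixed-point rate
		 * factor of ~3.333 = 1000/300.
		 */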
7044 
7045 		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7046 		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7047 		               E1000_RTTBCNRC_RF_INT_MASK);
7048 		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7049 	} else {
7050 		bcnrc_val = 0;
7051 	}
7052 
7053 	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7054 	wr32(E1000_RTTBCNRC, bcnrc_val);
7055 }
7056 
7057 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7058 {
7059 	int actual_link_speed, i;
7060 	bool reset_rate = false;
7061 
7062 	/* VF TX rate limit was not set or not supported */
7063 	if ((adapter->vf_rate_link_speed == 0) ||
7064 	    (adapter->hw.mac.type != e1000_82576))
7065 		return;
7066 
7067 	actual_link_speed = igb_link_mbps(adapter->link_speed);
7068 	if (actual_link_speed != adapter->vf_rate_link_speed) {
7069 		reset_rate = true;
7070 		adapter->vf_rate_link_speed = 0;
7071 		dev_info(&adapter->pdev->dev,
7072 		         "Link speed has been changed. VF Transmit "
7073 		         "rate is disabled\n");
7074 	}
7075 
7076 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7077 		if (reset_rate)
7078 			adapter->vf_data[i].tx_rate = 0;
7079 
7080 		igb_set_vf_rate_limit(&adapter->hw, i,
7081 		                      adapter->vf_data[i].tx_rate,
7082 		                      actual_link_speed);
7083 	}
7084 }
7085 
7086 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7087 {
7088 	struct igb_adapter *adapter = netdev_priv(netdev);
7089 	struct e1000_hw *hw = &adapter->hw;
7090 	int actual_link_speed;
7091 
7092 	if (hw->mac.type != e1000_82576)
7093 		return -EOPNOTSUPP;
7094 
7095 	actual_link_speed = igb_link_mbps(adapter->link_speed);
7096 	if ((vf >= adapter->vfs_allocated_count) ||
7097 	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7098 	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7099 		return -EINVAL;
7100 
7101 	adapter->vf_rate_link_speed = actual_link_speed;
7102 	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7103 	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7104 
7105 	return 0;
7106 }
7107 
7108 static int igb_ndo_get_vf_config(struct net_device *netdev,
7109 				 int vf, struct ifla_vf_info *ivi)
7110 {
7111 	struct igb_adapter *adapter = netdev_priv(netdev);
7112 	if (vf >= adapter->vfs_allocated_count)
7113 		return -EINVAL;
7114 	ivi->vf = vf;
7115 	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7116 	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7117 	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7118 	ivi->qos = adapter->vf_data[vf].pf_qos;
7119 	return 0;
7120 }
7121 
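/**
 * igb_vmm_control - configure VM traffic replication and loopback
 * @adapter: board private structure
 *
 * Sets up VLAN tag handling for the MAC type, then enables or disables
 * VMDq loopback and replication depending on whether VFs are allocated.
 */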
7122 static void igb_vmm_control(struct igb_adapter *adapter)
7123 {
7124 	struct e1000_hw *hw = &adapter->hw;
7125 	u32 reg;
7126 
7127 	switch (hw->mac.type) {
7128 	case e1000_82575:
7129 	default:
7130 		/* replication is not supported for 82575 */
7131 		return;
7132 	case e1000_82576:
7133 		/* notify HW that the MAC is adding vlan tags */
7134 		reg = rd32(E1000_DTXCTL);
7135 		reg |= E1000_DTXCTL_VLAN_ADDED;
7136 		wr32(E1000_DTXCTL, reg);
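		/* Fall through */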
7137 	case e1000_82580:
7138 		/* enable replication vlan tag stripping */
7139 		reg = rd32(E1000_RPLOLR);
7140 		reg |= E1000_RPLOLR_STRVLAN;
7141 		wr32(E1000_RPLOLR, reg);
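		/* Fall through */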
7142 	case e1000_i350:
7143 		/* none of the above registers are supported by i350 */
7144 		break;
7145 	}
7146 
7147 	if (adapter->vfs_allocated_count) {
7148 		igb_vmdq_set_loopback_pf(hw, true);
7149 		igb_vmdq_set_replication_pf(hw, true);
7150 		igb_vmdq_set_anti_spoofing_pf(hw, true,
7151 						adapter->vfs_allocated_count);
7152 	} else {
7153 		igb_vmdq_set_loopback_pf(hw, false);
7154 		igb_vmdq_set_replication_pf(hw, false);
7155 	}
7156 }
7157 
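/**
 * igb_init_dmac - configure DMA Coalescing
 * @adapter: board private structure
 * @pba: packet buffer allocation, in KB
 */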
7158 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7159 {
7160 	struct e1000_hw *hw = &adapter->hw;
7161 	u32 dmac_thr;
7162 	u16 hwm;
7163 
7164 	if (hw->mac.type > e1000_82580) {
7165 		if (adapter->flags & IGB_FLAG_DMAC) {
7166 			u32 reg;
7167 
7168 			/* force threshold to 0. */
7169 			wr32(E1000_DMCTXTH, 0);
7170 
7171 			/*
7172 			 * DMA Coalescing high water mark needs to be greater
7173 			 * than the Rx threshold. Set hwm to PBA - max frame
7174 			 * size in 16B units, but no lower than PBA - 6KB.
7175 			 */
7176 			hwm = 64 * pba - adapter->max_frame_size / 16;
7177 			if (hwm < 64 * (pba - 6))
7178 				hwm = 64 * (pba - 6);
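			/* Worked example with illustrative values: for
			 * pba = 34 (KB) and max_frame_size = 1522,
			 * hwm = 64 * 34 - 1522 / 16 = 2081 sixteen-byte
			 * units (~32.5 KB), above the floor of
			 * 64 * (34 - 6) = 1792.
			 */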
7179 			reg = rd32(E1000_FCRTC);
7180 			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7181 			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7182 				& E1000_FCRTC_RTH_COAL_MASK);
7183 			wr32(E1000_FCRTC, reg);
7184 
7185 			/*
7186 			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7187 			 * frame size, but no lower than PBA - 10KB.
7188 			 */
7189 			dmac_thr = pba - adapter->max_frame_size / 512;
7190 			if (dmac_thr < pba - 10)
7191 				dmac_thr = pba - 10;
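			/* With the same illustrative values,
			 * dmac_thr = 34 - 1522 / 512 = 32 KB, above the
			 * floor of 34 - 10 = 24 KB.
			 */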
7192 			reg = rd32(E1000_DMACR);
7193 			reg &= ~E1000_DMACR_DMACTHR_MASK;
7194 			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7195 				& E1000_DMACR_DMACTHR_MASK);
7196 
7197 			/* transition to L0s or L1 if available */
7198 			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7199 
7200 			/* watchdog timer = ~1000 usec in 32 usec intervals */
7201 			reg |= (1000 >> 5);
7202 			wr32(E1000_DMACR, reg);
7203 
7204 			/*
7205 			 * no lower threshold to disable
7206 			 * coalescing (smart FIFO) - UTRESH = 0
7207 			 */
7208 			wr32(E1000_DMCRTRH, 0);
7209 
7210 			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7211 
7212 			wr32(E1000_DMCTLX, reg);
7213 
7214 			/*
7215 			 * free space in the Tx packet buffer needed to
7216 			 * wake from DMA coalescing
7217 			 */
7218 			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7219 			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7220 
7221 			/*
7222 			 * make the low power state decision controlled
7223 			 * by DMA coalescing
7224 			 */
7225 			reg = rd32(E1000_PCIEMISC);
7226 			reg &= ~E1000_PCIEMISC_LX_DECISION;
7227 			wr32(E1000_PCIEMISC, reg);
7228 		} /* endif adapter->flags & IGB_FLAG_DMAC */
7229 	} else if (hw->mac.type == e1000_82580) {
7230 		u32 reg = rd32(E1000_PCIEMISC);
7231 		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7232 		wr32(E1000_DMACR, 0);
7233 	}
7234 }
7235 
7236 /* igb_main.c */
7237