xref: /linux/drivers/net/ethernet/intel/igb/igb_main.c (revision 2dbf708448c836754d25fe6108c5bfe1f5697c95)
1 /*******************************************************************************
2 
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2012 Intel Corporation.
5 
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9 
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14 
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21 
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25 
26 *******************************************************************************/
27 
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29 
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61 
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
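/* For illustration: with MAJ = 3, MIN = 2, BUILD = 10, DRV_VERSION
 * expands to the version string "3.2.10-k".
 */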
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 				"Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72 
73 static const struct e1000_info *igb_info_tbl[] = {
74 	[board_82575] = &e1000_82575_info,
75 };
76 
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 	/* required last entry */
104 	{0, }
105 };
106 
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108 
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 						 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 			       int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 				 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
168 
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
174 
175 #ifdef CONFIG_PM
176 #ifdef CONFIG_PM_SLEEP
177 static int igb_suspend(struct device *);
178 #endif
179 static int igb_resume(struct device *);
180 #ifdef CONFIG_PM_RUNTIME
181 static int igb_runtime_suspend(struct device *dev);
182 static int igb_runtime_resume(struct device *dev);
183 static int igb_runtime_idle(struct device *dev);
184 #endif
185 static const struct dev_pm_ops igb_pm_ops = {
186 	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187 	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188 			igb_runtime_idle)
189 };
190 #endif
191 static void igb_shutdown(struct pci_dev *);
192 #ifdef CONFIG_IGB_DCA
193 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194 static struct notifier_block dca_notifier = {
195 	.notifier_call	= igb_notify_dca,
196 	.next		= NULL,
197 	.priority	= 0
198 };
199 #endif
200 #ifdef CONFIG_NET_POLL_CONTROLLER
201 /* for netdump / net console */
202 static void igb_netpoll(struct net_device *);
203 #endif
204 #ifdef CONFIG_PCI_IOV
205 static unsigned int max_vfs = 0;
206 module_param(max_vfs, uint, 0);
207 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208                  "per physical function");
209 #endif /* CONFIG_PCI_IOV */
210 
211 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212 		     pci_channel_state_t);
213 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214 static void igb_io_resume(struct pci_dev *);
215 
216 static struct pci_error_handlers igb_err_handler = {
217 	.error_detected = igb_io_error_detected,
218 	.slot_reset = igb_io_slot_reset,
219 	.resume = igb_io_resume,
220 };
221 
222 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223 
224 static struct pci_driver igb_driver = {
225 	.name     = igb_driver_name,
226 	.id_table = igb_pci_tbl,
227 	.probe    = igb_probe,
228 	.remove   = __devexit_p(igb_remove),
229 #ifdef CONFIG_PM
230 	.driver.pm = &igb_pm_ops,
231 #endif
232 	.shutdown = igb_shutdown,
233 	.err_handler = &igb_err_handler
234 };
235 
236 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238 MODULE_LICENSE("GPL");
239 MODULE_VERSION(DRV_VERSION);
240 
241 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
242 static int debug = -1;
243 module_param(debug, int, 0);
244 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
245 
246 struct igb_reg_info {
247 	u32 ofs;
248 	char *name;
249 };
250 
251 static const struct igb_reg_info igb_reg_info_tbl[] = {
252 
253 	/* General Registers */
254 	{E1000_CTRL, "CTRL"},
255 	{E1000_STATUS, "STATUS"},
256 	{E1000_CTRL_EXT, "CTRL_EXT"},
257 
258 	/* Interrupt Registers */
259 	{E1000_ICR, "ICR"},
260 
261 	/* RX Registers */
262 	{E1000_RCTL, "RCTL"},
263 	{E1000_RDLEN(0), "RDLEN"},
264 	{E1000_RDH(0), "RDH"},
265 	{E1000_RDT(0), "RDT"},
266 	{E1000_RXDCTL(0), "RXDCTL"},
267 	{E1000_RDBAL(0), "RDBAL"},
268 	{E1000_RDBAH(0), "RDBAH"},
269 
270 	/* TX Registers */
271 	{E1000_TCTL, "TCTL"},
272 	{E1000_TDBAL(0), "TDBAL"},
273 	{E1000_TDBAH(0), "TDBAH"},
274 	{E1000_TDLEN(0), "TDLEN"},
275 	{E1000_TDH(0), "TDH"},
276 	{E1000_TDT(0), "TDT"},
277 	{E1000_TXDCTL(0), "TXDCTL"},
278 	{E1000_TDFH, "TDFH"},
279 	{E1000_TDFT, "TDFT"},
280 	{E1000_TDFHS, "TDFHS"},
281 	{E1000_TDFPC, "TDFPC"},
282 
283 	/* List Terminator */
284 	{}
285 };
286 
287 /*
288  * igb_regdump - register printout routine
289  */
290 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
291 {
292 	int n = 0;
293 	char rname[16];
294 	u32 regs[8];
295 
296 	switch (reginfo->ofs) {
297 	case E1000_RDLEN(0):
298 		for (n = 0; n < 4; n++)
299 			regs[n] = rd32(E1000_RDLEN(n));
300 		break;
301 	case E1000_RDH(0):
302 		for (n = 0; n < 4; n++)
303 			regs[n] = rd32(E1000_RDH(n));
304 		break;
305 	case E1000_RDT(0):
306 		for (n = 0; n < 4; n++)
307 			regs[n] = rd32(E1000_RDT(n));
308 		break;
309 	case E1000_RXDCTL(0):
310 		for (n = 0; n < 4; n++)
311 			regs[n] = rd32(E1000_RXDCTL(n));
312 		break;
313 	case E1000_RDBAL(0):
314 		for (n = 0; n < 4; n++)
315 			regs[n] = rd32(E1000_RDBAL(n));
316 		break;
317 	case E1000_RDBAH(0):
318 		for (n = 0; n < 4; n++)
319 			regs[n] = rd32(E1000_RDBAH(n));
320 		break;
321 	case E1000_TDBAL(0):
322 		for (n = 0; n < 4; n++)
323 			regs[n] = rd32(E1000_TDBAL(n));
324 		break;
325 	case E1000_TDBAH(0):
326 		for (n = 0; n < 4; n++)
327 			regs[n] = rd32(E1000_TDBAH(n));
328 		break;
329 	case E1000_TDLEN(0):
330 		for (n = 0; n < 4; n++)
331 			regs[n] = rd32(E1000_TDLEN(n));
332 		break;
333 	case E1000_TDH(0):
334 		for (n = 0; n < 4; n++)
335 			regs[n] = rd32(E1000_TDH(n));
336 		break;
337 	case E1000_TDT(0):
338 		for (n = 0; n < 4; n++)
339 			regs[n] = rd32(E1000_TDT(n));
340 		break;
341 	case E1000_TXDCTL(0):
342 		for (n = 0; n < 4; n++)
343 			regs[n] = rd32(E1000_TXDCTL(n));
344 		break;
345 	default:
346 		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
347 		return;
348 	}
349 
350 	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
351 	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
352 		regs[2], regs[3]);
353 }
354 
355 /*
356  * igb_dump - Print registers, tx-rings and rx-rings
357  */
358 static void igb_dump(struct igb_adapter *adapter)
359 {
360 	struct net_device *netdev = adapter->netdev;
361 	struct e1000_hw *hw = &adapter->hw;
362 	struct igb_reg_info *reginfo;
363 	struct igb_ring *tx_ring;
364 	union e1000_adv_tx_desc *tx_desc;
365 	struct my_u0 { u64 a; u64 b; } *u0;
366 	struct igb_ring *rx_ring;
367 	union e1000_adv_rx_desc *rx_desc;
368 	u32 staterr;
369 	u16 i, n;
370 
371 	if (!netif_msg_hw(adapter))
372 		return;
373 
374 	/* Print netdevice Info */
375 	if (netdev) {
376 		dev_info(&adapter->pdev->dev, "Net device Info\n");
377 		pr_info("Device Name     state            trans_start      "
378 			"last_rx\n");
379 		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
380 			netdev->state, netdev->trans_start, netdev->last_rx);
381 	}
382 
383 	/* Print Registers */
384 	dev_info(&adapter->pdev->dev, "Register Dump\n");
385 	pr_info(" Register Name   Value\n");
386 	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
387 	     reginfo->name; reginfo++) {
388 		igb_regdump(hw, reginfo);
389 	}
390 
391 	/* Print TX Ring Summary */
392 	if (!netdev || !netif_running(netdev))
393 		goto exit;
394 
395 	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
396 	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
397 	for (n = 0; n < adapter->num_tx_queues; n++) {
398 		struct igb_tx_buffer *buffer_info;
399 		tx_ring = adapter->tx_ring[n];
400 		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
401 		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
402 			n, tx_ring->next_to_use, tx_ring->next_to_clean,
403 			(u64)buffer_info->dma,
404 			buffer_info->length,
405 			buffer_info->next_to_watch,
406 			(u64)buffer_info->time_stamp);
407 	}
408 
409 	/* Print TX Rings */
410 	if (!netif_msg_tx_done(adapter))
411 		goto rx_ring_summary;
412 
413 	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
414 
415 	/* Transmit Descriptor Formats
416 	 *
417 	 * Advanced Transmit Descriptor
418 	 *   +--------------------------------------------------------------+
419 	 * 0 |         Buffer Address [63:0]                                |
420 	 *   +--------------------------------------------------------------+
421 	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
422 	 *   +--------------------------------------------------------------+
423 	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
424 	 */
425 
426 	for (n = 0; n < adapter->num_tx_queues; n++) {
427 		tx_ring = adapter->tx_ring[n];
428 		pr_info("------------------------------------\n");
429 		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
430 		pr_info("------------------------------------\n");
431 		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
432 			"[bi->dma       ] leng  ntw timestamp        "
433 			"bi->skb\n");
434 
435 		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
436 			const char *next_desc;
437 			struct igb_tx_buffer *buffer_info;
438 			tx_desc = IGB_TX_DESC(tx_ring, i);
439 			buffer_info = &tx_ring->tx_buffer_info[i];
440 			u0 = (struct my_u0 *)tx_desc;
441 			if (i == tx_ring->next_to_use &&
442 			    i == tx_ring->next_to_clean)
443 				next_desc = " NTC/U";
444 			else if (i == tx_ring->next_to_use)
445 				next_desc = " NTU";
446 			else if (i == tx_ring->next_to_clean)
447 				next_desc = " NTC";
448 			else
449 				next_desc = "";
450 
451 			pr_info("T [0x%03X]    %016llX %016llX %016llX"
452 				" %04X  %p %016llX %p%s\n", i,
453 				le64_to_cpu(u0->a),
454 				le64_to_cpu(u0->b),
455 				(u64)buffer_info->dma,
456 				buffer_info->length,
457 				buffer_info->next_to_watch,
458 				(u64)buffer_info->time_stamp,
459 				buffer_info->skb, next_desc);
460 
461 			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
462 				print_hex_dump(KERN_INFO, "",
463 					DUMP_PREFIX_ADDRESS,
464 					16, 1, phys_to_virt(buffer_info->dma),
465 					buffer_info->length, true);
466 		}
467 	}
468 
469 	/* Print RX Rings Summary */
470 rx_ring_summary:
471 	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
472 	pr_info("Queue [NTU] [NTC]\n");
473 	for (n = 0; n < adapter->num_rx_queues; n++) {
474 		rx_ring = adapter->rx_ring[n];
475 		pr_info(" %5d %5X %5X\n",
476 			n, rx_ring->next_to_use, rx_ring->next_to_clean);
477 	}
478 
479 	/* Print RX Rings */
480 	if (!netif_msg_rx_status(adapter))
481 		goto exit;
482 
483 	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
484 
485 	/* Advanced Receive Descriptor (Read) Format
486 	 *    63                                           1        0
487 	 *    +-----------------------------------------------------+
488 	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
489 	 *    +----------------------------------------------+------+
490 	 *  8 |       Header Buffer Address [63:1]           |  DD  |
491 	 *    +-----------------------------------------------------+
492 	 *
493 	 *
494 	 * Advanced Receive Descriptor (Write-Back) Format
495 	 *
496 	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
497 	 *   +------------------------------------------------------+
498 	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
499 	 *   | Checksum   Ident  |   |           |    | Type | Type |
500 	 *   +------------------------------------------------------+
501 	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
502 	 *   +------------------------------------------------------+
503 	 *   63       48 47    32 31            20 19               0
504 	 */
505 
506 	for (n = 0; n < adapter->num_rx_queues; n++) {
507 		rx_ring = adapter->rx_ring[n];
508 		pr_info("------------------------------------\n");
509 		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
510 		pr_info("------------------------------------\n");
511 		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
512 			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
513 		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
514 			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
515 
516 		for (i = 0; i < rx_ring->count; i++) {
517 			const char *next_desc;
518 			struct igb_rx_buffer *buffer_info;
519 			buffer_info = &rx_ring->rx_buffer_info[i];
520 			rx_desc = IGB_RX_DESC(rx_ring, i);
521 			u0 = (struct my_u0 *)rx_desc;
522 			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
523 
524 			if (i == rx_ring->next_to_use)
525 				next_desc = " NTU";
526 			else if (i == rx_ring->next_to_clean)
527 				next_desc = " NTC";
528 			else
529 				next_desc = "";
530 
531 			if (staterr & E1000_RXD_STAT_DD) {
532 				/* Descriptor Done */
533 				pr_info("%s[0x%03X]     %016llX %016llX -------"
534 					"--------- %p%s\n", "RWB", i,
535 					le64_to_cpu(u0->a),
536 					le64_to_cpu(u0->b),
537 					buffer_info->skb, next_desc);
538 			} else {
539 				pr_info("%s[0x%03X]     %016llX %016llX %016llX"
540 					" %p%s\n", "R  ", i,
541 					le64_to_cpu(u0->a),
542 					le64_to_cpu(u0->b),
543 					(u64)buffer_info->dma,
544 					buffer_info->skb, next_desc);
545 
546 				if (netif_msg_pktdata(adapter)) {
547 					print_hex_dump(KERN_INFO, "",
548 						DUMP_PREFIX_ADDRESS,
549 						16, 1,
550 						phys_to_virt(buffer_info->dma),
551 						IGB_RX_HDR_LEN, true);
552 					print_hex_dump(KERN_INFO, "",
553 					  DUMP_PREFIX_ADDRESS,
554 					  16, 1,
555 					  phys_to_virt(
556 					    buffer_info->page_dma +
557 					    buffer_info->page_offset),
558 					  PAGE_SIZE/2, true);
559 				}
560 			}
561 		}
562 	}
563 
564 exit:
565 	return;
566 }
567 
568 
569 /**
570  * igb_read_clock - read raw cycle counter (to be used by time counter)
571  */
572 static cycle_t igb_read_clock(const struct cyclecounter *tc)
573 {
574 	struct igb_adapter *adapter =
575 		container_of(tc, struct igb_adapter, cycles);
576 	struct e1000_hw *hw = &adapter->hw;
577 	u64 stamp = 0;
578 	int shift = 0;
579 
580 	/*
581 	 * The timestamp latches on lowest register read. For the 82580
582 	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
583 	 * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
584 	 * adjusted TIMINCA, so SYSTIMR just reads as all 0s and can be ignored.
585 	if (hw->mac.type >= e1000_82580) {
586 		stamp = rd32(E1000_SYSTIMR) >> 8;
587 		shift = IGB_82580_TSYNC_SHIFT;
588 	}
589 
590 	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
591 	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
592 	return stamp;
593 }
594 
595 /**
596  * igb_get_hw_dev - return the net_device associated with this hardware
597  * used by the hardware layer to print debugging information
598  **/
599 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
600 {
601 	struct igb_adapter *adapter = hw->back;
602 	return adapter->netdev;
603 }
604 
605 /**
606  * igb_init_module - Driver Registration Routine
607  *
608  * igb_init_module is the first routine called when the driver is
609  * loaded. All it does is register with the PCI subsystem.
610  **/
611 static int __init igb_init_module(void)
612 {
613 	int ret;
614 	pr_info("%s - version %s\n",
615 	       igb_driver_string, igb_driver_version);
616 
617 	pr_info("%s\n", igb_copyright);
618 
619 #ifdef CONFIG_IGB_DCA
620 	dca_register_notify(&dca_notifier);
621 #endif
622 	ret = pci_register_driver(&igb_driver);
623 	return ret;
624 }
625 
626 module_init(igb_init_module);
627 
628 /**
629  * igb_exit_module - Driver Exit Cleanup Routine
630  *
631  * igb_exit_module is called just before the driver is removed
632  * from memory.
633  **/
634 static void __exit igb_exit_module(void)
635 {
636 #ifdef CONFIG_IGB_DCA
637 	dca_unregister_notify(&dca_notifier);
638 #endif
639 	pci_unregister_driver(&igb_driver);
640 }
641 
642 module_exit(igb_exit_module);
643 
644 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
645 /**
646  * igb_cache_ring_register - Descriptor ring to register mapping
647  * @adapter: board private structure to initialize
648  *
649  * Once we know the feature-set enabled for the device, we cache the
650  * register offset that each descriptor ring is assigned to.
651  **/
652 static void igb_cache_ring_register(struct igb_adapter *adapter)
653 {
654 	int i = 0, j = 0;
655 	u32 rbase_offset = adapter->vfs_allocated_count;
656 
657 	switch (adapter->hw.mac.type) {
658 	case e1000_82576:
659 		/* The queues are allocated for virtualization such that VF 0
660 		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
661 		 * In order to avoid collision we start at the first free queue
662 		 * and continue consuming queues in the same sequence
663 		 */
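		/* Worked example (illustrative only): with 4 VFs the VFs own
		 * queues 0-3 and 8-11, so rbase_offset = 4 and the PF's RSS
		 * queues map as
		 *   Q_IDX_82576(0..3) = 0, 8, 1, 9  ->  reg_idx 4, 12, 5, 13
		 * i.e. the PF keeps alternating between the low and high
		 * queue halves in the same pattern the VFs use.
		 */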
664 		if (adapter->vfs_allocated_count) {
665 			for (; i < adapter->rss_queues; i++)
666 				adapter->rx_ring[i]->reg_idx = rbase_offset +
667 				                               Q_IDX_82576(i);
668 		}
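		/* Fall through: any remaining PF queues use the default
		 * one-to-one register mapping below.
		 */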
669 	case e1000_82575:
670 	case e1000_82580:
671 	case e1000_i350:
672 	default:
673 		for (; i < adapter->num_rx_queues; i++)
674 			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
675 		for (; j < adapter->num_tx_queues; j++)
676 			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
677 		break;
678 	}
679 }
680 
681 static void igb_free_queues(struct igb_adapter *adapter)
682 {
683 	int i;
684 
685 	for (i = 0; i < adapter->num_tx_queues; i++) {
686 		kfree(adapter->tx_ring[i]);
687 		adapter->tx_ring[i] = NULL;
688 	}
689 	for (i = 0; i < adapter->num_rx_queues; i++) {
690 		kfree(adapter->rx_ring[i]);
691 		adapter->rx_ring[i] = NULL;
692 	}
693 	adapter->num_rx_queues = 0;
694 	adapter->num_tx_queues = 0;
695 }
696 
697 /**
698  * igb_alloc_queues - Allocate memory for all rings
699  * @adapter: board private structure to initialize
700  *
701  * We allocate one ring per queue at run-time since we don't know the
702  * number of queues at compile-time.
703  **/
704 static int igb_alloc_queues(struct igb_adapter *adapter)
705 {
706 	struct igb_ring *ring;
707 	int i;
708 	int orig_node = adapter->node;
709 
710 	for (i = 0; i < adapter->num_tx_queues; i++) {
711 		if (orig_node == -1) {
712 			int cur_node = next_online_node(adapter->node);
713 			if (cur_node == MAX_NUMNODES)
714 				cur_node = first_online_node;
715 			adapter->node = cur_node;
716 		}
717 		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
718 				    adapter->node);
719 		if (!ring)
720 			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
721 		if (!ring)
722 			goto err;
723 		ring->count = adapter->tx_ring_count;
724 		ring->queue_index = i;
725 		ring->dev = &adapter->pdev->dev;
726 		ring->netdev = adapter->netdev;
727 		ring->numa_node = adapter->node;
728 		/* For 82575, context index must be unique per ring. */
729 		if (adapter->hw.mac.type == e1000_82575)
730 			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
731 		adapter->tx_ring[i] = ring;
732 	}
733 	/* Restore the adapter's original node */
734 	adapter->node = orig_node;
735 
736 	for (i = 0; i < adapter->num_rx_queues; i++) {
737 		if (orig_node == -1) {
738 			int cur_node = next_online_node(adapter->node);
739 			if (cur_node == MAX_NUMNODES)
740 				cur_node = first_online_node;
741 			adapter->node = cur_node;
742 		}
743 		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
744 				    adapter->node);
745 		if (!ring)
746 			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
747 		if (!ring)
748 			goto err;
749 		ring->count = adapter->rx_ring_count;
750 		ring->queue_index = i;
751 		ring->dev = &adapter->pdev->dev;
752 		ring->netdev = adapter->netdev;
753 		ring->numa_node = adapter->node;
754 		/* set flag indicating ring supports SCTP checksum offload */
755 		if (adapter->hw.mac.type >= e1000_82576)
756 			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
757 
758 		/* On i350, loopback VLAN packets have the tag byte-swapped. */
759 		if (adapter->hw.mac.type == e1000_i350)
760 			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
761 
762 		adapter->rx_ring[i] = ring;
763 	}
764 	/* Restore the adapter's original node */
765 	adapter->node = orig_node;
766 
767 	igb_cache_ring_register(adapter);
768 
769 	return 0;
770 
771 err:
772 	/* Restore the adapter's original node */
773 	adapter->node = orig_node;
774 	igb_free_queues(adapter);
775 
776 	return -ENOMEM;
777 }
778 
779 /**
780  *  igb_write_ivar - configure ivar for given MSI-X vector
781  *  @hw: pointer to the HW structure
782  *  @msix_vector: vector number we are allocating to a given ring
783  *  @index: row index of IVAR register to write within IVAR table
784  *  @offset: column offset within IVAR, should be a multiple of 8
785  *
786  *  This function is intended to handle the writing of the IVAR register
787  *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
788  *  each containing an cause allocation for an Rx and Tx ring, and a
789  *  variable number of rows depending on the number of queues supported.
790  **/
791 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
792 			   int index, int offset)
793 {
794 	u32 ivar = array_rd32(E1000_IVAR0, index);
795 
796 	/* clear any bits that are currently set */
797 	ivar &= ~((u32)0xFF << offset);
798 
799 	/* write vector and valid bit */
800 	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
801 
802 	array_wr32(E1000_IVAR0, index, ivar);
803 }
804 
805 #define IGB_N0_QUEUE -1
806 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
807 {
808 	struct igb_adapter *adapter = q_vector->adapter;
809 	struct e1000_hw *hw = &adapter->hw;
810 	int rx_queue = IGB_N0_QUEUE;
811 	int tx_queue = IGB_N0_QUEUE;
812 	u32 msixbm = 0;
813 
814 	if (q_vector->rx.ring)
815 		rx_queue = q_vector->rx.ring->reg_idx;
816 	if (q_vector->tx.ring)
817 		tx_queue = q_vector->tx.ring->reg_idx;
818 
819 	switch (hw->mac.type) {
820 	case e1000_82575:
821 		/* The 82575 assigns vectors using a bitmask, which matches the
822 		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
823 		   or more queues to a vector, we write the appropriate bits
824 		   into the MSIXBM register for that vector. */
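		/* For example, mapping Rx queue 1 and Tx queue 1 to this
		 * vector writes (E1000_EICR_RX_QUEUE0 << 1) |
		 * (E1000_EICR_TX_QUEUE0 << 1) into MSIXBM(msix_vector).
		 */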
825 		if (rx_queue > IGB_N0_QUEUE)
826 			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
827 		if (tx_queue > IGB_N0_QUEUE)
828 			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
829 		if (!adapter->msix_entries && msix_vector == 0)
830 			msixbm |= E1000_EIMS_OTHER;
831 		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
832 		q_vector->eims_value = msixbm;
833 		break;
834 	case e1000_82576:
835 		/*
836 		 * 82576 uses a table that essentially consists of 2 columns
837 		 * with 8 rows.  The ordering is column-major so we use the
838 		 * lower 3 bits as the row index, and the 4th bit as the
839 		 * column offset.
840 		 */
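		/* Worked example: rx_queue = 10 -> index = 10 & 0x7 = 2 and
		 * offset = (10 & 0x8) << 1 = 16, i.e. byte 2 of IVAR(2);
		 * tx_queue = 10 uses offset 16 + 8 = 24, i.e. byte 3 of the
		 * same register.
		 */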
841 		if (rx_queue > IGB_N0_QUEUE)
842 			igb_write_ivar(hw, msix_vector,
843 				       rx_queue & 0x7,
844 				       (rx_queue & 0x8) << 1);
845 		if (tx_queue > IGB_N0_QUEUE)
846 			igb_write_ivar(hw, msix_vector,
847 				       tx_queue & 0x7,
848 				       ((tx_queue & 0x8) << 1) + 8);
849 		q_vector->eims_value = 1 << msix_vector;
850 		break;
851 	case e1000_82580:
852 	case e1000_i350:
853 		/*
854 		 * On 82580 and newer adapters the scheme is similar to 82576
855 		 * however instead of ordering column-major we have things
856 		 * ordered row-major.  So we traverse the table by using
857 		 * bit 0 as the column offset, and the remaining bits as the
858 		 * row index.
859 		 */
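		/* Worked example: rx_queue = 5 -> index = 5 >> 1 = 2 and
		 * offset = (5 & 0x1) << 4 = 16; tx_queue = 5 uses the same
		 * index with offset 16 + 8 = 24.
		 */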
860 		if (rx_queue > IGB_N0_QUEUE)
861 			igb_write_ivar(hw, msix_vector,
862 				       rx_queue >> 1,
863 				       (rx_queue & 0x1) << 4);
864 		if (tx_queue > IGB_N0_QUEUE)
865 			igb_write_ivar(hw, msix_vector,
866 				       tx_queue >> 1,
867 				       ((tx_queue & 0x1) << 4) + 8);
868 		q_vector->eims_value = 1 << msix_vector;
869 		break;
870 	default:
871 		BUG();
872 		break;
873 	}
874 
875 	/* add q_vector eims value to global eims_enable_mask */
876 	adapter->eims_enable_mask |= q_vector->eims_value;
877 
878 	/* configure q_vector to set itr on first interrupt */
879 	q_vector->set_itr = 1;
880 }
881 
882 /**
883  * igb_configure_msix - Configure MSI-X hardware
884  *
885  * igb_configure_msix sets up the hardware to properly
886  * generate MSI-X interrupts.
887  **/
888 static void igb_configure_msix(struct igb_adapter *adapter)
889 {
890 	u32 tmp;
891 	int i, vector = 0;
892 	struct e1000_hw *hw = &adapter->hw;
893 
894 	adapter->eims_enable_mask = 0;
895 
896 	/* set vector for other causes, i.e. link changes */
897 	switch (hw->mac.type) {
898 	case e1000_82575:
899 		tmp = rd32(E1000_CTRL_EXT);
900 		/* enable MSI-X PBA support*/
901 		/* enable MSI-X PBA support */
902 
903 		/* Auto-Mask interrupts upon ICR read. */
904 		tmp |= E1000_CTRL_EXT_EIAME;
905 		tmp |= E1000_CTRL_EXT_IRCA;
906 
907 		wr32(E1000_CTRL_EXT, tmp);
908 
909 		/* enable msix_other interrupt */
910 		array_wr32(E1000_MSIXBM(0), vector++,
911 		                      E1000_EIMS_OTHER);
912 		adapter->eims_other = E1000_EIMS_OTHER;
913 
914 		break;
915 
916 	case e1000_82576:
917 	case e1000_82580:
918 	case e1000_i350:
919 		/* Turn on MSI-X capability first, or our settings
920 		 * won't stick.  And it will take days to debug. */
921 		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
922 		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
923 		                E1000_GPIE_NSICR);
924 
925 		/* enable msix_other interrupt */
926 		adapter->eims_other = 1 << vector;
927 		tmp = (vector++ | E1000_IVAR_VALID) << 8;
928 
929 		wr32(E1000_IVAR_MISC, tmp);
930 		break;
931 	default:
932 		/* do nothing, since nothing else supports MSI-X */
933 		break;
934 	} /* switch (hw->mac.type) */
935 
936 	adapter->eims_enable_mask |= adapter->eims_other;
937 
938 	for (i = 0; i < adapter->num_q_vectors; i++)
939 		igb_assign_vector(adapter->q_vector[i], vector++);
940 
941 	wrfl();
942 }
943 
944 /**
945  * igb_request_msix - Initialize MSI-X interrupts
946  *
947  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
948  * kernel.
949  **/
950 static int igb_request_msix(struct igb_adapter *adapter)
951 {
952 	struct net_device *netdev = adapter->netdev;
953 	struct e1000_hw *hw = &adapter->hw;
954 	int i, err = 0, vector = 0;
955 
956 	err = request_irq(adapter->msix_entries[vector].vector,
957 	                  igb_msix_other, 0, netdev->name, adapter);
958 	if (err)
959 		goto out;
960 	vector++;
961 
962 	for (i = 0; i < adapter->num_q_vectors; i++) {
963 		struct igb_q_vector *q_vector = adapter->q_vector[i];
964 
965 		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
966 
967 		if (q_vector->rx.ring && q_vector->tx.ring)
968 			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
969 				q_vector->rx.ring->queue_index);
970 		else if (q_vector->tx.ring)
971 			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
972 				q_vector->tx.ring->queue_index);
973 		else if (q_vector->rx.ring)
974 			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
975 				q_vector->rx.ring->queue_index);
976 		else
977 			sprintf(q_vector->name, "%s-unused", netdev->name);
978 
979 		err = request_irq(adapter->msix_entries[vector].vector,
980 		                  igb_msix_ring, 0, q_vector->name,
981 		                  q_vector);
982 		if (err)
983 			goto out;
984 		vector++;
985 	}
986 
987 	igb_configure_msix(adapter);
988 	return 0;
989 out:
990 	return err;
991 }
992 
993 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
994 {
995 	if (adapter->msix_entries) {
996 		pci_disable_msix(adapter->pdev);
997 		kfree(adapter->msix_entries);
998 		adapter->msix_entries = NULL;
999 	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
1000 		pci_disable_msi(adapter->pdev);
1001 	}
1002 }
1003 
1004 /**
1005  * igb_free_q_vectors - Free memory allocated for interrupt vectors
1006  * @adapter: board private structure to initialize
1007  *
1008  * This function frees the memory allocated to the q_vectors.  In addition if
1009  * NAPI is enabled it will delete any references to the NAPI struct prior
1010  * to freeing the q_vector.
1011  **/
1012 static void igb_free_q_vectors(struct igb_adapter *adapter)
1013 {
1014 	int v_idx;
1015 
1016 	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1017 		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1018 		adapter->q_vector[v_idx] = NULL;
1019 		if (!q_vector)
1020 			continue;
1021 		netif_napi_del(&q_vector->napi);
1022 		kfree(q_vector);
1023 	}
1024 	adapter->num_q_vectors = 0;
1025 }
1026 
1027 /**
1028  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1029  *
1030  * This function resets the device so that it has no Rx queues, Tx queues,
1031  * or MSI-X interrupts allocated.
1032  */
1033 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1034 {
1035 	igb_free_queues(adapter);
1036 	igb_free_q_vectors(adapter);
1037 	igb_reset_interrupt_capability(adapter);
1038 }
1039 
1040 /**
1041  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1042  *
1043  * Attempt to configure interrupts using the best available
1044  * capabilities of the hardware and kernel.
1045  **/
1046 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1047 {
1048 	int err;
1049 	int numvecs, i;
1050 
1051 	/* Number of supported queues. */
1052 	adapter->num_rx_queues = adapter->rss_queues;
1053 	if (adapter->vfs_allocated_count)
1054 		adapter->num_tx_queues = 1;
1055 	else
1056 		adapter->num_tx_queues = adapter->rss_queues;
1057 
1058 	/* start with one vector for every rx queue */
1059 	numvecs = adapter->num_rx_queues;
1060 
1061 	/* if tx handler is separate add 1 for every tx queue */
1062 	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1063 		numvecs += adapter->num_tx_queues;
1064 
1065 	/* store the number of vectors reserved for queues */
1066 	adapter->num_q_vectors = numvecs;
1067 
1068 	/* add 1 vector for link status interrupts */
1069 	numvecs++;
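	/* For example, assuming 4 RSS queues and no VFs: with queue pairing
	 * disabled this requests 4 + 4 + 1 = 9 vectors, while with queue
	 * pairing enabled Rx and Tx share a vector and only 4 + 1 = 5 are
	 * requested.
	 */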
1070 	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1071 					GFP_KERNEL);
1072 	if (!adapter->msix_entries)
1073 		goto msi_only;
1074 
1075 	for (i = 0; i < numvecs; i++)
1076 		adapter->msix_entries[i].entry = i;
1077 
1078 	err = pci_enable_msix(adapter->pdev,
1079 			      adapter->msix_entries,
1080 			      numvecs);
1081 	if (err == 0)
1082 		goto out;
1083 
1084 	igb_reset_interrupt_capability(adapter);
1085 
1086 	/* If we can't do MSI-X, try MSI */
1087 msi_only:
1088 #ifdef CONFIG_PCI_IOV
1089 	/* disable SR-IOV for non MSI-X configurations */
1090 	if (adapter->vf_data) {
1091 		struct e1000_hw *hw = &adapter->hw;
1092 		/* disable iov and allow time for transactions to clear */
1093 		pci_disable_sriov(adapter->pdev);
1094 		msleep(500);
1095 
1096 		kfree(adapter->vf_data);
1097 		adapter->vf_data = NULL;
1098 		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1099 		wrfl();
1100 		msleep(100);
1101 		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1102 	}
1103 #endif
1104 	adapter->vfs_allocated_count = 0;
1105 	adapter->rss_queues = 1;
1106 	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1107 	adapter->num_rx_queues = 1;
1108 	adapter->num_tx_queues = 1;
1109 	adapter->num_q_vectors = 1;
1110 	if (!pci_enable_msi(adapter->pdev))
1111 		adapter->flags |= IGB_FLAG_HAS_MSI;
1112 out:
1113 	/* Notify the stack of the (possibly) reduced queue counts. */
1114 	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1115 	return netif_set_real_num_rx_queues(adapter->netdev,
1116 					    adapter->num_rx_queues);
1117 }
1118 
1119 /**
1120  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1121  * @adapter: board private structure to initialize
1122  *
1123  * We allocate one q_vector per queue interrupt.  If allocation fails we
1124  * return -ENOMEM.
1125  **/
1126 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1127 {
1128 	struct igb_q_vector *q_vector;
1129 	struct e1000_hw *hw = &adapter->hw;
1130 	int v_idx;
1131 	int orig_node = adapter->node;
1132 
1133 	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1134 		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1135 						adapter->num_tx_queues)) &&
1136 		    (adapter->num_rx_queues == v_idx))
1137 			adapter->node = orig_node;
1138 		if (orig_node == -1) {
1139 			int cur_node = next_online_node(adapter->node);
1140 			if (cur_node == MAX_NUMNODES)
1141 				cur_node = first_online_node;
1142 			adapter->node = cur_node;
1143 		}
1144 		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1145 					adapter->node);
1146 		if (!q_vector)
1147 			q_vector = kzalloc(sizeof(struct igb_q_vector),
1148 					   GFP_KERNEL);
1149 		if (!q_vector)
1150 			goto err_out;
1151 		q_vector->adapter = adapter;
1152 		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1153 		q_vector->itr_val = IGB_START_ITR;
1154 		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1155 		adapter->q_vector[v_idx] = q_vector;
1156 	}
1157 	/* Restore the adapter's original node */
1158 	adapter->node = orig_node;
1159 
1160 	return 0;
1161 
1162 err_out:
1163 	/* Restore the adapter's original node */
1164 	adapter->node = orig_node;
1165 	igb_free_q_vectors(adapter);
1166 	return -ENOMEM;
1167 }
1168 
1169 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1170                                       int ring_idx, int v_idx)
1171 {
1172 	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1173 
1174 	q_vector->rx.ring = adapter->rx_ring[ring_idx];
1175 	q_vector->rx.ring->q_vector = q_vector;
1176 	q_vector->rx.count++;
1177 	q_vector->itr_val = adapter->rx_itr_setting;
1178 	if (q_vector->itr_val && q_vector->itr_val <= 3)
1179 		q_vector->itr_val = IGB_START_ITR;
1180 }
1181 
1182 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1183                                       int ring_idx, int v_idx)
1184 {
1185 	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1186 
1187 	q_vector->tx.ring = adapter->tx_ring[ring_idx];
1188 	q_vector->tx.ring->q_vector = q_vector;
1189 	q_vector->tx.count++;
1190 	q_vector->itr_val = adapter->tx_itr_setting;
1191 	q_vector->tx.work_limit = adapter->tx_work_limit;
1192 	if (q_vector->itr_val && q_vector->itr_val <= 3)
1193 		q_vector->itr_val = IGB_START_ITR;
1194 }
1195 
1196 /**
1197  * igb_map_ring_to_vector - maps allocated queues to vectors
1198  *
1199  * This function maps the recently allocated queues to vectors.
1200  **/
1201 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1202 {
1203 	int i;
1204 	int v_idx = 0;
1205 
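	/* For example, with 4 Rx and 4 Tx queues: given 8 q_vectors each ring
	 * gets its own vector (Rx 0-3 on vectors 0-3, Tx 0-3 on vectors 4-7);
	 * given only 4 q_vectors, Rx(i) and Tx(i) are paired on vector i.
	 */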
1206 	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1207 	    (adapter->num_q_vectors < adapter->num_tx_queues))
1208 		return -ENOMEM;
1209 
1210 	if (adapter->num_q_vectors >=
1211 	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
1212 		for (i = 0; i < adapter->num_rx_queues; i++)
1213 			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1214 		for (i = 0; i < adapter->num_tx_queues; i++)
1215 			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1216 	} else {
1217 		for (i = 0; i < adapter->num_rx_queues; i++) {
1218 			if (i < adapter->num_tx_queues)
1219 				igb_map_tx_ring_to_vector(adapter, i, v_idx);
1220 			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1221 		}
1222 		for (; i < adapter->num_tx_queues; i++)
1223 			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1224 	}
1225 	return 0;
1226 }
1227 
1228 /**
1229  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1230  *
1231  * This function initializes the interrupts and allocates all of the queues.
1232  **/
1233 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1234 {
1235 	struct pci_dev *pdev = adapter->pdev;
1236 	int err;
1237 
1238 	err = igb_set_interrupt_capability(adapter);
1239 	if (err)
1240 		return err;
1241 
1242 	err = igb_alloc_q_vectors(adapter);
1243 	if (err) {
1244 		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1245 		goto err_alloc_q_vectors;
1246 	}
1247 
1248 	err = igb_alloc_queues(adapter);
1249 	if (err) {
1250 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1251 		goto err_alloc_queues;
1252 	}
1253 
1254 	err = igb_map_ring_to_vector(adapter);
1255 	if (err) {
1256 		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1257 		goto err_map_queues;
1258 	}
1259 
1260 
1261 	return 0;
1262 err_map_queues:
1263 	igb_free_queues(adapter);
1264 err_alloc_queues:
1265 	igb_free_q_vectors(adapter);
1266 err_alloc_q_vectors:
1267 	igb_reset_interrupt_capability(adapter);
1268 	return err;
1269 }
1270 
1271 /**
1272  * igb_request_irq - initialize interrupts
1273  *
1274  * Attempts to configure interrupts using the best available
1275  * capabilities of the hardware and kernel.
1276  **/
1277 static int igb_request_irq(struct igb_adapter *adapter)
1278 {
1279 	struct net_device *netdev = adapter->netdev;
1280 	struct pci_dev *pdev = adapter->pdev;
1281 	int err = 0;
1282 
1283 	if (adapter->msix_entries) {
1284 		err = igb_request_msix(adapter);
1285 		if (!err)
1286 			goto request_done;
1287 		/* fall back to MSI */
1288 		igb_clear_interrupt_scheme(adapter);
1289 		if (!pci_enable_msi(pdev))
1290 			adapter->flags |= IGB_FLAG_HAS_MSI;
1291 		igb_free_all_tx_resources(adapter);
1292 		igb_free_all_rx_resources(adapter);
1293 		adapter->num_tx_queues = 1;
1294 		adapter->num_rx_queues = 1;
1295 		adapter->num_q_vectors = 1;
1296 		err = igb_alloc_q_vectors(adapter);
1297 		if (err) {
1298 			dev_err(&pdev->dev,
1299 			        "Unable to allocate memory for vectors\n");
1300 			goto request_done;
1301 		}
1302 		err = igb_alloc_queues(adapter);
1303 		if (err) {
1304 			dev_err(&pdev->dev,
1305 			        "Unable to allocate memory for queues\n");
1306 			igb_free_q_vectors(adapter);
1307 			goto request_done;
1308 		}
1309 		igb_setup_all_tx_resources(adapter);
1310 		igb_setup_all_rx_resources(adapter);
1311 	}
1312 
1313 	igb_assign_vector(adapter->q_vector[0], 0);
1314 
1315 	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1316 		err = request_irq(pdev->irq, igb_intr_msi, 0,
1317 				  netdev->name, adapter);
1318 		if (!err)
1319 			goto request_done;
1320 
1321 		/* fall back to legacy interrupts */
1322 		igb_reset_interrupt_capability(adapter);
1323 		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1324 	}
1325 
1326 	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1327 			  netdev->name, adapter);
1328 
1329 	if (err)
1330 		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1331 			err);
1332 
1333 request_done:
1334 	return err;
1335 }
1336 
1337 static void igb_free_irq(struct igb_adapter *adapter)
1338 {
1339 	if (adapter->msix_entries) {
1340 		int vector = 0, i;
1341 
1342 		free_irq(adapter->msix_entries[vector++].vector, adapter);
1343 
1344 		for (i = 0; i < adapter->num_q_vectors; i++)
1345 			free_irq(adapter->msix_entries[vector++].vector,
1346 				 adapter->q_vector[i]);
1347 	} else {
1348 		free_irq(adapter->pdev->irq, adapter);
1349 	}
1350 }
1351 
1352 /**
1353  * igb_irq_disable - Mask off interrupt generation on the NIC
1354  * @adapter: board private structure
1355  **/
1356 static void igb_irq_disable(struct igb_adapter *adapter)
1357 {
1358 	struct e1000_hw *hw = &adapter->hw;
1359 
1360 	/*
1361 	 * We need to be careful when disabling interrupts.  The VFs are also
1362 	 * mapped into these registers, so clearing the bits can cause
1363 	 * issues for the VF drivers; only clear the bits we set.
1364 	 */
1365 	if (adapter->msix_entries) {
1366 		u32 regval = rd32(E1000_EIAM);
1367 		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1368 		wr32(E1000_EIMC, adapter->eims_enable_mask);
1369 		regval = rd32(E1000_EIAC);
1370 		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1371 	}
1372 
1373 	wr32(E1000_IAM, 0);
1374 	wr32(E1000_IMC, ~0);
1375 	wrfl();
1376 	if (adapter->msix_entries) {
1377 		int i;
1378 		for (i = 0; i < adapter->num_q_vectors; i++)
1379 			synchronize_irq(adapter->msix_entries[i].vector);
1380 	} else {
1381 		synchronize_irq(adapter->pdev->irq);
1382 	}
1383 }
1384 
1385 /**
1386  * igb_irq_enable - Enable default interrupt generation settings
1387  * @adapter: board private structure
1388  **/
1389 static void igb_irq_enable(struct igb_adapter *adapter)
1390 {
1391 	struct e1000_hw *hw = &adapter->hw;
1392 
1393 	if (adapter->msix_entries) {
1394 		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1395 		u32 regval = rd32(E1000_EIAC);
1396 		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1397 		regval = rd32(E1000_EIAM);
1398 		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1399 		wr32(E1000_EIMS, adapter->eims_enable_mask);
1400 		if (adapter->vfs_allocated_count) {
1401 			wr32(E1000_MBVFIMR, 0xFF);
1402 			ims |= E1000_IMS_VMMB;
1403 		}
1404 		wr32(E1000_IMS, ims);
1405 	} else {
1406 		wr32(E1000_IMS, IMS_ENABLE_MASK |
1407 				E1000_IMS_DRSTA);
1408 		wr32(E1000_IAM, IMS_ENABLE_MASK |
1409 				E1000_IMS_DRSTA);
1410 	}
1411 }
1412 
1413 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1414 {
1415 	struct e1000_hw *hw = &adapter->hw;
1416 	u16 vid = adapter->hw.mng_cookie.vlan_id;
1417 	u16 old_vid = adapter->mng_vlan_id;
1418 
1419 	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1420 		/* add VID to filter table */
1421 		igb_vfta_set(hw, vid, true);
1422 		adapter->mng_vlan_id = vid;
1423 	} else {
1424 		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1425 	}
1426 
1427 	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1428 	    (vid != old_vid) &&
1429 	    !test_bit(old_vid, adapter->active_vlans)) {
1430 		/* remove VID from filter table */
1431 		igb_vfta_set(hw, old_vid, false);
1432 	}
1433 }
1434 
1435 /**
1436  * igb_release_hw_control - release control of the h/w to f/w
1437  * @adapter: address of board private structure
1438  *
1439  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1440  * For ASF and Pass Through versions of f/w this means that the
1441  * driver is no longer loaded.
1442  *
1443  **/
1444 static void igb_release_hw_control(struct igb_adapter *adapter)
1445 {
1446 	struct e1000_hw *hw = &adapter->hw;
1447 	u32 ctrl_ext;
1448 
1449 	/* Let firmware take over control of h/w */
1450 	ctrl_ext = rd32(E1000_CTRL_EXT);
1451 	wr32(E1000_CTRL_EXT,
1452 			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1453 }
1454 
1455 /**
1456  * igb_get_hw_control - get control of the h/w from f/w
1457  * @adapter: address of board private structure
1458  *
1459  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1460  * For ASF and Pass Through versions of f/w this means that
1461  * the driver is loaded.
1462  *
1463  **/
1464 static void igb_get_hw_control(struct igb_adapter *adapter)
1465 {
1466 	struct e1000_hw *hw = &adapter->hw;
1467 	u32 ctrl_ext;
1468 
1469 	/* Let firmware know the driver has taken over */
1470 	ctrl_ext = rd32(E1000_CTRL_EXT);
1471 	wr32(E1000_CTRL_EXT,
1472 			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1473 }
1474 
1475 /**
1476  * igb_configure - configure the hardware for RX and TX
1477  * @adapter: private board structure
1478  **/
1479 static void igb_configure(struct igb_adapter *adapter)
1480 {
1481 	struct net_device *netdev = adapter->netdev;
1482 	int i;
1483 
1484 	igb_get_hw_control(adapter);
1485 	igb_set_rx_mode(netdev);
1486 
1487 	igb_restore_vlan(adapter);
1488 
1489 	igb_setup_tctl(adapter);
1490 	igb_setup_mrqc(adapter);
1491 	igb_setup_rctl(adapter);
1492 
1493 	igb_configure_tx(adapter);
1494 	igb_configure_rx(adapter);
1495 
1496 	igb_rx_fifo_flush_82575(&adapter->hw);
1497 
1498 	/* call igb_desc_unused which always leaves
1499 	 * at least 1 descriptor unused to make sure
1500 	 * next_to_use != next_to_clean */
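	/* e.g. on a freshly initialized 256-descriptor ring
	 * (next_to_use == next_to_clean == 0) this posts 255 buffers,
	 * always leaving one slot empty.
	 */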
1501 	for (i = 0; i < adapter->num_rx_queues; i++) {
1502 		struct igb_ring *ring = adapter->rx_ring[i];
1503 		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1504 	}
1505 }
1506 
1507 /**
1508  * igb_power_up_link - Power up the phy/serdes link
1509  * @adapter: address of board private structure
1510  **/
1511 void igb_power_up_link(struct igb_adapter *adapter)
1512 {
1513 	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514 		igb_power_up_phy_copper(&adapter->hw);
1515 	else
1516 		igb_power_up_serdes_link_82575(&adapter->hw);
1517 	igb_reset_phy(&adapter->hw);
1518 }
1519 
1520 /**
1521  * igb_power_down_link - Power down the phy/serdes link
1522  * @adapter: address of board private structure
1523  */
1524 static void igb_power_down_link(struct igb_adapter *adapter)
1525 {
1526 	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1527 		igb_power_down_phy_copper_82575(&adapter->hw);
1528 	else
1529 		igb_shutdown_serdes_link_82575(&adapter->hw);
1530 }
1531 
1532 /**
1533  * igb_up - Open the interface and prepare it to handle traffic
1534  * @adapter: board private structure
1535  **/
1536 int igb_up(struct igb_adapter *adapter)
1537 {
1538 	struct e1000_hw *hw = &adapter->hw;
1539 	int i;
1540 
1541 	/* hardware has been reset, we need to reload some things */
1542 	igb_configure(adapter);
1543 
1544 	clear_bit(__IGB_DOWN, &adapter->state);
1545 
1546 	for (i = 0; i < adapter->num_q_vectors; i++)
1547 		napi_enable(&(adapter->q_vector[i]->napi));
1548 
1549 	if (adapter->msix_entries)
1550 		igb_configure_msix(adapter);
1551 	else
1552 		igb_assign_vector(adapter->q_vector[0], 0);
1553 
1554 	/* Clear any pending interrupts. */
1555 	rd32(E1000_ICR);
1556 	igb_irq_enable(adapter);
1557 
1558 	/* notify VFs that reset has been completed */
1559 	if (adapter->vfs_allocated_count) {
1560 		u32 reg_data = rd32(E1000_CTRL_EXT);
1561 		reg_data |= E1000_CTRL_EXT_PFRSTD;
1562 		wr32(E1000_CTRL_EXT, reg_data);
1563 	}
1564 
1565 	netif_tx_start_all_queues(adapter->netdev);
1566 
1567 	/* start the watchdog. */
1568 	hw->mac.get_link_status = 1;
1569 	schedule_work(&adapter->watchdog_task);
1570 
1571 	return 0;
1572 }
1573 
1574 void igb_down(struct igb_adapter *adapter)
1575 {
1576 	struct net_device *netdev = adapter->netdev;
1577 	struct e1000_hw *hw = &adapter->hw;
1578 	u32 tctl, rctl;
1579 	int i;
1580 
1581 	/* signal that we're down so the interrupt handler does not
1582 	 * reschedule our watchdog timer */
1583 	set_bit(__IGB_DOWN, &adapter->state);
1584 
1585 	/* disable receives in the hardware */
1586 	rctl = rd32(E1000_RCTL);
1587 	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1588 	/* flush and sleep below */
1589 
1590 	netif_tx_stop_all_queues(netdev);
1591 
1592 	/* disable transmits in the hardware */
1593 	tctl = rd32(E1000_TCTL);
1594 	tctl &= ~E1000_TCTL_EN;
1595 	wr32(E1000_TCTL, tctl);
1596 	/* flush both disables and wait for them to finish */
1597 	wrfl();
1598 	msleep(10);
1599 
1600 	for (i = 0; i < adapter->num_q_vectors; i++)
1601 		napi_disable(&(adapter->q_vector[i]->napi));
1602 
1603 	igb_irq_disable(adapter);
1604 
1605 	del_timer_sync(&adapter->watchdog_timer);
1606 	del_timer_sync(&adapter->phy_info_timer);
1607 
1608 	netif_carrier_off(netdev);
1609 
1610 	/* record the stats before reset */
1611 	spin_lock(&adapter->stats64_lock);
1612 	igb_update_stats(adapter, &adapter->stats64);
1613 	spin_unlock(&adapter->stats64_lock);
1614 
1615 	adapter->link_speed = 0;
1616 	adapter->link_duplex = 0;
1617 
1618 	if (!pci_channel_offline(adapter->pdev))
1619 		igb_reset(adapter);
1620 	igb_clean_all_tx_rings(adapter);
1621 	igb_clean_all_rx_rings(adapter);
1622 #ifdef CONFIG_IGB_DCA
1623 
1624 	/* since we reset the hardware, DCA settings were cleared */
1625 	igb_setup_dca(adapter);
1626 #endif
1627 }
1628 
1629 void igb_reinit_locked(struct igb_adapter *adapter)
1630 {
1631 	WARN_ON(in_interrupt());
1632 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1633 		msleep(1);
1634 	igb_down(adapter);
1635 	igb_up(adapter);
1636 	clear_bit(__IGB_RESETTING, &adapter->state);
1637 }
1638 
1639 void igb_reset(struct igb_adapter *adapter)
1640 {
1641 	struct pci_dev *pdev = adapter->pdev;
1642 	struct e1000_hw *hw = &adapter->hw;
1643 	struct e1000_mac_info *mac = &hw->mac;
1644 	struct e1000_fc_info *fc = &hw->fc;
1645 	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1646 	u16 hwm;
1647 
1648 	/* Repartition the PBA for MTUs greater than 9K.
1649 	 * CTRL.RST is required for the change to take effect.
1650 	 */
1651 	switch (mac->type) {
1652 	case e1000_i350:
1653 	case e1000_82580:
1654 		pba = rd32(E1000_RXPBS);
1655 		pba = igb_rxpbs_adjust_82580(pba);
1656 		break;
1657 	case e1000_82576:
1658 		pba = rd32(E1000_RXPBS);
1659 		pba &= E1000_RXPBS_SIZE_MASK_82576;
1660 		break;
1661 	case e1000_82575:
1662 	default:
1663 		pba = E1000_PBA_34K;
1664 		break;
1665 	}
1666 
1667 	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1668 	    (mac->type < e1000_82576)) {
1669 		/* adjust PBA for jumbo frames */
1670 		wr32(E1000_PBA, pba);
1671 
1672 		/* To maintain wire speed transmits, the Tx FIFO should be
1673 		 * large enough to accommodate two full transmit packets,
1674 		 * rounded up to the next 1KB and expressed in KB.  Likewise,
1675 		 * the Rx FIFO should be large enough to accommodate at least
1676 		 * one full receive packet and is similarly rounded up and
1677 		 * expressed in KB. */
1678 		pba = rd32(E1000_PBA);
1679 		/* upper 16 bits has Tx packet buffer allocation size in KB */
1680 		tx_space = pba >> 16;
1681 		/* lower 16 bits has Rx packet buffer allocation size in KB */
1682 		pba &= 0xffff;
1683 		/* the Tx FIFO also stores 16 bytes of information about the Tx packet,
1684 		 * but doesn't include the Ethernet FCS because hardware appends it */
1685 		min_tx_space = (adapter->max_frame_size +
1686 				sizeof(union e1000_adv_tx_desc) -
1687 				ETH_FCS_LEN) * 2;
1688 		min_tx_space = ALIGN(min_tx_space, 1024);
1689 		min_tx_space >>= 10;
1690 		/* software strips receive CRC, so leave room for it */
1691 		min_rx_space = adapter->max_frame_size;
1692 		min_rx_space = ALIGN(min_rx_space, 1024);
1693 		min_rx_space >>= 10;
1694 
1695 		/* If current Tx allocation is less than the min Tx FIFO size,
1696 		 * and the min Tx FIFO size is less than the current Rx FIFO
1697 		 * allocation, take space away from current Rx allocation */
1698 		if (tx_space < min_tx_space &&
1699 		    ((min_tx_space - tx_space) < pba)) {
1700 			pba = pba - (min_tx_space - tx_space);
1701 
1702 			/* if short on rx space, rx wins and must trump tx
1703 			 * adjustment */
1704 			if (pba < min_rx_space)
1705 				pba = min_rx_space;
1706 		}
1707 		wr32(E1000_PBA, pba);
1708 	}
1709 
1710 	/* flow control settings */
1711 	/* The high water mark must be low enough to fit one full frame
1712 	 * (or the size used for early receive) above it in the Rx FIFO.
1713 	 * Set it to the lower of:
1714 	 * - 90% of the Rx FIFO size, or
1715 	 * - the full Rx FIFO size minus space for two full frames */
1716 	hwm = min(((pba << 10) * 9 / 10),
1717 			((pba << 10) - 2 * adapter->max_frame_size));
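	/* For example, with a 34KB packet buffer and the default 1522 byte
	 * max frame this is min(31334, 31772) = 31334 bytes, which is then
	 * rounded down to 16-byte granularity below.
	 */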
1718 
1719 	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
1720 	fc->low_water = fc->high_water - 16;
1721 	fc->pause_time = 0xFFFF;
1722 	fc->send_xon = 1;
1723 	fc->current_mode = fc->requested_mode;
1724 
1725 	/* disable receive for all VFs and wait one second */
1726 	if (adapter->vfs_allocated_count) {
1727 		int i;
1728 		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1729 			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1730 
1731 		/* ping all the active vfs to let them know we are going down */
1732 		igb_ping_all_vfs(adapter);
1733 
1734 		/* disable transmits and receives */
1735 		wr32(E1000_VFRE, 0);
1736 		wr32(E1000_VFTE, 0);
1737 	}
1738 
1739 	/* Allow time for pending master requests to run */
1740 	hw->mac.ops.reset_hw(hw);
1741 	wr32(E1000_WUC, 0);
1742 
1743 	if (hw->mac.ops.init_hw(hw))
1744 		dev_err(&pdev->dev, "Hardware Error\n");
1745 
1746 	igb_init_dmac(adapter, pba);
1747 	if (!netif_running(adapter->netdev))
1748 		igb_power_down_link(adapter);
1749 
1750 	igb_update_mng_vlan(adapter);
1751 
1752 	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1753 	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1754 
1755 	igb_get_phy_info(hw);
1756 }
1757 
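/**
 * igb_fix_features - fix up the requested netdev feature flags
 * @netdev: network interface device structure
 * @features: feature set requested by user space
 *
 * Keeps the Rx and Tx VLAN acceleration flags in sync since the hardware
 * does not support enabling them independently.
 **/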
1758 static netdev_features_t igb_fix_features(struct net_device *netdev,
1759 	netdev_features_t features)
1760 {
1761 	/*
1762 	 * Since there is no support for separate rx/tx vlan accel
1763 	 * enable/disable, make sure the Tx flag is always in the same state as Rx.
1764 	 */
1765 	if (features & NETIF_F_HW_VLAN_RX)
1766 		features |= NETIF_F_HW_VLAN_TX;
1767 	else
1768 		features &= ~NETIF_F_HW_VLAN_TX;
1769 
1770 	return features;
1771 }
1772 
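/**
 * igb_set_features - apply the changed netdev feature flags
 * @netdev: network interface device structure
 * @features: the new feature set
 *
 * Updates the VLAN offload mode when it changes and reinitializes (or
 * resets) the device when the RXALL flag toggles so that the new Rx
 * filtering behavior takes effect.
 **/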
1773 static int igb_set_features(struct net_device *netdev,
1774 	netdev_features_t features)
1775 {
1776 	netdev_features_t changed = netdev->features ^ features;
1777 	struct igb_adapter *adapter = netdev_priv(netdev);
1778 
1779 	if (changed & NETIF_F_HW_VLAN_RX)
1780 		igb_vlan_mode(netdev, features);
1781 
1782 	if (!(changed & NETIF_F_RXALL))
1783 		return 0;
1784 
1785 	netdev->features = features;
1786 
1787 	if (netif_running(netdev))
1788 		igb_reinit_locked(adapter);
1789 	else
1790 		igb_reset(adapter);
1791 
1792 	return 0;
1793 }
1794 
1795 static const struct net_device_ops igb_netdev_ops = {
1796 	.ndo_open		= igb_open,
1797 	.ndo_stop		= igb_close,
1798 	.ndo_start_xmit		= igb_xmit_frame,
1799 	.ndo_get_stats64	= igb_get_stats64,
1800 	.ndo_set_rx_mode	= igb_set_rx_mode,
1801 	.ndo_set_mac_address	= igb_set_mac,
1802 	.ndo_change_mtu		= igb_change_mtu,
1803 	.ndo_do_ioctl		= igb_ioctl,
1804 	.ndo_tx_timeout		= igb_tx_timeout,
1805 	.ndo_validate_addr	= eth_validate_addr,
1806 	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
1807 	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
1808 	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
1809 	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
1810 	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
1811 	.ndo_get_vf_config	= igb_ndo_get_vf_config,
1812 #ifdef CONFIG_NET_POLL_CONTROLLER
1813 	.ndo_poll_controller	= igb_netpoll,
1814 #endif
1815 	.ndo_fix_features	= igb_fix_features,
1816 	.ndo_set_features	= igb_set_features,
1817 };
1818 
1819 /**
1820  * igb_probe - Device Initialization Routine
1821  * @pdev: PCI device information struct
1822  * @ent: entry in igb_pci_tbl
1823  *
1824  * Returns 0 on success, negative on failure
1825  *
1826  * igb_probe initializes an adapter identified by a pci_dev structure.
1827  * The OS initialization, configuring of the adapter private structure,
1828  * and a hardware reset occur.
1829  **/
1830 static int __devinit igb_probe(struct pci_dev *pdev,
1831 			       const struct pci_device_id *ent)
1832 {
1833 	struct net_device *netdev;
1834 	struct igb_adapter *adapter;
1835 	struct e1000_hw *hw;
1836 	u16 eeprom_data = 0;
1837 	s32 ret_val;
1838 	static int global_quad_port_a; /* global quad port a indication */
1839 	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1840 	unsigned long mmio_start, mmio_len;
1841 	int err, pci_using_dac;
1842 	u16 eeprom_apme_mask = IGB_EEPROM_APME;
1843 	u8 part_str[E1000_PBANUM_LENGTH];
1844 
1845 	/* Catch broken hardware that put the wrong VF device ID in
1846 	 * the PCIe SR-IOV capability.
1847 	 */
1848 	if (pdev->is_virtfn) {
1849 		WARN(1, "%s (%hx:%hx) should not be a VF!\n",
1850 		     pci_name(pdev), pdev->vendor, pdev->device);
1851 		return -EINVAL;
1852 	}
1853 
1854 	err = pci_enable_device_mem(pdev);
1855 	if (err)
1856 		return err;
1857 
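	/* Try 64-bit DMA first and fall back to 32-bit; pci_using_dac is
	 * used below to decide whether NETIF_F_HIGHDMA can be advertised.
	 */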
1858 	pci_using_dac = 0;
1859 	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1860 	if (!err) {
1861 		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1862 		if (!err)
1863 			pci_using_dac = 1;
1864 	} else {
1865 		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1866 		if (err) {
1867 			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1868 			if (err) {
1869 				dev_err(&pdev->dev, "No usable DMA "
1870 					"configuration, aborting\n");
1871 				goto err_dma;
1872 			}
1873 		}
1874 	}
1875 
1876 	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1877 	                                   IORESOURCE_MEM),
1878 	                                   igb_driver_name);
1879 	if (err)
1880 		goto err_pci_reg;
1881 
1882 	pci_enable_pcie_error_reporting(pdev);
1883 
1884 	pci_set_master(pdev);
1885 	pci_save_state(pdev);
1886 
1887 	err = -ENOMEM;
1888 	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1889 				   IGB_MAX_TX_QUEUES);
1890 	if (!netdev)
1891 		goto err_alloc_etherdev;
1892 
1893 	SET_NETDEV_DEV(netdev, &pdev->dev);
1894 
1895 	pci_set_drvdata(pdev, netdev);
1896 	adapter = netdev_priv(netdev);
1897 	adapter->netdev = netdev;
1898 	adapter->pdev = pdev;
1899 	hw = &adapter->hw;
1900 	hw->back = adapter;
1901 	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1902 
1903 	mmio_start = pci_resource_start(pdev, 0);
1904 	mmio_len = pci_resource_len(pdev, 0);
1905 
1906 	err = -EIO;
1907 	hw->hw_addr = ioremap(mmio_start, mmio_len);
1908 	if (!hw->hw_addr)
1909 		goto err_ioremap;
1910 
1911 	netdev->netdev_ops = &igb_netdev_ops;
1912 	igb_set_ethtool_ops(netdev);
1913 	netdev->watchdog_timeo = 5 * HZ;
1914 
1915 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1916 
1917 	netdev->mem_start = mmio_start;
1918 	netdev->mem_end = mmio_start + mmio_len;
1919 
1920 	/* PCI config space info */
1921 	hw->vendor_id = pdev->vendor;
1922 	hw->device_id = pdev->device;
1923 	hw->revision_id = pdev->revision;
1924 	hw->subsystem_vendor_id = pdev->subsystem_vendor;
1925 	hw->subsystem_device_id = pdev->subsystem_device;
1926 
1927 	/* Copy the default MAC, PHY and NVM function pointers */
1928 	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1929 	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1930 	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1931 	/* Initialize skew-specific constants */
1932 	err = ei->get_invariants(hw);
1933 	if (err)
1934 		goto err_sw_init;
1935 
1936 	/* setup the private structure */
1937 	err = igb_sw_init(adapter);
1938 	if (err)
1939 		goto err_sw_init;
1940 
1941 	igb_get_bus_info_pcie(hw);
1942 
1943 	hw->phy.autoneg_wait_to_complete = false;
1944 
1945 	/* Copper options */
1946 	if (hw->phy.media_type == e1000_media_type_copper) {
1947 		hw->phy.mdix = AUTO_ALL_MODES;
1948 		hw->phy.disable_polarity_correction = false;
1949 		hw->phy.ms_type = e1000_ms_hw_default;
1950 	}
1951 
1952 	if (igb_check_reset_block(hw))
1953 		dev_info(&pdev->dev,
1954 			"PHY reset is blocked due to SOL/IDER session.\n");
1955 
1956 	/*
1957 	 * features is initialized to 0 at allocation; it might have bits
1958 	 * set by igb_sw_init, so we should use an OR instead of an
1959 	 * assignment.
1960 	 */
1961 	netdev->features |= NETIF_F_SG |
1962 			    NETIF_F_IP_CSUM |
1963 			    NETIF_F_IPV6_CSUM |
1964 			    NETIF_F_TSO |
1965 			    NETIF_F_TSO6 |
1966 			    NETIF_F_RXHASH |
1967 			    NETIF_F_RXCSUM |
1968 			    NETIF_F_HW_VLAN_RX |
1969 			    NETIF_F_HW_VLAN_TX;
1970 
1971 	/* copy netdev features into list of user selectable features */
1972 	netdev->hw_features |= netdev->features;
1973 	netdev->hw_features |= NETIF_F_RXALL;
1974 
1975 	/* set this bit last since it cannot be part of hw_features */
1976 	netdev->features |= NETIF_F_HW_VLAN_FILTER;
1977 
1978 	netdev->vlan_features |= NETIF_F_TSO |
1979 				 NETIF_F_TSO6 |
1980 				 NETIF_F_IP_CSUM |
1981 				 NETIF_F_IPV6_CSUM |
1982 				 NETIF_F_SG;
1983 
1984 	netdev->priv_flags |= IFF_SUPP_NOFCS;
1985 
1986 	if (pci_using_dac) {
1987 		netdev->features |= NETIF_F_HIGHDMA;
1988 		netdev->vlan_features |= NETIF_F_HIGHDMA;
1989 	}
1990 
1991 	if (hw->mac.type >= e1000_82576) {
1992 		netdev->hw_features |= NETIF_F_SCTP_CSUM;
1993 		netdev->features |= NETIF_F_SCTP_CSUM;
1994 	}
1995 
1996 	netdev->priv_flags |= IFF_UNICAST_FLT;
1997 
1998 	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1999 
2000 	/* before reading the NVM, reset the controller to put the device in a
2001 	 * known good starting state */
2002 	hw->mac.ops.reset_hw(hw);
2003 
2004 	/* make sure the NVM is good */
2005 	if (hw->nvm.ops.validate(hw) < 0) {
2006 		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2007 		err = -EIO;
2008 		goto err_eeprom;
2009 	}
2010 
2011 	/* copy the MAC address out of the NVM */
2012 	if (hw->mac.ops.read_mac_addr(hw))
2013 		dev_err(&pdev->dev, "NVM Read Error\n");
2014 
2015 	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2016 	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2017 
2018 	if (!is_valid_ether_addr(netdev->perm_addr)) {
2019 		dev_err(&pdev->dev, "Invalid MAC Address\n");
2020 		err = -EIO;
2021 		goto err_eeprom;
2022 	}
2023 
2024 	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2025 	            (unsigned long) adapter);
2026 	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2027 	            (unsigned long) adapter);
2028 
2029 	INIT_WORK(&adapter->reset_task, igb_reset_task);
2030 	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2031 
2032 	/* Initialize link properties that are user-changeable */
2033 	adapter->fc_autoneg = true;
2034 	hw->mac.autoneg = true;
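	/* 0x2f advertises 10/100 half/full duplex and 1000 full duplex */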
2035 	hw->phy.autoneg_advertised = 0x2f;
2036 
2037 	hw->fc.requested_mode = e1000_fc_default;
2038 	hw->fc.current_mode = e1000_fc_default;
2039 
2040 	igb_validate_mdi_setting(hw);
2041 
2042 	/* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2043 	 * enable the ACPI Magic Packet filter
2044 	 */
2045 
2046 	if (hw->bus.func == 0)
2047 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2048 	else if (hw->mac.type >= e1000_82580)
2049 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2050 		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2051 		                 &eeprom_data);
2052 	else if (hw->bus.func == 1)
2053 		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2054 
2055 	if (eeprom_data & eeprom_apme_mask)
2056 		adapter->eeprom_wol |= E1000_WUFC_MAG;
2057 
2058 	/* now that we have the eeprom settings, apply the special cases where
2059 	 * the eeprom may be wrong or the board simply won't support wake on
2060 	 * lan on a particular port */
2061 	switch (pdev->device) {
2062 	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2063 		adapter->eeprom_wol = 0;
2064 		break;
2065 	case E1000_DEV_ID_82575EB_FIBER_SERDES:
2066 	case E1000_DEV_ID_82576_FIBER:
2067 	case E1000_DEV_ID_82576_SERDES:
2068 		/* Wake events only supported on port A for dual fiber
2069 		 * regardless of eeprom setting */
2070 		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2071 			adapter->eeprom_wol = 0;
2072 		break;
2073 	case E1000_DEV_ID_82576_QUAD_COPPER:
2074 	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2075 		/* if quad port adapter, disable WoL on all but port A */
2076 		if (global_quad_port_a != 0)
2077 			adapter->eeprom_wol = 0;
2078 		else
2079 			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2080 		/* Reset for multiple quad port adapters */
2081 		if (++global_quad_port_a == 4)
2082 			global_quad_port_a = 0;
2083 		break;
2084 	}
2085 
2086 	/* initialize the wol settings based on the eeprom settings */
2087 	adapter->wol = adapter->eeprom_wol;
2088 	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2089 
2090 	/* reset the hardware with the new settings */
2091 	igb_reset(adapter);
2092 
2093 	/* let the f/w know that the h/w is now under the control of the
2094 	 * driver. */
2095 	igb_get_hw_control(adapter);
2096 
2097 	strcpy(netdev->name, "eth%d");
2098 	err = register_netdev(netdev);
2099 	if (err)
2100 		goto err_register;
2101 
2102 	/* carrier off reporting is important to ethtool even BEFORE open */
2103 	netif_carrier_off(netdev);
2104 
2105 #ifdef CONFIG_IGB_DCA
2106 	if (dca_add_requester(&pdev->dev) == 0) {
2107 		adapter->flags |= IGB_FLAG_DCA_ENABLED;
2108 		dev_info(&pdev->dev, "DCA enabled\n");
2109 		igb_setup_dca(adapter);
2110 	}
2111 
2112 #endif
2113 	/* do hw tstamp init after resetting */
2114 	igb_init_hw_timer(adapter);
2115 
2116 	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2117 	/* print bus type/speed/width info */
2118 	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2119 		 netdev->name,
2120 		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2121 		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2122 		                                            "unknown"),
2123 		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2124 		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2125 		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2126 		   "unknown"),
2127 		 netdev->dev_addr);
2128 
2129 	ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2130 	if (ret_val)
2131 		strcpy(part_str, "Unknown");
2132 	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2133 	dev_info(&pdev->dev,
2134 		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2135 		adapter->msix_entries ? "MSI-X" :
2136 		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2137 		adapter->num_rx_queues, adapter->num_tx_queues);
2138 	switch (hw->mac.type) {
2139 	case e1000_i350:
2140 		igb_set_eee_i350(hw);
2141 		break;
2142 	default:
2143 		break;
2144 	}
2145 
2146 	pm_runtime_put_noidle(&pdev->dev);
2147 	return 0;
2148 
2149 err_register:
2150 	igb_release_hw_control(adapter);
2151 err_eeprom:
2152 	if (!igb_check_reset_block(hw))
2153 		igb_reset_phy(hw);
2154 
2155 	if (hw->flash_address)
2156 		iounmap(hw->flash_address);
2157 err_sw_init:
2158 	igb_clear_interrupt_scheme(adapter);
2159 	iounmap(hw->hw_addr);
2160 err_ioremap:
2161 	free_netdev(netdev);
2162 err_alloc_etherdev:
2163 	pci_release_selected_regions(pdev,
2164 	                             pci_select_bars(pdev, IORESOURCE_MEM));
2165 err_pci_reg:
2166 err_dma:
2167 	pci_disable_device(pdev);
2168 	return err;
2169 }
2170 
2171 /**
2172  * igb_remove - Device Removal Routine
2173  * @pdev: PCI device information struct
2174  *
2175  * igb_remove is called by the PCI subsystem to alert the driver
2176  * that it should release a PCI device.  This could be caused by a
2177  * Hot-Plug event, or because the driver is going to be removed from
2178  * memory.
2179  **/
2180 static void __devexit igb_remove(struct pci_dev *pdev)
2181 {
2182 	struct net_device *netdev = pci_get_drvdata(pdev);
2183 	struct igb_adapter *adapter = netdev_priv(netdev);
2184 	struct e1000_hw *hw = &adapter->hw;
2185 
2186 	pm_runtime_get_noresume(&pdev->dev);
2187 
2188 	/*
2189 	 * The watchdog timer may be rescheduled, so explicitly
2190 	 * disable watchdog from being rescheduled.
2191 	 */
2192 	set_bit(__IGB_DOWN, &adapter->state);
2193 	del_timer_sync(&adapter->watchdog_timer);
2194 	del_timer_sync(&adapter->phy_info_timer);
2195 
2196 	cancel_work_sync(&adapter->reset_task);
2197 	cancel_work_sync(&adapter->watchdog_task);
2198 
2199 #ifdef CONFIG_IGB_DCA
2200 	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2201 		dev_info(&pdev->dev, "DCA disabled\n");
2202 		dca_remove_requester(&pdev->dev);
2203 		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2204 		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2205 	}
2206 #endif
2207 
2208 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2209 	 * would have already happened in close and is redundant. */
2210 	igb_release_hw_control(adapter);
2211 
2212 	unregister_netdev(netdev);
2213 
2214 	igb_clear_interrupt_scheme(adapter);
2215 
2216 #ifdef CONFIG_PCI_IOV
2217 	/* reclaim resources allocated to VFs */
2218 	if (adapter->vf_data) {
2219 		/* disable iov and allow time for transactions to clear */
2220 		if (!igb_check_vf_assignment(adapter)) {
2221 			pci_disable_sriov(pdev);
2222 			msleep(500);
2223 		} else {
2224 			dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2225 		}
2226 
2227 		kfree(adapter->vf_data);
2228 		adapter->vf_data = NULL;
2229 		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2230 		wrfl();
2231 		msleep(100);
2232 		dev_info(&pdev->dev, "IOV Disabled\n");
2233 	}
2234 #endif
2235 
2236 	iounmap(hw->hw_addr);
2237 	if (hw->flash_address)
2238 		iounmap(hw->flash_address);
2239 	pci_release_selected_regions(pdev,
2240 	                             pci_select_bars(pdev, IORESOURCE_MEM));
2241 
2242 	kfree(adapter->shadow_vfta);
2243 	free_netdev(netdev);
2244 
2245 	pci_disable_pcie_error_reporting(pdev);
2246 
2247 	pci_disable_device(pdev);
2248 }
2249 
2250 /**
2251  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2252  * @adapter: board private structure to initialize
2253  *
2254  * This function initializes the vf specific data storage and then attempts to
2255  * allocate the VFs.  The reason for ordering it this way is because it is much
2256  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2257  * the memory for the VFs.
2258  **/
2259 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2260 {
2261 #ifdef CONFIG_PCI_IOV
2262 	struct pci_dev *pdev = adapter->pdev;
2263 	int old_vfs = igb_find_enabled_vfs(adapter);
2264 	int i;
2265 
2266 	if (old_vfs) {
2267 		dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2268 			 "max_vfs setting of %d\n", old_vfs, max_vfs);
2269 		adapter->vfs_allocated_count = old_vfs;
2270 	}
2271 
2272 	if (!adapter->vfs_allocated_count)
2273 		return;
2274 
2275 	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2276 				sizeof(struct vf_data_storage), GFP_KERNEL);
2277 	/* if allocation failed then we do not support SR-IOV */
2278 	if (!adapter->vf_data) {
2279 		adapter->vfs_allocated_count = 0;
2280 		dev_err(&pdev->dev, "Unable to allocate memory for VF "
2281 			"Data Storage\n");
2282 		goto out;
2283 	}
2284 
2285 	if (!old_vfs) {
2286 		if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2287 			goto err_out;
2288 	}
2289 	dev_info(&pdev->dev, "%d VFs allocated\n",
2290 		 adapter->vfs_allocated_count);
2291 	for (i = 0; i < adapter->vfs_allocated_count; i++)
2292 		igb_vf_configure(adapter, i);
2293 
2294 	/* DMA Coalescing is not supported in IOV mode. */
2295 	adapter->flags &= ~IGB_FLAG_DMAC;
2296 	goto out;
2297 err_out:
2298 	kfree(adapter->vf_data);
2299 	adapter->vf_data = NULL;
2300 	adapter->vfs_allocated_count = 0;
2301 out:
2302 	return;
2303 #endif /* CONFIG_PCI_IOV */
2304 }
2305 
2306 /**
2307  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2308  * @adapter: board private structure to initialize
2309  *
2310  * igb_init_hw_timer initializes the function pointer and values for the hw
2311  * timer found in hardware.
2312  **/
2313 static void igb_init_hw_timer(struct igb_adapter *adapter)
2314 {
2315 	struct e1000_hw *hw = &adapter->hw;
2316 
2317 	switch (hw->mac.type) {
2318 	case e1000_i350:
2319 	case e1000_82580:
2320 		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2321 		adapter->cycles.read = igb_read_clock;
2322 		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2323 		adapter->cycles.mult = 1;
2324 		/*
2325 		 * The 82580 timesync updates the system timer in 8 ns increments
2326 		 * and the value cannot be shifted.  Instead we need to shift
2327 		 * the registers to generate a 64bit timer value.  As a result
2328 		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2329 		 * 24 in order to generate a larger value for synchronization.
2330 		 */
2331 		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2332 		/* disable system timer temporarily by setting bit 31 */
2333 		wr32(E1000_TSAUXC, 0x80000000);
2334 		wrfl();
2335 
2336 		/* Set registers so that rollover occurs soon to test this. */
2337 		wr32(E1000_SYSTIMR, 0x00000000);
2338 		wr32(E1000_SYSTIML, 0x80000000);
2339 		wr32(E1000_SYSTIMH, 0x000000FF);
2340 		wrfl();
2341 
2342 		/* enable system timer by clearing bit 31 */
2343 		wr32(E1000_TSAUXC, 0x0);
2344 		wrfl();
2345 
2346 		timecounter_init(&adapter->clock,
2347 				 &adapter->cycles,
2348 				 ktime_to_ns(ktime_get_real()));
2349 		/*
2350 		 * Synchronize our NIC clock against system wall clock. NIC
2351 		 * time stamp reading requires ~3us per sample, each sample
2352 		 * was pretty stable even under load => only require 10
2353 		 * samples for each offset comparison.
2354 		 */
2355 		memset(&adapter->compare, 0, sizeof(adapter->compare));
2356 		adapter->compare.source = &adapter->clock;
2357 		adapter->compare.target = ktime_get_real;
2358 		adapter->compare.num_samples = 10;
2359 		timecompare_update(&adapter->compare, 0);
2360 		break;
2361 	case e1000_82576:
2362 		/*
2363 		 * Initialize hardware timer: we keep it running just in case
2364 		 * that some program needs it later on.
2365 		 */
2366 		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2367 		adapter->cycles.read = igb_read_clock;
2368 		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2369 		adapter->cycles.mult = 1;
2370 		/*
2371 		 * Scale the NIC clock cycle by a large factor so that
2372 		 * relatively small clock corrections can be added or
2373 		 * subtracted at each clock tick. The drawbacks of a large
2374 		 * factor are a) that the clock register overflows more quickly
2375 		 * (not such a big deal) and b) that the increment per tick has
2376 		 * to fit into 24 bits.  As a result we need to use a shift of
2377 		 * 19 so we can fit a value of 16 into the TIMINCA register.
2378 		 */
2379 		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2380 		wr32(E1000_TIMINCA,
2381 		                (1 << E1000_TIMINCA_16NS_SHIFT) |
2382 		                (16 << IGB_82576_TSYNC_SHIFT));
2383 
2384 		/* Set registers so that rollover occurs soon to test this. */
2385 		wr32(E1000_SYSTIML, 0x00000000);
2386 		wr32(E1000_SYSTIMH, 0xFF800000);
2387 		wrfl();
2388 
2389 		timecounter_init(&adapter->clock,
2390 				 &adapter->cycles,
2391 				 ktime_to_ns(ktime_get_real()));
2392 		/*
2393 		 * Synchronize our NIC clock against system wall clock. NIC
2394 		 * time stamp reading requires ~3us per sample, each sample
2395 		 * was pretty stable even under load => only require 10
2396 		 * samples for each offset comparison.
2397 		 */
2398 		memset(&adapter->compare, 0, sizeof(adapter->compare));
2399 		adapter->compare.source = &adapter->clock;
2400 		adapter->compare.target = ktime_get_real;
2401 		adapter->compare.num_samples = 10;
2402 		timecompare_update(&adapter->compare, 0);
2403 		break;
2404 	case e1000_82575:
2405 		/* 82575 does not support timesync */
2406 	default:
2407 		break;
2408 	}
2409 
2410 }
2411 
2412 /**
2413  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2414  * @adapter: board private structure to initialize
2415  *
2416  * igb_sw_init initializes the Adapter private data structure.
2417  * Fields are initialized based on PCI device information and
2418  * OS network device settings (MTU size).
2419  **/
2420 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2421 {
2422 	struct e1000_hw *hw = &adapter->hw;
2423 	struct net_device *netdev = adapter->netdev;
2424 	struct pci_dev *pdev = adapter->pdev;
2425 
2426 	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2427 
2428 	/* set default ring sizes */
2429 	adapter->tx_ring_count = IGB_DEFAULT_TXD;
2430 	adapter->rx_ring_count = IGB_DEFAULT_RXD;
2431 
2432 	/* set default ITR values */
2433 	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2434 	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2435 
2436 	/* set default work limits */
2437 	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2438 
2439 	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2440 				  VLAN_HLEN;
2441 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2442 
2443 	adapter->node = -1;
2444 
2445 	spin_lock_init(&adapter->stats64_lock);
2446 #ifdef CONFIG_PCI_IOV
2447 	switch (hw->mac.type) {
2448 	case e1000_82576:
2449 	case e1000_i350:
2450 		if (max_vfs > 7) {
2451 			dev_warn(&pdev->dev,
2452 				 "Maximum of 7 VFs per PF, using max\n");
2453 			adapter->vfs_allocated_count = 7;
2454 		} else
2455 			adapter->vfs_allocated_count = max_vfs;
2456 		break;
2457 	default:
2458 		break;
2459 	}
2460 #endif /* CONFIG_PCI_IOV */
2461 	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2462 	/* i350 cannot do RSS and SR-IOV at the same time */
2463 	if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2464 		adapter->rss_queues = 1;
2465 
2466 	/*
2467 	 * if rss_queues > 4, or more than 6 VFs are allocated while multiple
2468 	 * rss queues are in use, combine the queues into queue pairs in order
2469 	 * to conserve interrupts due to limited supply
2470 	 */
2471 	if ((adapter->rss_queues > 4) ||
2472 	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2473 		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2474 
2475 	/* Setup and initialize a copy of the hw vlan table array */
2476 	adapter->shadow_vfta = kzalloc(sizeof(u32) *
2477 				E1000_VLAN_FILTER_TBL_SIZE,
2478 				GFP_ATOMIC);
2479 
2480 	/* This call may decrease the number of queues */
2481 	if (igb_init_interrupt_scheme(adapter)) {
2482 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2483 		return -ENOMEM;
2484 	}
2485 
2486 	igb_probe_vfs(adapter);
2487 
2488 	/* Explicitly disable IRQ since the NIC can be in any state. */
2489 	igb_irq_disable(adapter);
2490 
2491 	if (hw->mac.type == e1000_i350)
2492 		adapter->flags &= ~IGB_FLAG_DMAC;
2493 
2494 	set_bit(__IGB_DOWN, &adapter->state);
2495 	return 0;
2496 }
2497 
2498 /**
2499  * igb_open - Called when a network interface is made active
2500  * @netdev: network interface device structure
2501  *
2502  * Returns 0 on success, negative value on failure
2503  *
2504  * The open entry point is called when a network interface is made
2505  * active by the system (IFF_UP).  At this point all resources needed
2506  * for transmit and receive operations are allocated, the interrupt
2507  * handler is registered with the OS, the watchdog timer is started,
2508  * and the stack is notified that the interface is ready.
2509  **/
2510 static int __igb_open(struct net_device *netdev, bool resuming)
2511 {
2512 	struct igb_adapter *adapter = netdev_priv(netdev);
2513 	struct e1000_hw *hw = &adapter->hw;
2514 	struct pci_dev *pdev = adapter->pdev;
2515 	int err;
2516 	int i;
2517 
2518 	/* disallow open during test */
2519 	if (test_bit(__IGB_TESTING, &adapter->state)) {
2520 		WARN_ON(resuming);
2521 		return -EBUSY;
2522 	}
2523 
2524 	if (!resuming)
2525 		pm_runtime_get_sync(&pdev->dev);
2526 
2527 	netif_carrier_off(netdev);
2528 
2529 	/* allocate transmit descriptors */
2530 	err = igb_setup_all_tx_resources(adapter);
2531 	if (err)
2532 		goto err_setup_tx;
2533 
2534 	/* allocate receive descriptors */
2535 	err = igb_setup_all_rx_resources(adapter);
2536 	if (err)
2537 		goto err_setup_rx;
2538 
2539 	igb_power_up_link(adapter);
2540 
2541 	/* before we allocate an interrupt, we must be ready to handle it.
2542 	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2543 	 * as soon as we call request_irq, so we have to set up our
2544 	 * clean_rx handler before we do so.  */
2545 	igb_configure(adapter);
2546 
2547 	err = igb_request_irq(adapter);
2548 	if (err)
2549 		goto err_req_irq;
2550 
2551 	/* From here on the code is the same as igb_up() */
2552 	clear_bit(__IGB_DOWN, &adapter->state);
2553 
2554 	for (i = 0; i < adapter->num_q_vectors; i++)
2555 		napi_enable(&(adapter->q_vector[i]->napi));
2556 
2557 	/* Clear any pending interrupts. */
2558 	rd32(E1000_ICR);
2559 
2560 	igb_irq_enable(adapter);
2561 
2562 	/* notify VFs that reset has been completed */
2563 	if (adapter->vfs_allocated_count) {
2564 		u32 reg_data = rd32(E1000_CTRL_EXT);
2565 		reg_data |= E1000_CTRL_EXT_PFRSTD;
2566 		wr32(E1000_CTRL_EXT, reg_data);
2567 	}
2568 
2569 	netif_tx_start_all_queues(netdev);
2570 
2571 	if (!resuming)
2572 		pm_runtime_put(&pdev->dev);
2573 
2574 	/* start the watchdog. */
2575 	hw->mac.get_link_status = 1;
2576 	schedule_work(&adapter->watchdog_task);
2577 
2578 	return 0;
2579 
2580 err_req_irq:
2581 	igb_release_hw_control(adapter);
2582 	igb_power_down_link(adapter);
2583 	igb_free_all_rx_resources(adapter);
2584 err_setup_rx:
2585 	igb_free_all_tx_resources(adapter);
2586 err_setup_tx:
2587 	igb_reset(adapter);
2588 	if (!resuming)
2589 		pm_runtime_put(&pdev->dev);
2590 
2591 	return err;
2592 }
2593 
2594 static int igb_open(struct net_device *netdev)
2595 {
2596 	return __igb_open(netdev, false);
2597 }
2598 
2599 /**
2600  * igb_close - Disables a network interface
2601  * @netdev: network interface device structure
2602  *
2603  * Returns 0, this is not allowed to fail
2604  *
2605  * The close entry point is called when an interface is de-activated
2606  * by the OS.  The hardware is still under the driver's control, but
2607  * needs to be disabled.  A global MAC reset is issued to stop the
2608  * hardware, and all transmit and receive resources are freed.
2609  **/
2610 static int __igb_close(struct net_device *netdev, bool suspending)
2611 {
2612 	struct igb_adapter *adapter = netdev_priv(netdev);
2613 	struct pci_dev *pdev = adapter->pdev;
2614 
2615 	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2616 
2617 	if (!suspending)
2618 		pm_runtime_get_sync(&pdev->dev);
2619 
2620 	igb_down(adapter);
2621 	igb_free_irq(adapter);
2622 
2623 	igb_free_all_tx_resources(adapter);
2624 	igb_free_all_rx_resources(adapter);
2625 
2626 	if (!suspending)
2627 		pm_runtime_put_sync(&pdev->dev);
2628 	return 0;
2629 }
2630 
2631 static int igb_close(struct net_device *netdev)
2632 {
2633 	return __igb_close(netdev, false);
2634 }
2635 
2636 /**
2637  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2638  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2639  *
2640  * Return 0 on success, negative on failure
2641  **/
2642 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2643 {
2644 	struct device *dev = tx_ring->dev;
2645 	int orig_node = dev_to_node(dev);
2646 	int size;
2647 
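	/* Prefer node-local allocations for the buffer info array and the
	 * descriptor ring, falling back to any node if that fails.
	 */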
2648 	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2649 	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2650 	if (!tx_ring->tx_buffer_info)
2651 		tx_ring->tx_buffer_info = vzalloc(size);
2652 	if (!tx_ring->tx_buffer_info)
2653 		goto err;
2654 
2655 	/* round up to nearest 4K */
2656 	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2657 	tx_ring->size = ALIGN(tx_ring->size, 4096);
2658 
2659 	set_dev_node(dev, tx_ring->numa_node);
2660 	tx_ring->desc = dma_alloc_coherent(dev,
2661 					   tx_ring->size,
2662 					   &tx_ring->dma,
2663 					   GFP_KERNEL);
2664 	set_dev_node(dev, orig_node);
2665 	if (!tx_ring->desc)
2666 		tx_ring->desc = dma_alloc_coherent(dev,
2667 						   tx_ring->size,
2668 						   &tx_ring->dma,
2669 						   GFP_KERNEL);
2670 
2671 	if (!tx_ring->desc)
2672 		goto err;
2673 
2674 	tx_ring->next_to_use = 0;
2675 	tx_ring->next_to_clean = 0;
2676 
2677 	return 0;
2678 
2679 err:
2680 	vfree(tx_ring->tx_buffer_info);
2681 	dev_err(dev,
2682 		"Unable to allocate memory for the transmit descriptor ring\n");
2683 	return -ENOMEM;
2684 }
2685 
2686 /**
2687  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2688  *				  (Descriptors) for all queues
2689  * @adapter: board private structure
2690  *
2691  * Return 0 on success, negative on failure
2692  **/
2693 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2694 {
2695 	struct pci_dev *pdev = adapter->pdev;
2696 	int i, err = 0;
2697 
2698 	for (i = 0; i < adapter->num_tx_queues; i++) {
2699 		err = igb_setup_tx_resources(adapter->tx_ring[i]);
2700 		if (err) {
2701 			dev_err(&pdev->dev,
2702 				"Allocation for Tx Queue %u failed\n", i);
2703 			for (i--; i >= 0; i--)
2704 				igb_free_tx_resources(adapter->tx_ring[i]);
2705 			break;
2706 		}
2707 	}
2708 
2709 	return err;
2710 }
2711 
2712 /**
2713  * igb_setup_tctl - configure the transmit control registers
2714  * @adapter: Board private structure
2715  **/
2716 void igb_setup_tctl(struct igb_adapter *adapter)
2717 {
2718 	struct e1000_hw *hw = &adapter->hw;
2719 	u32 tctl;
2720 
2721 	/* disable queue 0 which is enabled by default on 82575 and 82576 */
2722 	wr32(E1000_TXDCTL(0), 0);
2723 
2724 	/* Program the Transmit Control Register */
2725 	tctl = rd32(E1000_TCTL);
2726 	tctl &= ~E1000_TCTL_CT;
2727 	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2728 		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2729 
2730 	igb_config_collision_dist(hw);
2731 
2732 	/* Enable transmits */
2733 	tctl |= E1000_TCTL_EN;
2734 
2735 	wr32(E1000_TCTL, tctl);
2736 }
2737 
2738 /**
2739  * igb_configure_tx_ring - Configure transmit ring after Reset
2740  * @adapter: board private structure
2741  * @ring: tx ring to configure
2742  *
2743  * Configure a transmit ring after a reset.
2744  **/
2745 void igb_configure_tx_ring(struct igb_adapter *adapter,
2746                            struct igb_ring *ring)
2747 {
2748 	struct e1000_hw *hw = &adapter->hw;
2749 	u32 txdctl = 0;
2750 	u64 tdba = ring->dma;
2751 	int reg_idx = ring->reg_idx;
2752 
2753 	/* disable the queue */
2754 	wr32(E1000_TXDCTL(reg_idx), 0);
2755 	wrfl();
2756 	mdelay(10);
2757 
2758 	wr32(E1000_TDLEN(reg_idx),
2759 	                ring->count * sizeof(union e1000_adv_tx_desc));
2760 	wr32(E1000_TDBAL(reg_idx),
2761 	                tdba & 0x00000000ffffffffULL);
2762 	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2763 
2764 	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2765 	wr32(E1000_TDH(reg_idx), 0);
2766 	writel(0, ring->tail);
2767 
2768 	txdctl |= IGB_TX_PTHRESH;
2769 	txdctl |= IGB_TX_HTHRESH << 8;
2770 	txdctl |= IGB_TX_WTHRESH << 16;
2771 
2772 	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2773 	wr32(E1000_TXDCTL(reg_idx), txdctl);
2774 
2775 	netdev_tx_reset_queue(txring_txq(ring));
2776 }
2777 
2778 /**
2779  * igb_configure_tx - Configure transmit Unit after Reset
2780  * @adapter: board private structure
2781  *
2782  * Configure the Tx unit of the MAC after a reset.
2783  **/
2784 static void igb_configure_tx(struct igb_adapter *adapter)
2785 {
2786 	int i;
2787 
2788 	for (i = 0; i < adapter->num_tx_queues; i++)
2789 		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2790 }
2791 
2792 /**
2793  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2794  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2795  *
2796  * Returns 0 on success, negative on failure
2797  **/
2798 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2799 {
2800 	struct device *dev = rx_ring->dev;
2801 	int orig_node = dev_to_node(dev);
2802 	int size, desc_len;
2803 
2804 	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2805 	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2806 	if (!rx_ring->rx_buffer_info)
2807 		rx_ring->rx_buffer_info = vzalloc(size);
2808 	if (!rx_ring->rx_buffer_info)
2809 		goto err;
2810 
2811 	desc_len = sizeof(union e1000_adv_rx_desc);
2812 
2813 	/* Round up to nearest 4K */
2814 	rx_ring->size = rx_ring->count * desc_len;
2815 	rx_ring->size = ALIGN(rx_ring->size, 4096);
2816 
2817 	set_dev_node(dev, rx_ring->numa_node);
2818 	rx_ring->desc = dma_alloc_coherent(dev,
2819 					   rx_ring->size,
2820 					   &rx_ring->dma,
2821 					   GFP_KERNEL);
2822 	set_dev_node(dev, orig_node);
2823 	if (!rx_ring->desc)
2824 		rx_ring->desc = dma_alloc_coherent(dev,
2825 						   rx_ring->size,
2826 						   &rx_ring->dma,
2827 						   GFP_KERNEL);
2828 
2829 	if (!rx_ring->desc)
2830 		goto err;
2831 
2832 	rx_ring->next_to_clean = 0;
2833 	rx_ring->next_to_use = 0;
2834 
2835 	return 0;
2836 
2837 err:
2838 	vfree(rx_ring->rx_buffer_info);
2839 	rx_ring->rx_buffer_info = NULL;
2840 	dev_err(dev, "Unable to allocate memory for the receive descriptor"
2841 		" ring\n");
2842 	return -ENOMEM;
2843 }
2844 
2845 /**
2846  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2847  *				  (Descriptors) for all queues
2848  * @adapter: board private structure
2849  *
2850  * Return 0 on success, negative on failure
2851  **/
2852 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2853 {
2854 	struct pci_dev *pdev = adapter->pdev;
2855 	int i, err = 0;
2856 
2857 	for (i = 0; i < adapter->num_rx_queues; i++) {
2858 		err = igb_setup_rx_resources(adapter->rx_ring[i]);
2859 		if (err) {
2860 			dev_err(&pdev->dev,
2861 				"Allocation for Rx Queue %u failed\n", i);
2862 			for (i--; i >= 0; i--)
2863 				igb_free_rx_resources(adapter->rx_ring[i]);
2864 			break;
2865 		}
2866 	}
2867 
2868 	return err;
2869 }
2870 
2871 /**
2872  * igb_setup_mrqc - configure the multiple receive queue control registers
2873  * @adapter: Board private structure
2874  **/
2875 static void igb_setup_mrqc(struct igb_adapter *adapter)
2876 {
2877 	struct e1000_hw *hw = &adapter->hw;
2878 	u32 mrqc, rxcsum;
2879 	u32 j, num_rx_queues, shift = 0, shift2 = 0;
2880 	union e1000_reta {
2881 		u32 dword;
2882 		u8  bytes[4];
2883 	} reta;
2884 	static const u8 rsshash[40] = {
2885 		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2886 		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2887 		0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2888 		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2889 
2890 	/* Fill out hash function seeds */
2891 	for (j = 0; j < 10; j++) {
2892 		u32 rsskey = rsshash[(j * 4)];
2893 		rsskey |= rsshash[(j * 4) + 1] << 8;
2894 		rsskey |= rsshash[(j * 4) + 2] << 16;
2895 		rsskey |= rsshash[(j * 4) + 3] << 24;
2896 		array_wr32(E1000_RSSRK(0), j, rsskey);
2897 	}
2898 
2899 	num_rx_queues = adapter->rss_queues;
2900 
2901 	if (adapter->vfs_allocated_count) {
2902 		/* 82575 and 82576 support 2 RSS queues for VMDq */
2903 		switch (hw->mac.type) {
2904 		case e1000_i350:
2905 		case e1000_82580:
2906 			num_rx_queues = 1;
2907 			shift = 0;
2908 			break;
2909 		case e1000_82576:
2910 			shift = 3;
2911 			num_rx_queues = 2;
2912 			break;
2913 		case e1000_82575:
2914 			shift = 2;
2915 			shift2 = 6;
2916 		default:
2917 			break;
2918 		}
2919 	} else {
2920 		if (hw->mac.type == e1000_82575)
2921 			shift = 6;
2922 	}
2923 
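	/* Populate the 128-entry redirection table four bytes at a time.
	 * Each byte selects the Rx queue that a given hash bucket maps to,
	 * shifted as needed when VMDq pools are in use.
	 */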
2924 	for (j = 0; j < (32 * 4); j++) {
2925 		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2926 		if (shift2)
2927 			reta.bytes[j & 3] |= num_rx_queues << shift2;
2928 		if ((j & 3) == 3)
2929 			wr32(E1000_RETA(j >> 2), reta.dword);
2930 	}
2931 
2932 	/*
2933 	 * Disable raw packet checksumming so that RSS hash is placed in
2934 	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2935 	 * offloads as they are enabled by default
2936 	 */
2937 	rxcsum = rd32(E1000_RXCSUM);
2938 	rxcsum |= E1000_RXCSUM_PCSD;
2939 
2940 	if (adapter->hw.mac.type >= e1000_82576)
2941 		/* Enable Receive Checksum Offload for SCTP */
2942 		rxcsum |= E1000_RXCSUM_CRCOFL;
2943 
2944 	/* Don't need to set TUOFL or IPOFL, they default to 1 */
2945 	wr32(E1000_RXCSUM, rxcsum);
2946 
2947 	/* If VMDq is enabled then we set the appropriate mode for that, else
2948 	 * we default to RSS so that an RSS hash is calculated per packet even
2949 	 * if we are only using one queue */
2950 	if (adapter->vfs_allocated_count) {
2951 		if (hw->mac.type > e1000_82575) {
2952 			/* Set the default pool for the PF's first queue */
2953 			u32 vtctl = rd32(E1000_VT_CTL);
2954 			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2955 				   E1000_VT_CTL_DISABLE_DEF_POOL);
2956 			vtctl |= adapter->vfs_allocated_count <<
2957 				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2958 			wr32(E1000_VT_CTL, vtctl);
2959 		}
2960 		if (adapter->rss_queues > 1)
2961 			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2962 		else
2963 			mrqc = E1000_MRQC_ENABLE_VMDQ;
2964 	} else {
2965 		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2966 	}
2967 	igb_vmm_control(adapter);
2968 
2969 	/*
2970 	 * Generate RSS hash based on TCP port numbers and/or
2971 	 * IPv4/v6 src and dst addresses since UDP cannot be
2972 	 * hashed reliably due to IP fragmentation
2973 	 */
2974 	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2975 		E1000_MRQC_RSS_FIELD_IPV4_TCP |
2976 		E1000_MRQC_RSS_FIELD_IPV6 |
2977 		E1000_MRQC_RSS_FIELD_IPV6_TCP |
2978 		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2979 
2980 	wr32(E1000_MRQC, mrqc);
2981 }
2982 
2983 /**
2984  * igb_setup_rctl - configure the receive control registers
2985  * @adapter: Board private structure
2986  **/
2987 void igb_setup_rctl(struct igb_adapter *adapter)
2988 {
2989 	struct e1000_hw *hw = &adapter->hw;
2990 	u32 rctl;
2991 
2992 	rctl = rd32(E1000_RCTL);
2993 
2994 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2995 	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2996 
2997 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2998 		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2999 
3000 	/*
3001 	 * enable stripping of CRC. It's unlikely this will break BMC
3002 	 * redirection as it did with e1000. Newer features require
3003 	 * that the HW strips the CRC.
3004 	 */
3005 	rctl |= E1000_RCTL_SECRC;
3006 
3007 	/* disable store bad packets and clear size bits. */
3008 	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3009 
3010 	/* enable LPE to prevent packets larger than max_frame_size */
3011 	rctl |= E1000_RCTL_LPE;
3012 
3013 	/* disable queue 0 to prevent tail write w/o re-config */
3014 	wr32(E1000_RXDCTL(0), 0);
3015 
3016 	/* Attention!!!  For SR-IOV PF driver operations you must enable
3017 	 * queue drop for all VF and PF queues to prevent head of line blocking
3018 	 * if an un-trusted VF does not provide descriptors to hardware.
3019 	 */
3020 	if (adapter->vfs_allocated_count) {
3021 		/* set all queue drop enable bits */
3022 		wr32(E1000_QDE, ALL_QUEUES);
3023 	}
3024 
3025 	/* This is useful for sniffing bad packets. */
3026 	if (adapter->netdev->features & NETIF_F_RXALL) {
3027 		/* UPE and MPE will be handled by normal PROMISC logic
3028 		 * in igb_set_rx_mode */
3029 		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3030 			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3031 			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3032 
3033 		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3034 			  E1000_RCTL_DPF | /* Allow filtered pause */
3035 			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3036 		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3037 		 * and that breaks VLANs.
3038 		 */
3039 	}
3040 
3041 	wr32(E1000_RCTL, rctl);
3042 }
3043 
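/**
 * igb_set_vf_rlpml - set the per-pool maximum receive packet size
 * @adapter: board private structure
 * @size: maximum frame size the pool should accept
 * @vfn: VF/pool index whose VMOLR register is updated
 **/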
3044 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3045                                    int vfn)
3046 {
3047 	struct e1000_hw *hw = &adapter->hw;
3048 	u32 vmolr;
3049 
3050 	/* if this is a VF rather than the PF, check whether it has VLANs
3051 	 * enabled and increase the size to allow for the vlan tag */
3052 	if (vfn < adapter->vfs_allocated_count &&
3053 	    adapter->vf_data[vfn].vlans_enabled)
3054 		size += VLAN_TAG_SIZE;
3055 
3056 	vmolr = rd32(E1000_VMOLR(vfn));
3057 	vmolr &= ~E1000_VMOLR_RLPML_MASK;
3058 	vmolr |= size | E1000_VMOLR_LPE;
3059 	wr32(E1000_VMOLR(vfn), vmolr);
3060 
3061 	return 0;
3062 }
3063 
3064 /**
3065  * igb_rlpml_set - set maximum receive packet size
3066  * @adapter: board private structure
3067  *
3068  * Configure maximum receivable packet size.
3069  **/
3070 static void igb_rlpml_set(struct igb_adapter *adapter)
3071 {
3072 	u32 max_frame_size = adapter->max_frame_size;
3073 	struct e1000_hw *hw = &adapter->hw;
3074 	u16 pf_id = adapter->vfs_allocated_count;
3075 
3076 	if (pf_id) {
3077 		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3078 		/*
3079 		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3080 		 * to our max jumbo frame size, in case we need to enable
3081 		 * jumbo frames on one of the rings later.
3082 		 * This will not pass over-length frames into the default
3083 		 * queue because it's gated by the VMOLR.RLPML.
3084 		 */
3085 		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3086 	}
3087 
3088 	wr32(E1000_RLPML, max_frame_size);
3089 }
3090 
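/**
 * igb_set_vmolr - configure the VM offload register for a pool
 * @adapter: board private structure
 * @vfn: VF/pool index to configure
 * @aupe: true if untagged packets should be accepted by this pool
 **/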
3091 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3092 				 int vfn, bool aupe)
3093 {
3094 	struct e1000_hw *hw = &adapter->hw;
3095 	u32 vmolr;
3096 
3097 	/*
3098 	 * This register exists only on 82576 and newer, so exit and do
3099 	 * nothing on older hardware
3100 	 */
3101 	if (hw->mac.type < e1000_82576)
3102 		return;
3103 
3104 	vmolr = rd32(E1000_VMOLR(vfn));
3105 	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3106 	if (aupe)
3107 		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3108 	else
3109 		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3110 
3111 	/* clear all bits that might not be set */
3112 	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3113 
3114 	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3115 		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3116 	/*
3117 	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3118 	 * multicast packets
3119 	 */
3120 	if (vfn <= adapter->vfs_allocated_count)
3121 		vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
3122 
3123 	wr32(E1000_VMOLR(vfn), vmolr);
3124 }
3125 
3126 /**
3127  * igb_configure_rx_ring - Configure a receive ring after Reset
3128  * @adapter: board private structure
3129  * @ring: receive ring to be configured
3130  *
3131  * Configure the Rx unit of the MAC after a reset.
3132  **/
3133 void igb_configure_rx_ring(struct igb_adapter *adapter,
3134                            struct igb_ring *ring)
3135 {
3136 	struct e1000_hw *hw = &adapter->hw;
3137 	u64 rdba = ring->dma;
3138 	int reg_idx = ring->reg_idx;
3139 	u32 srrctl = 0, rxdctl = 0;
3140 
3141 	/* disable the queue */
3142 	wr32(E1000_RXDCTL(reg_idx), 0);
3143 
3144 	/* Set DMA base address registers */
3145 	wr32(E1000_RDBAL(reg_idx),
3146 	     rdba & 0x00000000ffffffffULL);
3147 	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3148 	wr32(E1000_RDLEN(reg_idx),
3149 	               ring->count * sizeof(union e1000_adv_rx_desc));
3150 
3151 	/* initialize head and tail */
3152 	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3153 	wr32(E1000_RDH(reg_idx), 0);
3154 	writel(0, ring->tail);
3155 
3156 	/* set descriptor configuration */
3157 	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3158 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3159 	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3160 #else
3161 	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3162 #endif
3163 	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3164 	if (hw->mac.type >= e1000_82580)
3165 		srrctl |= E1000_SRRCTL_TIMESTAMP;
3166 	/* Only set Drop Enable if we are supporting multiple queues */
3167 	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3168 		srrctl |= E1000_SRRCTL_DROP_EN;
3169 
3170 	wr32(E1000_SRRCTL(reg_idx), srrctl);
3171 
3172 	/* set filtering for VMDQ pools */
3173 	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3174 
3175 	rxdctl |= IGB_RX_PTHRESH;
3176 	rxdctl |= IGB_RX_HTHRESH << 8;
3177 	rxdctl |= IGB_RX_WTHRESH << 16;
3178 
3179 	/* enable receive descriptor fetching */
3180 	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3181 	wr32(E1000_RXDCTL(reg_idx), rxdctl);
3182 }
3183 
3184 /**
3185  * igb_configure_rx - Configure receive Unit after Reset
3186  * @adapter: board private structure
3187  *
3188  * Configure the Rx unit of the MAC after a reset.
3189  **/
3190 static void igb_configure_rx(struct igb_adapter *adapter)
3191 {
3192 	int i;
3193 
3194 	/* set UTA to appropriate mode */
3195 	igb_set_uta(adapter);
3196 
3197 	/* set the correct pool for the PF default MAC address in entry 0 */
3198 	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3199 	                 adapter->vfs_allocated_count);
3200 
3201 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3202 	 * the Base and Length of the Rx Descriptor Ring */
3203 	for (i = 0; i < adapter->num_rx_queues; i++)
3204 		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3205 }
3206 
3207 /**
3208  * igb_free_tx_resources - Free Tx Resources per Queue
3209  * @tx_ring: Tx descriptor ring for a specific queue
3210  *
3211  * Free all transmit software resources
3212  **/
3213 void igb_free_tx_resources(struct igb_ring *tx_ring)
3214 {
3215 	igb_clean_tx_ring(tx_ring);
3216 
3217 	vfree(tx_ring->tx_buffer_info);
3218 	tx_ring->tx_buffer_info = NULL;
3219 
3220 	/* if not set, then don't free */
3221 	if (!tx_ring->desc)
3222 		return;
3223 
3224 	dma_free_coherent(tx_ring->dev, tx_ring->size,
3225 			  tx_ring->desc, tx_ring->dma);
3226 
3227 	tx_ring->desc = NULL;
3228 }
3229 
3230 /**
3231  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3232  * @adapter: board private structure
3233  *
3234  * Free all transmit software resources
3235  **/
3236 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3237 {
3238 	int i;
3239 
3240 	for (i = 0; i < adapter->num_tx_queues; i++)
3241 		igb_free_tx_resources(adapter->tx_ring[i]);
3242 }
3243 
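/**
 * igb_unmap_and_free_tx_resource - clean up a single Tx buffer
 * @ring: ring the buffer was posted to
 * @tx_buffer: buffer to unmap and free
 *
 * Frees the skb if one is attached, unmaps the associated DMA mapping for
 * either the skb data or a page fragment, and clears the buffer for reuse.
 **/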
3244 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3245 				    struct igb_tx_buffer *tx_buffer)
3246 {
3247 	if (tx_buffer->skb) {
3248 		dev_kfree_skb_any(tx_buffer->skb);
3249 		if (tx_buffer->dma)
3250 			dma_unmap_single(ring->dev,
3251 					 tx_buffer->dma,
3252 					 tx_buffer->length,
3253 					 DMA_TO_DEVICE);
3254 	} else if (tx_buffer->dma) {
3255 		dma_unmap_page(ring->dev,
3256 			       tx_buffer->dma,
3257 			       tx_buffer->length,
3258 			       DMA_TO_DEVICE);
3259 	}
3260 	tx_buffer->next_to_watch = NULL;
3261 	tx_buffer->skb = NULL;
3262 	tx_buffer->dma = 0;
3263 	/* buffer_info must be completely set up in the transmit path */
3264 }
3265 
3266 /**
3267  * igb_clean_tx_ring - Free Tx Buffers
3268  * @tx_ring: ring to be cleaned
3269  **/
3270 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3271 {
3272 	struct igb_tx_buffer *buffer_info;
3273 	unsigned long size;
3274 	u16 i;
3275 
3276 	if (!tx_ring->tx_buffer_info)
3277 		return;
3278 	/* Free all the Tx ring sk_buffs */
3279 
3280 	for (i = 0; i < tx_ring->count; i++) {
3281 		buffer_info = &tx_ring->tx_buffer_info[i];
3282 		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3283 	}
3284 
3285 	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3286 	memset(tx_ring->tx_buffer_info, 0, size);
3287 
3288 	/* Zero out the descriptor ring */
3289 	memset(tx_ring->desc, 0, tx_ring->size);
3290 
3291 	tx_ring->next_to_use = 0;
3292 	tx_ring->next_to_clean = 0;
3293 }
3294 
3295 /**
3296  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3297  * @adapter: board private structure
3298  **/
3299 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3300 {
3301 	int i;
3302 
3303 	for (i = 0; i < adapter->num_tx_queues; i++)
3304 		igb_clean_tx_ring(adapter->tx_ring[i]);
3305 }
3306 
3307 /**
3308  * igb_free_rx_resources - Free Rx Resources
3309  * @rx_ring: ring to clean the resources from
3310  *
3311  * Free all receive software resources
3312  **/
3313 void igb_free_rx_resources(struct igb_ring *rx_ring)
3314 {
3315 	igb_clean_rx_ring(rx_ring);
3316 
3317 	vfree(rx_ring->rx_buffer_info);
3318 	rx_ring->rx_buffer_info = NULL;
3319 
3320 	/* if not set, then don't free */
3321 	if (!rx_ring->desc)
3322 		return;
3323 
3324 	dma_free_coherent(rx_ring->dev, rx_ring->size,
3325 			  rx_ring->desc, rx_ring->dma);
3326 
3327 	rx_ring->desc = NULL;
3328 }
3329 
3330 /**
3331  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3332  * @adapter: board private structure
3333  *
3334  * Free all receive software resources
3335  **/
3336 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3337 {
3338 	int i;
3339 
3340 	for (i = 0; i < adapter->num_rx_queues; i++)
3341 		igb_free_rx_resources(adapter->rx_ring[i]);
3342 }
3343 
3344 /**
3345  * igb_clean_rx_ring - Free Rx Buffers per Queue
3346  * @rx_ring: ring to free buffers from
3347  **/
3348 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3349 {
3350 	unsigned long size;
3351 	u16 i;
3352 
3353 	if (!rx_ring->rx_buffer_info)
3354 		return;
3355 
3356 	/* Free all the Rx ring sk_buffs */
3357 	for (i = 0; i < rx_ring->count; i++) {
3358 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3359 		if (buffer_info->dma) {
3360 			dma_unmap_single(rx_ring->dev,
3361 			                 buffer_info->dma,
3362 					 IGB_RX_HDR_LEN,
3363 					 DMA_FROM_DEVICE);
3364 			buffer_info->dma = 0;
3365 		}
3366 
3367 		if (buffer_info->skb) {
3368 			dev_kfree_skb(buffer_info->skb);
3369 			buffer_info->skb = NULL;
3370 		}
3371 		if (buffer_info->page_dma) {
3372 			dma_unmap_page(rx_ring->dev,
3373 			               buffer_info->page_dma,
3374 				       PAGE_SIZE / 2,
3375 				       DMA_FROM_DEVICE);
3376 			buffer_info->page_dma = 0;
3377 		}
3378 		if (buffer_info->page) {
3379 			put_page(buffer_info->page);
3380 			buffer_info->page = NULL;
3381 			buffer_info->page_offset = 0;
3382 		}
3383 	}
3384 
3385 	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3386 	memset(rx_ring->rx_buffer_info, 0, size);
3387 
3388 	/* Zero out the descriptor ring */
3389 	memset(rx_ring->desc, 0, rx_ring->size);
3390 
3391 	rx_ring->next_to_clean = 0;
3392 	rx_ring->next_to_use = 0;
3393 }
3394 
3395 /**
3396  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3397  * @adapter: board private structure
3398  **/
3399 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3400 {
3401 	int i;
3402 
3403 	for (i = 0; i < adapter->num_rx_queues; i++)
3404 		igb_clean_rx_ring(adapter->rx_ring[i]);
3405 }
3406 
3407 /**
3408  * igb_set_mac - Change the Ethernet Address of the NIC
3409  * @netdev: network interface device structure
3410  * @p: pointer to an address structure
3411  *
3412  * Returns 0 on success, negative on failure
3413  **/
3414 static int igb_set_mac(struct net_device *netdev, void *p)
3415 {
3416 	struct igb_adapter *adapter = netdev_priv(netdev);
3417 	struct e1000_hw *hw = &adapter->hw;
3418 	struct sockaddr *addr = p;
3419 
3420 	if (!is_valid_ether_addr(addr->sa_data))
3421 		return -EADDRNOTAVAIL;
3422 
3423 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3424 	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3425 
3426 	/* set the correct pool for the new PF MAC address in entry 0 */
3427 	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3428 	                 adapter->vfs_allocated_count);
3429 
3430 	return 0;
3431 }
3432 
3433 /**
3434  * igb_write_mc_addr_list - write multicast addresses to MTA
3435  * @netdev: network interface device structure
3436  *
3437  * Writes multicast address list to the MTA hash table.
3438  * Returns: -ENOMEM on failure
3439  *                0 on no addresses written
3440  *                X on writing X addresses to MTA
3441  **/
3442 static int igb_write_mc_addr_list(struct net_device *netdev)
3443 {
3444 	struct igb_adapter *adapter = netdev_priv(netdev);
3445 	struct e1000_hw *hw = &adapter->hw;
3446 	struct netdev_hw_addr *ha;
3447 	u8  *mta_list;
3448 	int i;
3449 
3450 	if (netdev_mc_empty(netdev)) {
3451 		/* nothing to program, so clear mc list */
3452 		igb_update_mc_addr_list(hw, NULL, 0);
3453 		igb_restore_vf_multicasts(adapter);
3454 		return 0;
3455 	}
3456 
3457 	mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3458 	if (!mta_list)
3459 		return -ENOMEM;
3460 
3461 	/* The shared function expects a packed array of only addresses. */
3462 	i = 0;
3463 	netdev_for_each_mc_addr(ha, netdev)
3464 		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3465 
3466 	igb_update_mc_addr_list(hw, mta_list, i);
3467 	kfree(mta_list);
3468 
3469 	return netdev_mc_count(netdev);
3470 }
3471 
3472 /**
3473  * igb_write_uc_addr_list - write unicast addresses to RAR table
3474  * @netdev: network interface device structure
3475  *
3476  * Writes unicast address list to the RAR table.
3477  * Returns: -ENOMEM on failure/insufficient address space
3478  *                0 on no addresses written
3479  *                X on writing X addresses to the RAR table
3480  **/
3481 static int igb_write_uc_addr_list(struct net_device *netdev)
3482 {
3483 	struct igb_adapter *adapter = netdev_priv(netdev);
3484 	struct e1000_hw *hw = &adapter->hw;
3485 	unsigned int vfn = adapter->vfs_allocated_count;
3486 	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3487 	int count = 0;
3488 
3489 	/* return ENOMEM indicating insufficient memory for addresses */
3490 	if (netdev_uc_count(netdev) > rar_entries)
3491 		return -ENOMEM;
3492 
3493 	if (!netdev_uc_empty(netdev) && rar_entries) {
3494 		struct netdev_hw_addr *ha;
3495 
3496 		netdev_for_each_uc_addr(ha, netdev) {
3497 			if (!rar_entries)
3498 				break;
3499 			igb_rar_set_qsel(adapter, ha->addr,
3500 			                 rar_entries--,
3501 			                 vfn);
3502 			count++;
3503 		}
3504 	}
3505 	/* write the addresses in reverse order to avoid write combining */
3506 	for (; rar_entries > 0 ; rar_entries--) {
3507 		wr32(E1000_RAH(rar_entries), 0);
3508 		wr32(E1000_RAL(rar_entries), 0);
3509 	}
3510 	wrfl();
3511 
3512 	return count;
3513 }
3514 
3515 /**
3516  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3517  * @netdev: network interface device structure
3518  *
3519  * The set_rx_mode entry point is called whenever the unicast or multicast
3520  * address lists or the network interface flags are updated.  This routine is
3521  * responsible for configuring the hardware for proper unicast, multicast,
3522  * promiscuous mode, and all-multi behavior.
3523  **/
3524 static void igb_set_rx_mode(struct net_device *netdev)
3525 {
3526 	struct igb_adapter *adapter = netdev_priv(netdev);
3527 	struct e1000_hw *hw = &adapter->hw;
3528 	unsigned int vfn = adapter->vfs_allocated_count;
3529 	u32 rctl, vmolr = 0;
3530 	int count;
3531 
3532 	/* Check for Promiscuous and All Multicast modes */
3533 	rctl = rd32(E1000_RCTL);
3534 
3535 	/* clear the affected bits */
3536 	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3537 
3538 	if (netdev->flags & IFF_PROMISC) {
3539 		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3540 		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3541 	} else {
3542 		if (netdev->flags & IFF_ALLMULTI) {
3543 			rctl |= E1000_RCTL_MPE;
3544 			vmolr |= E1000_VMOLR_MPME;
3545 		} else {
3546 			/*
3547 			 * Write addresses to the MTA; if the attempt fails,
3548 			 * fall back to multicast promiscuous mode so that we
3549 			 * can at least receive multicast traffic
3550 			 */
3551 			count = igb_write_mc_addr_list(netdev);
3552 			if (count < 0) {
3553 				rctl |= E1000_RCTL_MPE;
3554 				vmolr |= E1000_VMOLR_MPME;
3555 			} else if (count) {
3556 				vmolr |= E1000_VMOLR_ROMPE;
3557 			}
3558 		}
3559 		/*
3560 		 * Write addresses to the available RAR registers; if there is
3561 		 * not sufficient space to store all the addresses then enable
3562 		 * unicast promiscuous mode
3563 		 */
3564 		count = igb_write_uc_addr_list(netdev);
3565 		if (count < 0) {
3566 			rctl |= E1000_RCTL_UPE;
3567 			vmolr |= E1000_VMOLR_ROPE;
3568 		}
3569 		rctl |= E1000_RCTL_VFE;
3570 	}
3571 	wr32(E1000_RCTL, rctl);
3572 
3573 	/*
3574 	 * In order to support SR-IOV and eventually VMDq it is necessary to set
3575 	 * the VMOLR to enable the appropriate modes.  Without this workaround
3576 	 * we will have issues with VLAN tag stripping not being done for frames
3577 	 * that are only arriving because we are the default pool
3578 	 */
3579 	if (hw->mac.type < e1000_82576)
3580 		return;
3581 
3582 	vmolr |= rd32(E1000_VMOLR(vfn)) &
3583 	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3584 	wr32(E1000_VMOLR(vfn), vmolr);
3585 	igb_restore_vf_multicasts(adapter);
3586 }
3587 
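/**
 * igb_check_wvbr - read and accumulate the Wrong VM Behavior register
 * @adapter: board private structure
 *
 * On 82576 and i350 adapters, read WVBR and OR any reported spoof
 * events into adapter->wvbr for later reporting by igb_spoof_check().
 **/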
3588 static void igb_check_wvbr(struct igb_adapter *adapter)
3589 {
3590 	struct e1000_hw *hw = &adapter->hw;
3591 	u32 wvbr = 0;
3592 
3593 	switch (hw->mac.type) {
3594 	case e1000_82576:
3595 	case e1000_i350:
3596 		if (!(wvbr = rd32(E1000_WVBR)))
3597 			return;
3598 		break;
3599 	default:
3600 		break;
3601 	}
3602 
3603 	adapter->wvbr |= wvbr;
3604 }
3605 
3606 #define IGB_STAGGERED_QUEUE_OFFSET 8
3607 
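/**
 * igb_spoof_check - report spoof events recorded by igb_check_wvbr
 * @adapter: board private structure
 *
 * Warn about any VF that triggered a spoof event and clear the
 * corresponding bits in adapter->wvbr.
 **/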
3608 static void igb_spoof_check(struct igb_adapter *adapter)
3609 {
3610 	int j;
3611 
3612 	if (!adapter->wvbr)
3613 		return;
3614 
3615 	for (j = 0; j < adapter->vfs_allocated_count; j++) {
3616 		if (adapter->wvbr & (1 << j) ||
3617 		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3618 			dev_warn(&adapter->pdev->dev,
3619 				"Spoof event(s) detected on VF %d\n", j);
3620 			adapter->wvbr &=
3621 				~((1 << j) |
3622 				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3623 		}
3624 	}
3625 }
3626 
3627 /* Need to wait a few seconds after link up to get diagnostic information from
3628  * the phy */
3629 static void igb_update_phy_info(unsigned long data)
3630 {
3631 	struct igb_adapter *adapter = (struct igb_adapter *) data;
3632 	igb_get_phy_info(&adapter->hw);
3633 }
3634 
3635 /**
3636  * igb_has_link - check shared code for link and determine up/down
3637  * @adapter: pointer to driver private info
3638  **/
3639 bool igb_has_link(struct igb_adapter *adapter)
3640 {
3641 	struct e1000_hw *hw = &adapter->hw;
3642 	bool link_active = false;
3643 	s32 ret_val = 0;
3644 
3645 	/* get_link_status is set on LSC (link status) interrupt or
3646 	 * rx sequence error interrupt.  get_link_status will stay
3647 	 * set until e1000_check_for_link establishes link
3648 	 * for copper adapters ONLY
3649 	 */
3650 	switch (hw->phy.media_type) {
3651 	case e1000_media_type_copper:
3652 		if (hw->mac.get_link_status) {
3653 			ret_val = hw->mac.ops.check_for_link(hw);
3654 			link_active = !hw->mac.get_link_status;
3655 		} else {
3656 			link_active = true;
3657 		}
3658 		break;
3659 	case e1000_media_type_internal_serdes:
3660 		ret_val = hw->mac.ops.check_for_link(hw);
3661 		link_active = hw->mac.serdes_has_link;
3662 		break;
3663 	default:
3664 	case e1000_media_type_unknown:
3665 		break;
3666 	}
3667 
3668 	return link_active;
3669 }
3670 
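/**
 * igb_thermal_sensor_event - check for a thermal sensor event
 * @hw: pointer to hardware structure
 * @event: thermal sensor event bit(s) to test in THSTAT
 *
 * Only applicable to i350 copper (non-SGMII) devices.  Returns true
 * if the requested event bit is set in the thermal sensor status
 * register.
 **/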
3671 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3672 {
3673 	bool ret = false;
3674 	u32 ctrl_ext, thstat;
3675 
3676 	/* check for thermal sensor event on i350, copper only */
3677 	if (hw->mac.type == e1000_i350) {
3678 		thstat = rd32(E1000_THSTAT);
3679 		ctrl_ext = rd32(E1000_CTRL_EXT);
3680 
3681 		if ((hw->phy.media_type == e1000_media_type_copper) &&
3682 		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3683 			ret = !!(thstat & event);
3684 		}
3685 	}
3686 
3687 	return ret;
3688 }
3689 
3690 /**
3691  * igb_watchdog - Timer Call-back
3692  * @data: pointer to adapter cast into an unsigned long
3693  **/
3694 static void igb_watchdog(unsigned long data)
3695 {
3696 	struct igb_adapter *adapter = (struct igb_adapter *)data;
3697 	/* Do the rest outside of interrupt context */
3698 	schedule_work(&adapter->watchdog_task);
3699 }
3700 
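/**
 * igb_watchdog_task - check link and update statistics
 * @work: pointer to the work_struct embedded in the adapter
 *
 * Handles link up/down transitions, updates adapter statistics,
 * forces Tx hang detection, fires a software interrupt so the rings
 * get serviced, and rearms the watchdog timer.
 **/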
3701 static void igb_watchdog_task(struct work_struct *work)
3702 {
3703 	struct igb_adapter *adapter = container_of(work,
3704 	                                           struct igb_adapter,
3705                                                    watchdog_task);
3706 	struct e1000_hw *hw = &adapter->hw;
3707 	struct net_device *netdev = adapter->netdev;
3708 	u32 link;
3709 	int i;
3710 
3711 	link = igb_has_link(adapter);
3712 	if (link) {
3713 		/* Cancel scheduled suspend requests. */
3714 		pm_runtime_resume(netdev->dev.parent);
3715 
3716 		if (!netif_carrier_ok(netdev)) {
3717 			u32 ctrl;
3718 			hw->mac.ops.get_speed_and_duplex(hw,
3719 			                                 &adapter->link_speed,
3720 			                                 &adapter->link_duplex);
3721 
3722 			ctrl = rd32(E1000_CTRL);
3723 			/* Link status message must follow this format */
3724 			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3725 			       "Duplex, Flow Control: %s\n",
3726 			       netdev->name,
3727 			       adapter->link_speed,
3728 			       adapter->link_duplex == FULL_DUPLEX ?
3729 			       "Full" : "Half",
3730 			       (ctrl & E1000_CTRL_TFCE) &&
3731 			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3732 			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3733 			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3734 
3735 			/* check for thermal sensor event */
3736 			if (igb_thermal_sensor_event(hw,
3737 			    E1000_THSTAT_LINK_THROTTLE)) {
3738 				netdev_info(netdev, "The network adapter link "
3739 					    "speed was downshifted because it "
3740 					    "overheated\n");
3741 			}
3742 
3743 			/* adjust timeout factor according to speed/duplex */
3744 			adapter->tx_timeout_factor = 1;
3745 			switch (adapter->link_speed) {
3746 			case SPEED_10:
3747 				adapter->tx_timeout_factor = 14;
3748 				break;
3749 			case SPEED_100:
3750 				/* maybe add some timeout factor ? */
3751 				break;
3752 			}
3753 
3754 			netif_carrier_on(netdev);
3755 
3756 			igb_ping_all_vfs(adapter);
3757 			igb_check_vf_rate_limit(adapter);
3758 
3759 			/* link state has changed, schedule phy info update */
3760 			if (!test_bit(__IGB_DOWN, &adapter->state))
3761 				mod_timer(&adapter->phy_info_timer,
3762 					  round_jiffies(jiffies + 2 * HZ));
3763 		}
3764 	} else {
3765 		if (netif_carrier_ok(netdev)) {
3766 			adapter->link_speed = 0;
3767 			adapter->link_duplex = 0;
3768 
3769 			/* check for thermal sensor event */
3770 			if (igb_thermal_sensor_event(hw,
3771 			    E1000_THSTAT_PWR_DOWN)) {
3772 				netdev_err(netdev, "The network adapter was "
3773 					   "stopped because it overheated\n");
3774 			}
3775 
3776 			/* Link status message must follow this format */
3777 			printk(KERN_INFO "igb: %s NIC Link is Down\n",
3778 			       netdev->name);
3779 			netif_carrier_off(netdev);
3780 
3781 			igb_ping_all_vfs(adapter);
3782 
3783 			/* link state has changed, schedule phy info update */
3784 			if (!test_bit(__IGB_DOWN, &adapter->state))
3785 				mod_timer(&adapter->phy_info_timer,
3786 					  round_jiffies(jiffies + 2 * HZ));
3787 
3788 			pm_schedule_suspend(netdev->dev.parent,
3789 					    MSEC_PER_SEC * 5);
3790 		}
3791 	}
3792 
3793 	spin_lock(&adapter->stats64_lock);
3794 	igb_update_stats(adapter, &adapter->stats64);
3795 	spin_unlock(&adapter->stats64_lock);
3796 
3797 	for (i = 0; i < adapter->num_tx_queues; i++) {
3798 		struct igb_ring *tx_ring = adapter->tx_ring[i];
3799 		if (!netif_carrier_ok(netdev)) {
3800 			/* We've lost link, so the controller stops DMA,
3801 			 * but we've got queued Tx work that's never going
3802 			 * to get done, so reset controller to flush Tx.
3803 			 * (Do the reset outside of interrupt context). */
3804 			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3805 				adapter->tx_timeout_count++;
3806 				schedule_work(&adapter->reset_task);
3807 				/* return immediately since reset is imminent */
3808 				return;
3809 			}
3810 		}
3811 
3812 		/* Force detection of hung controller every watchdog period */
3813 		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3814 	}
3815 
3816 	/* Cause software interrupt to ensure rx ring is cleaned */
3817 	if (adapter->msix_entries) {
3818 		u32 eics = 0;
3819 		for (i = 0; i < adapter->num_q_vectors; i++)
3820 			eics |= adapter->q_vector[i]->eims_value;
3821 		wr32(E1000_EICS, eics);
3822 	} else {
3823 		wr32(E1000_ICS, E1000_ICS_RXDMT0);
3824 	}
3825 
3826 	igb_spoof_check(adapter);
3827 
3828 	/* Reset the timer */
3829 	if (!test_bit(__IGB_DOWN, &adapter->state))
3830 		mod_timer(&adapter->watchdog_timer,
3831 			  round_jiffies(jiffies + 2 * HZ));
3832 }
3833 
3834 enum latency_range {
3835 	lowest_latency = 0,
3836 	low_latency = 1,
3837 	bulk_latency = 2,
3838 	latency_invalid = 255
3839 };
3840 
3841 /**
3842  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3843  *
3844  *      Stores a new ITR value based strictly on packet size.  This
3845  *      algorithm is less sophisticated than that used in igb_update_itr,
3846  *      due to the difficulty of synchronizing statistics across multiple
3847  *      receive rings.  The divisors and thresholds used by this function
3848  *      were determined based on theoretical maximum wire speed and testing
3849  *      data, in order to minimize response time while increasing bulk
3850  *      throughput.
3851  *      This functionality is controlled by the InterruptThrottleRate module
3852  *      parameter (see igb_param.c)
3853  *      NOTE:  This function is called only when operating in a multiqueue
3854  *             receive environment.
3855  * @q_vector: pointer to q_vector
3856  **/
3857 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3858 {
3859 	int new_val = q_vector->itr_val;
3860 	int avg_wire_size = 0;
3861 	struct igb_adapter *adapter = q_vector->adapter;
3862 	unsigned int packets;
3863 
3864 	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3865 	 * ints/sec - ITR timer value of 120 ticks.
3866 	 */
3867 	if (adapter->link_speed != SPEED_1000) {
3868 		new_val = IGB_4K_ITR;
3869 		goto set_itr_val;
3870 	}
3871 
3872 	packets = q_vector->rx.total_packets;
3873 	if (packets)
3874 		avg_wire_size = q_vector->rx.total_bytes / packets;
3875 
3876 	packets = q_vector->tx.total_packets;
3877 	if (packets)
3878 		avg_wire_size = max_t(u32, avg_wire_size,
3879 				      q_vector->tx.total_bytes / packets);
3880 
3881 	/* if avg_wire_size isn't set no work was done */
3882 	if (!avg_wire_size)
3883 		goto clear_counts;
3884 
3885 	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3886 	avg_wire_size += 24;
3887 
3888 	/* Don't starve jumbo frames */
3889 	avg_wire_size = min(avg_wire_size, 3000);
3890 
3891 	/* Give a little boost to mid-size frames */
3892 	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3893 		new_val = avg_wire_size / 3;
3894 	else
3895 		new_val = avg_wire_size / 2;
3896 
3897 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3898 	if (new_val < IGB_20K_ITR &&
3899 	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3900 	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3901 		new_val = IGB_20K_ITR;
3902 
3903 set_itr_val:
3904 	if (new_val != q_vector->itr_val) {
3905 		q_vector->itr_val = new_val;
3906 		q_vector->set_itr = 1;
3907 	}
3908 clear_counts:
3909 	q_vector->rx.total_bytes = 0;
3910 	q_vector->rx.total_packets = 0;
3911 	q_vector->tx.total_bytes = 0;
3912 	q_vector->tx.total_packets = 0;
3913 }
3914 
3915 /**
3916  * igb_update_itr - update the dynamic ITR value based on statistics
3917  *      Stores a new ITR value based on packets and byte
3918  *      counts during the last interrupt.  The advantage of per interrupt
3919  *      computation is faster updates and more accurate ITR for the current
3920  *      traffic pattern.  Constants in this function were computed
3921  *      based on theoretical maximum wire speed and thresholds were set based
3922  *      on testing data as well as attempting to minimize response time
3923  *      while increasing bulk throughput.
3924  *      This functionality is controlled by the InterruptThrottleRate module
3925  *      parameter (see igb_param.c)
3926  *      NOTE:  These calculations are only valid when operating in a single-
3927  *             queue environment.
3928  * @q_vector: pointer to q_vector
3929  * @ring_container: ring info to update the itr for
3930  **/
3931 static void igb_update_itr(struct igb_q_vector *q_vector,
3932 			   struct igb_ring_container *ring_container)
3933 {
3934 	unsigned int packets = ring_container->total_packets;
3935 	unsigned int bytes = ring_container->total_bytes;
3936 	u8 itrval = ring_container->itr;
3937 
3938 	/* no packets, exit with status unchanged */
3939 	if (packets == 0)
3940 		return;
3941 
3942 	switch (itrval) {
3943 	case lowest_latency:
3944 		/* handle TSO and jumbo frames */
3945 		if (bytes/packets > 8000)
3946 			itrval = bulk_latency;
3947 		else if ((packets < 5) && (bytes > 512))
3948 			itrval = low_latency;
3949 		break;
3950 	case low_latency:  /* 50 usec aka 20000 ints/s */
3951 		if (bytes > 10000) {
3952 			/* this if handles the TSO accounting */
3953 			if (bytes/packets > 8000) {
3954 				itrval = bulk_latency;
3955 			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
3956 				itrval = bulk_latency;
3957 			} else if (packets > 35) {
3958 				itrval = lowest_latency;
3959 			}
3960 		} else if (bytes/packets > 2000) {
3961 			itrval = bulk_latency;
3962 		} else if (packets <= 2 && bytes < 512) {
3963 			itrval = lowest_latency;
3964 		}
3965 		break;
3966 	case bulk_latency: /* 250 usec aka 4000 ints/s */
3967 		if (bytes > 25000) {
3968 			if (packets > 35)
3969 				itrval = low_latency;
3970 		} else if (bytes < 1500) {
3971 			itrval = low_latency;
3972 		}
3973 		break;
3974 	}
3975 
3976 	/* clear work counters since we have the values we need */
3977 	ring_container->total_bytes = 0;
3978 	ring_container->total_packets = 0;
3979 
3980 	/* write updated itr to ring container */
3981 	ring_container->itr = itrval;
3982 }
3983 
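/**
 * igb_set_itr - compute a new ITR value for a single-queue vector
 * @q_vector: pointer to q_vector
 *
 * Combines the Tx and Rx latency classifications from igb_update_itr
 * and records the resulting ITR value to be written on the next
 * interrupt.
 **/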
3984 static void igb_set_itr(struct igb_q_vector *q_vector)
3985 {
3986 	struct igb_adapter *adapter = q_vector->adapter;
3987 	u32 new_itr = q_vector->itr_val;
3988 	u8 current_itr = 0;
3989 
3990 	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3991 	if (adapter->link_speed != SPEED_1000) {
3992 		current_itr = 0;
3993 		new_itr = IGB_4K_ITR;
3994 		goto set_itr_now;
3995 	}
3996 
3997 	igb_update_itr(q_vector, &q_vector->tx);
3998 	igb_update_itr(q_vector, &q_vector->rx);
3999 
4000 	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4001 
4002 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4003 	if (current_itr == lowest_latency &&
4004 	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4005 	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4006 		current_itr = low_latency;
4007 
4008 	switch (current_itr) {
4009 	/* counts and packets in update_itr are dependent on these numbers */
4010 	case lowest_latency:
4011 		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4012 		break;
4013 	case low_latency:
4014 		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4015 		break;
4016 	case bulk_latency:
4017 		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4018 		break;
4019 	default:
4020 		break;
4021 	}
4022 
4023 set_itr_now:
4024 	if (new_itr != q_vector->itr_val) {
4025 		/* this attempts to bias the interrupt rate towards Bulk
4026 		 * by adding intermediate steps when interrupt rate is
4027 		 * increasing */
4028 		new_itr = new_itr > q_vector->itr_val ?
4029 		             max((new_itr * q_vector->itr_val) /
4030 		                 (new_itr + (q_vector->itr_val >> 2)),
4031 				 new_itr) :
4032 			     new_itr;
4033 		/* Don't write the value here; it resets the adapter's
4034 		 * internal timer, and causes us to delay far longer than
4035 		 * we should between interrupts.  Instead, we write the ITR
4036 		 * value at the beginning of the next interrupt so the timing
4037 		 * ends up being correct.
4038 		 */
4039 		q_vector->itr_val = new_itr;
4040 		q_vector->set_itr = 1;
4041 	}
4042 }
4043 
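/**
 * igb_tx_ctxtdesc - write an advanced Tx context descriptor
 * @tx_ring: ring to place the descriptor on
 * @vlan_macip_lens: VLAN tag and MAC/IP header length fields
 * @type_tucmd: descriptor type and TUCMD offload fields
 * @mss_l4len_idx: MSS, L4 header length and context index fields
 **/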
4044 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4045 			    u32 type_tucmd, u32 mss_l4len_idx)
4046 {
4047 	struct e1000_adv_tx_context_desc *context_desc;
4048 	u16 i = tx_ring->next_to_use;
4049 
4050 	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4051 
4052 	i++;
4053 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4054 
4055 	/* set bits to identify this as an advanced context descriptor */
4056 	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4057 
4058 	/* For 82575, context index must be unique per ring. */
4059 	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4060 		mss_l4len_idx |= tx_ring->reg_idx << 4;
4061 
4062 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
4063 	context_desc->seqnum_seed	= 0;
4064 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
4065 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
4066 }
4067 
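/**
 * igb_tso - configure a TSO context descriptor if needed
 * @tx_ring: ring the packet will be transmitted on
 * @first: first tx_buffer structure for this packet
 * @hdr_len: returns the length of the L2 through L4 headers
 *
 * Returns 0 if the skb does not require TSO, a negative error code if
 * the cloned header could not be expanded, or 1 once the TSO context
 * descriptor has been written.
 **/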
4068 static int igb_tso(struct igb_ring *tx_ring,
4069 		   struct igb_tx_buffer *first,
4070 		   u8 *hdr_len)
4071 {
4072 	struct sk_buff *skb = first->skb;
4073 	u32 vlan_macip_lens, type_tucmd;
4074 	u32 mss_l4len_idx, l4len;
4075 
4076 	if (!skb_is_gso(skb))
4077 		return 0;
4078 
4079 	if (skb_header_cloned(skb)) {
4080 		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4081 		if (err)
4082 			return err;
4083 	}
4084 
4085 	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4086 	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4087 
4088 	if (first->protocol == __constant_htons(ETH_P_IP)) {
4089 		struct iphdr *iph = ip_hdr(skb);
4090 		iph->tot_len = 0;
4091 		iph->check = 0;
4092 		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4093 							 iph->daddr, 0,
4094 							 IPPROTO_TCP,
4095 							 0);
4096 		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4097 		first->tx_flags |= IGB_TX_FLAGS_TSO |
4098 				   IGB_TX_FLAGS_CSUM |
4099 				   IGB_TX_FLAGS_IPV4;
4100 	} else if (skb_is_gso_v6(skb)) {
4101 		ipv6_hdr(skb)->payload_len = 0;
4102 		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4103 						       &ipv6_hdr(skb)->daddr,
4104 						       0, IPPROTO_TCP, 0);
4105 		first->tx_flags |= IGB_TX_FLAGS_TSO |
4106 				   IGB_TX_FLAGS_CSUM;
4107 	}
4108 
4109 	/* compute header lengths */
4110 	l4len = tcp_hdrlen(skb);
4111 	*hdr_len = skb_transport_offset(skb) + l4len;
4112 
4113 	/* update gso size and bytecount with header size */
4114 	first->gso_segs = skb_shinfo(skb)->gso_segs;
4115 	first->bytecount += (first->gso_segs - 1) * *hdr_len;
4116 
4117 	/* MSS L4LEN IDX */
4118 	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4119 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4120 
4121 	/* VLAN MACLEN IPLEN */
4122 	vlan_macip_lens = skb_network_header_len(skb);
4123 	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4124 	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4125 
4126 	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4127 
4128 	return 1;
4129 }
4130 
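/**
 * igb_tx_csum - configure Tx checksum offload for a packet
 * @tx_ring: ring the packet will be transmitted on
 * @first: first tx_buffer structure for this packet
 *
 * Writes a context descriptor describing the IP and L4 headers so the
 * hardware can insert the checksum.  Skipped entirely when no offload
 * is requested and no VLAN tag is present.
 **/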
4131 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4132 {
4133 	struct sk_buff *skb = first->skb;
4134 	u32 vlan_macip_lens = 0;
4135 	u32 mss_l4len_idx = 0;
4136 	u32 type_tucmd = 0;
4137 
4138 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4139 		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4140 			return;
4141 	} else {
4142 		u8 l4_hdr = 0;
4143 		switch (first->protocol) {
4144 		case __constant_htons(ETH_P_IP):
4145 			vlan_macip_lens |= skb_network_header_len(skb);
4146 			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4147 			l4_hdr = ip_hdr(skb)->protocol;
4148 			break;
4149 		case __constant_htons(ETH_P_IPV6):
4150 			vlan_macip_lens |= skb_network_header_len(skb);
4151 			l4_hdr = ipv6_hdr(skb)->nexthdr;
4152 			break;
4153 		default:
4154 			if (unlikely(net_ratelimit())) {
4155 				dev_warn(tx_ring->dev,
4156 				 "partial checksum but proto=%x!\n",
4157 				 first->protocol);
4158 			}
4159 			break;
4160 		}
4161 
4162 		switch (l4_hdr) {
4163 		case IPPROTO_TCP:
4164 			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4165 			mss_l4len_idx = tcp_hdrlen(skb) <<
4166 					E1000_ADVTXD_L4LEN_SHIFT;
4167 			break;
4168 		case IPPROTO_SCTP:
4169 			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4170 			mss_l4len_idx = sizeof(struct sctphdr) <<
4171 					E1000_ADVTXD_L4LEN_SHIFT;
4172 			break;
4173 		case IPPROTO_UDP:
4174 			mss_l4len_idx = sizeof(struct udphdr) <<
4175 					E1000_ADVTXD_L4LEN_SHIFT;
4176 			break;
4177 		default:
4178 			if (unlikely(net_ratelimit())) {
4179 				dev_warn(tx_ring->dev,
4180 				 "partial checksum but l4 proto=%x!\n",
4181 				 l4_hdr);
4182 			}
4183 			break;
4184 		}
4185 
4186 		/* update TX checksum flag */
4187 		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4188 	}
4189 
4190 	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4191 	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4192 
4193 	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4194 }
4195 
4196 static __le32 igb_tx_cmd_type(u32 tx_flags)
4197 {
4198 	/* set type for advanced descriptor with frame checksum insertion */
4199 	__le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4200 				      E1000_ADVTXD_DCMD_IFCS |
4201 				      E1000_ADVTXD_DCMD_DEXT);
4202 
4203 	/* set HW vlan bit if vlan is present */
4204 	if (tx_flags & IGB_TX_FLAGS_VLAN)
4205 		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4206 
4207 	/* set timestamp bit if present */
4208 	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4209 		cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4210 
4211 	/* set segmentation bits for TSO */
4212 	if (tx_flags & IGB_TX_FLAGS_TSO)
4213 		cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4214 
4215 	return cmd_type;
4216 }
4217 
4218 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4219 				 union e1000_adv_tx_desc *tx_desc,
4220 				 u32 tx_flags, unsigned int paylen)
4221 {
4222 	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4223 
4224 	/* 82575 requires a unique index per ring if any offload is enabled */
4225 	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4226 	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4227 		olinfo_status |= tx_ring->reg_idx << 4;
4228 
4229 	/* insert L4 checksum */
4230 	if (tx_flags & IGB_TX_FLAGS_CSUM) {
4231 		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4232 
4233 		/* insert IPv4 checksum */
4234 		if (tx_flags & IGB_TX_FLAGS_IPV4)
4235 			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4236 	}
4237 
4238 	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4239 }
4240 
4241 /*
4242  * The largest size we can write to the descriptor is 65535.  In order to
4243  * maintain a power of two alignment we have to limit ourselves to 32K.
4244  */
4245 #define IGB_MAX_TXD_PWR	15
4246 #define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
4247 
4248 static void igb_tx_map(struct igb_ring *tx_ring,
4249 		       struct igb_tx_buffer *first,
4250 		       const u8 hdr_len)
4251 {
4252 	struct sk_buff *skb = first->skb;
4253 	struct igb_tx_buffer *tx_buffer_info;
4254 	union e1000_adv_tx_desc *tx_desc;
4255 	dma_addr_t dma;
4256 	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4257 	unsigned int data_len = skb->data_len;
4258 	unsigned int size = skb_headlen(skb);
4259 	unsigned int paylen = skb->len - hdr_len;
4260 	__le32 cmd_type;
4261 	u32 tx_flags = first->tx_flags;
4262 	u16 i = tx_ring->next_to_use;
4263 
4264 	tx_desc = IGB_TX_DESC(tx_ring, i);
4265 
4266 	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4267 	cmd_type = igb_tx_cmd_type(tx_flags);
4268 
4269 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4270 	if (dma_mapping_error(tx_ring->dev, dma))
4271 		goto dma_error;
4272 
4273 	/* record length, and DMA address */
4274 	first->length = size;
4275 	first->dma = dma;
4276 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
4277 
4278 	for (;;) {
4279 		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4280 			tx_desc->read.cmd_type_len =
4281 				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4282 
4283 			i++;
4284 			tx_desc++;
4285 			if (i == tx_ring->count) {
4286 				tx_desc = IGB_TX_DESC(tx_ring, 0);
4287 				i = 0;
4288 			}
4289 
4290 			dma += IGB_MAX_DATA_PER_TXD;
4291 			size -= IGB_MAX_DATA_PER_TXD;
4292 
4293 			tx_desc->read.olinfo_status = 0;
4294 			tx_desc->read.buffer_addr = cpu_to_le64(dma);
4295 		}
4296 
4297 		if (likely(!data_len))
4298 			break;
4299 
4300 		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4301 
4302 		i++;
4303 		tx_desc++;
4304 		if (i == tx_ring->count) {
4305 			tx_desc = IGB_TX_DESC(tx_ring, 0);
4306 			i = 0;
4307 		}
4308 
4309 		size = skb_frag_size(frag);
4310 		data_len -= size;
4311 
4312 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4313 				   size, DMA_TO_DEVICE);
4314 		if (dma_mapping_error(tx_ring->dev, dma))
4315 			goto dma_error;
4316 
4317 		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4318 		tx_buffer_info->length = size;
4319 		tx_buffer_info->dma = dma;
4320 
4321 		tx_desc->read.olinfo_status = 0;
4322 		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4323 
4324 		frag++;
4325 	}
4326 
4327 	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4328 
4329 	/* write last descriptor with RS and EOP bits */
4330 	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4331 	if (unlikely(skb->no_fcs))
4332 		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4333 	tx_desc->read.cmd_type_len = cmd_type;
4334 
4335 	/* set the timestamp */
4336 	first->time_stamp = jiffies;
4337 
4338 	/*
4339 	 * Force memory writes to complete before letting h/w know there
4340 	 * are new descriptors to fetch.  (Only applicable for weak-ordered
4341 	 * memory model archs, such as IA-64).
4342 	 *
4343 	 * We also need this memory barrier to make certain all of the
4344 	 * status bits have been updated before next_to_watch is written.
4345 	 */
4346 	wmb();
4347 
4348 	/* set next_to_watch value indicating a packet is present */
4349 	first->next_to_watch = tx_desc;
4350 
4351 	i++;
4352 	if (i == tx_ring->count)
4353 		i = 0;
4354 
4355 	tx_ring->next_to_use = i;
4356 
4357 	writel(i, tx_ring->tail);
4358 
4359 	/* we need this if more than one processor can write to our tail
4360 	 * at a time, it synchronizes IO on IA64/Altix systems */
4361 	mmiowb();
4362 
4363 	return;
4364 
4365 dma_error:
4366 	dev_err(tx_ring->dev, "TX DMA map failed\n");
4367 
4368 	/* clear dma mappings for failed tx_buffer_info map */
4369 	for (;;) {
4370 		tx_buffer_info = &tx_ring->tx_buffer_info[i];
4371 		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4372 		if (tx_buffer_info == first)
4373 			break;
4374 		if (i == 0)
4375 			i = tx_ring->count;
4376 		i--;
4377 	}
4378 
4379 	tx_ring->next_to_use = i;
4380 }
4381 
4382 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4383 {
4384 	struct net_device *netdev = tx_ring->netdev;
4385 
4386 	netif_stop_subqueue(netdev, tx_ring->queue_index);
4387 
4388 	/* Herbert's original patch had:
4389 	 *  smp_mb__after_netif_stop_queue();
4390 	 * but since that doesn't exist yet, just open code it. */
4391 	smp_mb();
4392 
4393 	/* We need to check again in a case another CPU has just
4394 	 * made room available. */
4395 	if (igb_desc_unused(tx_ring) < size)
4396 		return -EBUSY;
4397 
4398 	/* A reprieve! */
4399 	netif_wake_subqueue(netdev, tx_ring->queue_index);
4400 
4401 	u64_stats_update_begin(&tx_ring->tx_syncp2);
4402 	tx_ring->tx_stats.restart_queue2++;
4403 	u64_stats_update_end(&tx_ring->tx_syncp2);
4404 
4405 	return 0;
4406 }
4407 
4408 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4409 {
4410 	if (igb_desc_unused(tx_ring) >= size)
4411 		return 0;
4412 	return __igb_maybe_stop_tx(tx_ring, size);
4413 }
4414 
4415 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4416 				struct igb_ring *tx_ring)
4417 {
4418 	struct igb_tx_buffer *first;
4419 	int tso;
4420 	u32 tx_flags = 0;
4421 	__be16 protocol = vlan_get_protocol(skb);
4422 	u8 hdr_len = 0;
4423 
4424 	/* need: 1 descriptor per page,
4425 	 *       + 2 desc gap to keep tail from touching head,
4426 	 *       + 1 desc for skb->data,
4427 	 *       + 1 desc for context descriptor,
4428 	 * otherwise try next time */
4429 	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4430 		/* this is a hard error */
4431 		return NETDEV_TX_BUSY;
4432 	}
4433 
4434 	/* record the location of the first descriptor for this packet */
4435 	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4436 	first->skb = skb;
4437 	first->bytecount = skb->len;
4438 	first->gso_segs = 1;
4439 
4440 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4441 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4442 		tx_flags |= IGB_TX_FLAGS_TSTAMP;
4443 	}
4444 
4445 	if (vlan_tx_tag_present(skb)) {
4446 		tx_flags |= IGB_TX_FLAGS_VLAN;
4447 		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4448 	}
4449 
4450 	/* record initial flags and protocol */
4451 	first->tx_flags = tx_flags;
4452 	first->protocol = protocol;
4453 
4454 	tso = igb_tso(tx_ring, first, &hdr_len);
4455 	if (tso < 0)
4456 		goto out_drop;
4457 	else if (!tso)
4458 		igb_tx_csum(tx_ring, first);
4459 
4460 	igb_tx_map(tx_ring, first, hdr_len);
4461 
4462 	/* Make sure there is space in the ring for the next send. */
4463 	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4464 
4465 	return NETDEV_TX_OK;
4466 
4467 out_drop:
4468 	igb_unmap_and_free_tx_resource(tx_ring, first);
4469 
4470 	return NETDEV_TX_OK;
4471 }
4472 
4473 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4474 						    struct sk_buff *skb)
4475 {
4476 	unsigned int r_idx = skb->queue_mapping;
4477 
4478 	if (r_idx >= adapter->num_tx_queues)
4479 		r_idx = r_idx % adapter->num_tx_queues;
4480 
4481 	return adapter->tx_ring[r_idx];
4482 }
4483 
4484 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4485 				  struct net_device *netdev)
4486 {
4487 	struct igb_adapter *adapter = netdev_priv(netdev);
4488 
4489 	if (test_bit(__IGB_DOWN, &adapter->state)) {
4490 		dev_kfree_skb_any(skb);
4491 		return NETDEV_TX_OK;
4492 	}
4493 
4494 	if (skb->len <= 0) {
4495 		dev_kfree_skb_any(skb);
4496 		return NETDEV_TX_OK;
4497 	}
4498 
4499 	/*
4500 	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4501 	 * in order to meet this minimum size requirement.
4502 	 */
4503 	if (skb->len < 17) {
4504 		if (skb_padto(skb, 17))
4505 			return NETDEV_TX_OK;
4506 		skb->len = 17;
4507 	}
4508 
4509 	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4510 }
4511 
4512 /**
4513  * igb_tx_timeout - Respond to a Tx Hang
4514  * @netdev: network interface device structure
4515  **/
4516 static void igb_tx_timeout(struct net_device *netdev)
4517 {
4518 	struct igb_adapter *adapter = netdev_priv(netdev);
4519 	struct e1000_hw *hw = &adapter->hw;
4520 
4521 	/* Do the reset outside of interrupt context */
4522 	adapter->tx_timeout_count++;
4523 
4524 	if (hw->mac.type >= e1000_82580)
4525 		hw->dev_spec._82575.global_device_reset = true;
4526 
4527 	schedule_work(&adapter->reset_task);
4528 	wr32(E1000_EICS,
4529 	     (adapter->eims_enable_mask & ~adapter->eims_other));
4530 }
4531 
4532 static void igb_reset_task(struct work_struct *work)
4533 {
4534 	struct igb_adapter *adapter;
4535 	adapter = container_of(work, struct igb_adapter, reset_task);
4536 
4537 	igb_dump(adapter);
4538 	netdev_err(adapter->netdev, "Reset adapter\n");
4539 	igb_reinit_locked(adapter);
4540 }
4541 
4542 /**
4543  * igb_get_stats64 - Get System Network Statistics
4544  * @netdev: network interface device structure
4545  * @stats: rtnl_link_stats64 pointer
4546  *
4547  **/
4548 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4549 						 struct rtnl_link_stats64 *stats)
4550 {
4551 	struct igb_adapter *adapter = netdev_priv(netdev);
4552 
4553 	spin_lock(&adapter->stats64_lock);
4554 	igb_update_stats(adapter, &adapter->stats64);
4555 	memcpy(stats, &adapter->stats64, sizeof(*stats));
4556 	spin_unlock(&adapter->stats64_lock);
4557 
4558 	return stats;
4559 }
4560 
4561 /**
4562  * igb_change_mtu - Change the Maximum Transfer Unit
4563  * @netdev: network interface device structure
4564  * @new_mtu: new value for maximum frame size
4565  *
4566  * Returns 0 on success, negative on failure
4567  **/
4568 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4569 {
4570 	struct igb_adapter *adapter = netdev_priv(netdev);
4571 	struct pci_dev *pdev = adapter->pdev;
4572 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4573 
4574 	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4575 		dev_err(&pdev->dev, "Invalid MTU setting\n");
4576 		return -EINVAL;
4577 	}
4578 
4579 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4580 	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4581 		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4582 		return -EINVAL;
4583 	}
4584 
4585 	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4586 		msleep(1);
4587 
4588 	/* igb_down has a dependency on max_frame_size */
4589 	adapter->max_frame_size = max_frame;
4590 
4591 	if (netif_running(netdev))
4592 		igb_down(adapter);
4593 
4594 	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4595 		 netdev->mtu, new_mtu);
4596 	netdev->mtu = new_mtu;
4597 
4598 	if (netif_running(netdev))
4599 		igb_up(adapter);
4600 	else
4601 		igb_reset(adapter);
4602 
4603 	clear_bit(__IGB_RESETTING, &adapter->state);
4604 
4605 	return 0;
4606 }
4607 
4608 /**
4609  * igb_update_stats - Update the board statistics counters
4610  * @adapter: board private structure
4611  **/
4612 
4613 void igb_update_stats(struct igb_adapter *adapter,
4614 		      struct rtnl_link_stats64 *net_stats)
4615 {
4616 	struct e1000_hw *hw = &adapter->hw;
4617 	struct pci_dev *pdev = adapter->pdev;
4618 	u32 reg, mpc;
4619 	u16 phy_tmp;
4620 	int i;
4621 	u64 bytes, packets;
4622 	unsigned int start;
4623 	u64 _bytes, _packets;
4624 
4625 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4626 
4627 	/*
4628 	 * Prevent stats update while adapter is being reset, or if the pci
4629 	 * connection is down.
4630 	 */
4631 	if (adapter->link_speed == 0)
4632 		return;
4633 	if (pci_channel_offline(pdev))
4634 		return;
4635 
4636 	bytes = 0;
4637 	packets = 0;
4638 	for (i = 0; i < adapter->num_rx_queues; i++) {
4639 		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4640 		struct igb_ring *ring = adapter->rx_ring[i];
4641 
4642 		ring->rx_stats.drops += rqdpc_tmp;
4643 		net_stats->rx_fifo_errors += rqdpc_tmp;
4644 
4645 		do {
4646 			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4647 			_bytes = ring->rx_stats.bytes;
4648 			_packets = ring->rx_stats.packets;
4649 		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4650 		bytes += _bytes;
4651 		packets += _packets;
4652 	}
4653 
4654 	net_stats->rx_bytes = bytes;
4655 	net_stats->rx_packets = packets;
4656 
4657 	bytes = 0;
4658 	packets = 0;
4659 	for (i = 0; i < adapter->num_tx_queues; i++) {
4660 		struct igb_ring *ring = adapter->tx_ring[i];
4661 		do {
4662 			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4663 			_bytes = ring->tx_stats.bytes;
4664 			_packets = ring->tx_stats.packets;
4665 		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4666 		bytes += _bytes;
4667 		packets += _packets;
4668 	}
4669 	net_stats->tx_bytes = bytes;
4670 	net_stats->tx_packets = packets;
4671 
4672 	/* read stats registers */
4673 	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4674 	adapter->stats.gprc += rd32(E1000_GPRC);
4675 	adapter->stats.gorc += rd32(E1000_GORCL);
4676 	rd32(E1000_GORCH); /* clear GORCL */
4677 	adapter->stats.bprc += rd32(E1000_BPRC);
4678 	adapter->stats.mprc += rd32(E1000_MPRC);
4679 	adapter->stats.roc += rd32(E1000_ROC);
4680 
4681 	adapter->stats.prc64 += rd32(E1000_PRC64);
4682 	adapter->stats.prc127 += rd32(E1000_PRC127);
4683 	adapter->stats.prc255 += rd32(E1000_PRC255);
4684 	adapter->stats.prc511 += rd32(E1000_PRC511);
4685 	adapter->stats.prc1023 += rd32(E1000_PRC1023);
4686 	adapter->stats.prc1522 += rd32(E1000_PRC1522);
4687 	adapter->stats.symerrs += rd32(E1000_SYMERRS);
4688 	adapter->stats.sec += rd32(E1000_SEC);
4689 
4690 	mpc = rd32(E1000_MPC);
4691 	adapter->stats.mpc += mpc;
4692 	net_stats->rx_fifo_errors += mpc;
4693 	adapter->stats.scc += rd32(E1000_SCC);
4694 	adapter->stats.ecol += rd32(E1000_ECOL);
4695 	adapter->stats.mcc += rd32(E1000_MCC);
4696 	adapter->stats.latecol += rd32(E1000_LATECOL);
4697 	adapter->stats.dc += rd32(E1000_DC);
4698 	adapter->stats.rlec += rd32(E1000_RLEC);
4699 	adapter->stats.xonrxc += rd32(E1000_XONRXC);
4700 	adapter->stats.xontxc += rd32(E1000_XONTXC);
4701 	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4702 	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4703 	adapter->stats.fcruc += rd32(E1000_FCRUC);
4704 	adapter->stats.gptc += rd32(E1000_GPTC);
4705 	adapter->stats.gotc += rd32(E1000_GOTCL);
4706 	rd32(E1000_GOTCH); /* clear GOTCL */
4707 	adapter->stats.rnbc += rd32(E1000_RNBC);
4708 	adapter->stats.ruc += rd32(E1000_RUC);
4709 	adapter->stats.rfc += rd32(E1000_RFC);
4710 	adapter->stats.rjc += rd32(E1000_RJC);
4711 	adapter->stats.tor += rd32(E1000_TORH);
4712 	adapter->stats.tot += rd32(E1000_TOTH);
4713 	adapter->stats.tpr += rd32(E1000_TPR);
4714 
4715 	adapter->stats.ptc64 += rd32(E1000_PTC64);
4716 	adapter->stats.ptc127 += rd32(E1000_PTC127);
4717 	adapter->stats.ptc255 += rd32(E1000_PTC255);
4718 	adapter->stats.ptc511 += rd32(E1000_PTC511);
4719 	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4720 	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4721 
4722 	adapter->stats.mptc += rd32(E1000_MPTC);
4723 	adapter->stats.bptc += rd32(E1000_BPTC);
4724 
4725 	adapter->stats.tpt += rd32(E1000_TPT);
4726 	adapter->stats.colc += rd32(E1000_COLC);
4727 
4728 	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4729 	/* read internal phy specific stats */
4730 	reg = rd32(E1000_CTRL_EXT);
4731 	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4732 		adapter->stats.rxerrc += rd32(E1000_RXERRC);
4733 		adapter->stats.tncrs += rd32(E1000_TNCRS);
4734 	}
4735 
4736 	adapter->stats.tsctc += rd32(E1000_TSCTC);
4737 	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4738 
4739 	adapter->stats.iac += rd32(E1000_IAC);
4740 	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4741 	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4742 	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4743 	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4744 	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4745 	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4746 	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4747 	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4748 
4749 	/* Fill out the OS statistics structure */
4750 	net_stats->multicast = adapter->stats.mprc;
4751 	net_stats->collisions = adapter->stats.colc;
4752 
4753 	/* Rx Errors */
4754 
4755 	/* RLEC on some newer hardware can be incorrect so build
4756 	 * our own version based on RUC and ROC */
4757 	net_stats->rx_errors = adapter->stats.rxerrc +
4758 		adapter->stats.crcerrs + adapter->stats.algnerrc +
4759 		adapter->stats.ruc + adapter->stats.roc +
4760 		adapter->stats.cexterr;
4761 	net_stats->rx_length_errors = adapter->stats.ruc +
4762 				      adapter->stats.roc;
4763 	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4764 	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4765 	net_stats->rx_missed_errors = adapter->stats.mpc;
4766 
4767 	/* Tx Errors */
4768 	net_stats->tx_errors = adapter->stats.ecol +
4769 			       adapter->stats.latecol;
4770 	net_stats->tx_aborted_errors = adapter->stats.ecol;
4771 	net_stats->tx_window_errors = adapter->stats.latecol;
4772 	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4773 
4774 	/* Tx Dropped needs to be maintained elsewhere */
4775 
4776 	/* Phy Stats */
4777 	if (hw->phy.media_type == e1000_media_type_copper) {
4778 		if ((adapter->link_speed == SPEED_1000) &&
4779 		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4780 			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4781 			adapter->phy_stats.idle_errors += phy_tmp;
4782 		}
4783 	}
4784 
4785 	/* Management Stats */
4786 	adapter->stats.mgptc += rd32(E1000_MGTPTC);
4787 	adapter->stats.mgprc += rd32(E1000_MGTPRC);
4788 	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4789 
4790 	/* OS2BMC Stats */
4791 	reg = rd32(E1000_MANC);
4792 	if (reg & E1000_MANC_EN_BMC2OS) {
4793 		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4794 		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4795 		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4796 		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4797 	}
4798 }
4799 
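/**
 * igb_msix_other - handle the "other" causes MSI-X interrupt
 * @irq: interrupt number
 * @data: pointer to our adapter structure
 *
 * Handles device reset requests, DMA out of sync events, VF mailbox
 * messages, and link status changes, then re-enables the other-causes
 * interrupt.
 **/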
4800 static irqreturn_t igb_msix_other(int irq, void *data)
4801 {
4802 	struct igb_adapter *adapter = data;
4803 	struct e1000_hw *hw = &adapter->hw;
4804 	u32 icr = rd32(E1000_ICR);
4805 	/* reading ICR causes bit 31 of EICR to be cleared */
4806 
4807 	if (icr & E1000_ICR_DRSTA)
4808 		schedule_work(&adapter->reset_task);
4809 
4810 	if (icr & E1000_ICR_DOUTSYNC) {
4811 		/* HW is reporting DMA is out of sync */
4812 		adapter->stats.doosync++;
4813 		/* The DMA Out of Sync is also indication of a spoof event
4814 		 * in IOV mode. Check the Wrong VM Behavior register to
4815 		 * see if it is really a spoof event. */
4816 		igb_check_wvbr(adapter);
4817 	}
4818 
4819 	/* Check for a mailbox event */
4820 	if (icr & E1000_ICR_VMMB)
4821 		igb_msg_task(adapter);
4822 
4823 	if (icr & E1000_ICR_LSC) {
4824 		hw->mac.get_link_status = 1;
4825 		/* guard against interrupt when we're going down */
4826 		if (!test_bit(__IGB_DOWN, &adapter->state))
4827 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4828 	}
4829 
4830 	wr32(E1000_EIMS, adapter->eims_other);
4831 
4832 	return IRQ_HANDLED;
4833 }
4834 
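/**
 * igb_write_itr - write a pending ITR value to the EITR register
 * @q_vector: pointer to q_vector
 *
 * Writes the ITR value computed by the dynamic ITR algorithm, if one
 * is pending, in the layout expected by the current MAC type.
 **/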
4835 static void igb_write_itr(struct igb_q_vector *q_vector)
4836 {
4837 	struct igb_adapter *adapter = q_vector->adapter;
4838 	u32 itr_val = q_vector->itr_val & 0x7FFC;
4839 
4840 	if (!q_vector->set_itr)
4841 		return;
4842 
4843 	if (!itr_val)
4844 		itr_val = 0x4;
4845 
4846 	if (adapter->hw.mac.type == e1000_82575)
4847 		itr_val |= itr_val << 16;
4848 	else
4849 		itr_val |= E1000_EITR_CNT_IGNR;
4850 
4851 	writel(itr_val, q_vector->itr_register);
4852 	q_vector->set_itr = 0;
4853 }
4854 
4855 static irqreturn_t igb_msix_ring(int irq, void *data)
4856 {
4857 	struct igb_q_vector *q_vector = data;
4858 
4859 	/* Write the ITR value calculated from the previous interrupt. */
4860 	igb_write_itr(q_vector);
4861 
4862 	napi_schedule(&q_vector->napi);
4863 
4864 	return IRQ_HANDLED;
4865 }
4866 
4867 #ifdef CONFIG_IGB_DCA
4868 static void igb_update_dca(struct igb_q_vector *q_vector)
4869 {
4870 	struct igb_adapter *adapter = q_vector->adapter;
4871 	struct e1000_hw *hw = &adapter->hw;
4872 	int cpu = get_cpu();
4873 
4874 	if (q_vector->cpu == cpu)
4875 		goto out_no_update;
4876 
4877 	if (q_vector->tx.ring) {
4878 		int q = q_vector->tx.ring->reg_idx;
4879 		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4880 		if (hw->mac.type == e1000_82575) {
4881 			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4882 			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4883 		} else {
4884 			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4885 			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4886 			              E1000_DCA_TXCTRL_CPUID_SHIFT;
4887 		}
4888 		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4889 		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4890 	}
4891 	if (q_vector->rx.ring) {
4892 		int q = q_vector->rx.ring->reg_idx;
4893 		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4894 		if (hw->mac.type == e1000_82575) {
4895 			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4896 			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4897 		} else {
4898 			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4899 			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4900 			              E1000_DCA_RXCTRL_CPUID_SHIFT;
4901 		}
4902 		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4903 		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4904 		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4905 		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4906 	}
4907 	q_vector->cpu = cpu;
4908 out_no_update:
4909 	put_cpu();
4910 }
4911 
4912 static void igb_setup_dca(struct igb_adapter *adapter)
4913 {
4914 	struct e1000_hw *hw = &adapter->hw;
4915 	int i;
4916 
4917 	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4918 		return;
4919 
4920 	/* Always use CB2 mode, difference is masked in the CB driver. */
4921 	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4922 
4923 	for (i = 0; i < adapter->num_q_vectors; i++) {
4924 		adapter->q_vector[i]->cpu = -1;
4925 		igb_update_dca(adapter->q_vector[i]);
4926 	}
4927 }
4928 
4929 static int __igb_notify_dca(struct device *dev, void *data)
4930 {
4931 	struct net_device *netdev = dev_get_drvdata(dev);
4932 	struct igb_adapter *adapter = netdev_priv(netdev);
4933 	struct pci_dev *pdev = adapter->pdev;
4934 	struct e1000_hw *hw = &adapter->hw;
4935 	unsigned long event = *(unsigned long *)data;
4936 
4937 	switch (event) {
4938 	case DCA_PROVIDER_ADD:
4939 		/* if already enabled, don't do it again */
4940 		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4941 			break;
4942 		if (dca_add_requester(dev) == 0) {
4943 			adapter->flags |= IGB_FLAG_DCA_ENABLED;
4944 			dev_info(&pdev->dev, "DCA enabled\n");
4945 			igb_setup_dca(adapter);
4946 			break;
4947 		}
4948 		/* Fall Through since DCA is disabled. */
4949 	case DCA_PROVIDER_REMOVE:
4950 		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4951 			/* without this a class_device is left
4952 			 * hanging around in the sysfs model */
4953 			dca_remove_requester(dev);
4954 			dev_info(&pdev->dev, "DCA disabled\n");
4955 			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4956 			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4957 		}
4958 		break;
4959 	}
4960 
4961 	return 0;
4962 }
4963 
4964 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4965                           void *p)
4966 {
4967 	int ret_val;
4968 
4969 	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4970 	                                 __igb_notify_dca);
4971 
4972 	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4973 }
4974 #endif /* CONFIG_IGB_DCA */
4975 
4976 #ifdef CONFIG_PCI_IOV
4977 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4978 {
4979 	unsigned char mac_addr[ETH_ALEN];
4980 	struct pci_dev *pdev = adapter->pdev;
4981 	struct e1000_hw *hw = &adapter->hw;
4982 	struct pci_dev *pvfdev;
4983 	unsigned int device_id;
4984 	u16 thisvf_devfn;
4985 
4986 	random_ether_addr(mac_addr);
4987 	igb_set_vf_mac(adapter, vf, mac_addr);
4988 
4989 	switch (adapter->hw.mac.type) {
4990 	case e1000_82576:
4991 		device_id = IGB_82576_VF_DEV_ID;
4992 		/* VF Stride for 82576 is 2 */
4993 		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4994 			(pdev->devfn & 1);
4995 		break;
4996 	case e1000_i350:
4997 		device_id = IGB_I350_VF_DEV_ID;
4998 		/* VF Stride for I350 is 4 */
4999 		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5000 				(pdev->devfn & 3);
5001 		break;
5002 	default:
5003 		device_id = 0;
5004 		thisvf_devfn = 0;
5005 		break;
5006 	}
5007 
5008 	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5009 	while (pvfdev) {
5010 		if (pvfdev->devfn == thisvf_devfn)
5011 			break;
5012 		pvfdev = pci_get_device(hw->vendor_id,
5013 					device_id, pvfdev);
5014 	}
5015 
5016 	if (pvfdev)
5017 		adapter->vf_data[vf].vfdev = pvfdev;
5018 	else
5019 		dev_err(&pdev->dev,
5020 			"Couldn't find pci dev ptr for VF %4.4x\n",
5021 			thisvf_devfn);
5022 	return pvfdev != NULL;
5023 }
5024 
5025 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5026 {
5027 	struct e1000_hw *hw = &adapter->hw;
5028 	struct pci_dev *pdev = adapter->pdev;
5029 	struct pci_dev *pvfdev;
5030 	u16 vf_devfn = 0;
5031 	u16 vf_stride;
5032 	unsigned int device_id;
5033 	int vfs_found = 0;
5034 
5035 	switch (adapter->hw.mac.type) {
5036 	case e1000_82576:
5037 		device_id = IGB_82576_VF_DEV_ID;
5038 		/* VF Stride for 82576 is 2 */
5039 		vf_stride = 2;
5040 		break;
5041 	case e1000_i350:
5042 		device_id = IGB_I350_VF_DEV_ID;
5043 		/* VF Stride for I350 is 4 */
5044 		vf_stride = 4;
5045 		break;
5046 	default:
5047 		device_id = 0;
5048 		vf_stride = 0;
5049 		break;
5050 	}
5051 
5052 	vf_devfn = pdev->devfn + 0x80;
5053 	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5054 	while (pvfdev) {
5055 		if (pvfdev->devfn == vf_devfn &&
5056 		    (pvfdev->bus->number >= pdev->bus->number))
5057 			vfs_found++;
5058 		vf_devfn += vf_stride;
5059 		pvfdev = pci_get_device(hw->vendor_id,
5060 					device_id, pvfdev);
5061 	}
5062 
5063 	return vfs_found;
5064 }
5065 
5066 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5067 {
5068 	int i;
5069 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5070 		if (adapter->vf_data[i].vfdev) {
5071 			if (adapter->vf_data[i].vfdev->dev_flags &
5072 			    PCI_DEV_FLAGS_ASSIGNED)
5073 				return true;
5074 		}
5075 	}
5076 	return false;
5077 }
5078 
5079 #endif
5080 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5081 {
5082 	struct e1000_hw *hw = &adapter->hw;
5083 	u32 ping;
5084 	int i;
5085 
5086 	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5087 		ping = E1000_PF_CONTROL_MSG;
5088 		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5089 			ping |= E1000_VT_MSGTYPE_CTS;
5090 		igb_write_mbx(hw, &ping, 1, i);
5091 	}
5092 }
5093 
5094 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5095 {
5096 	struct e1000_hw *hw = &adapter->hw;
5097 	u32 vmolr = rd32(E1000_VMOLR(vf));
5098 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099 
5100 	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5101 	                    IGB_VF_FLAG_MULTI_PROMISC);
5102 	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5103 
5104 	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5105 		vmolr |= E1000_VMOLR_MPME;
5106 		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5107 		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5108 	} else {
5109 		/*
5110 		 * if we have hashes and we are clearing a multicast promisc
5111 		 * flag we need to write the hashes to the MTA as this step
5112 		 * was previously skipped
5113 		 */
5114 		if (vf_data->num_vf_mc_hashes > 30) {
5115 			vmolr |= E1000_VMOLR_MPME;
5116 		} else if (vf_data->num_vf_mc_hashes) {
5117 			int j;
5118 			vmolr |= E1000_VMOLR_ROMPE;
5119 			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5120 				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5121 		}
5122 	}
5123 
5124 	wr32(E1000_VMOLR(vf), vmolr);
5125 
5126 	/* there are flags left unprocessed, likely not supported */
5127 	if (*msgbuf & E1000_VT_MSGINFO_MASK)
5128 		return -EINVAL;
5129 
5130 	return 0;
5131 
5132 }
5133 
5134 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5135 				  u32 *msgbuf, u32 vf)
5136 {
5137 	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5138 	u16 *hash_list = (u16 *)&msgbuf[1];
5139 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5140 	int i;
5141 
5142 	/* salt away the number of multicast addresses assigned
5143 	 * to this VF for later use to restore when the PF multicast
5144 	 * list changes
5145 	 */
5146 	vf_data->num_vf_mc_hashes = n;
5147 
5148 	/* only up to 30 hash values supported */
5149 	if (n > 30)
5150 		n = 30;
5151 
5152 	/* store the hashes for later use */
5153 	for (i = 0; i < n; i++)
5154 		vf_data->vf_mc_hashes[i] = hash_list[i];
5155 
5156 	/* Flush and reset the mta with the new values */
5157 	igb_set_rx_mode(adapter->netdev);
5158 
5159 	return 0;
5160 }
5161 
5162 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5163 {
5164 	struct e1000_hw *hw = &adapter->hw;
5165 	struct vf_data_storage *vf_data;
5166 	int i, j;
5167 
5168 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5169 		u32 vmolr = rd32(E1000_VMOLR(i));
5170 		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5171 
5172 		vf_data = &adapter->vf_data[i];
5173 
5174 		if ((vf_data->num_vf_mc_hashes > 30) ||
5175 		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5176 			vmolr |= E1000_VMOLR_MPME;
5177 		} else if (vf_data->num_vf_mc_hashes) {
5178 			vmolr |= E1000_VMOLR_ROMPE;
5179 			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5180 				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5181 		}
5182 		wr32(E1000_VMOLR(i), vmolr);
5183 	}
5184 }
5185 
5186 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5187 {
5188 	struct e1000_hw *hw = &adapter->hw;
5189 	u32 pool_mask, reg, vid;
5190 	int i;
5191 
5192 	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5193 
5194 	/* Find the vlan filter for this id */
5195 	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5196 		reg = rd32(E1000_VLVF(i));
5197 
5198 		/* remove the vf from the pool */
5199 		reg &= ~pool_mask;
5200 
5201 		/* if pool is empty then remove entry from vfta */
5202 		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5203 		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			vid = reg & E1000_VLVF_VLANID_MASK;
			igb_vfta_set(hw, vid, false);
			reg = 0;
5207 		}
5208 
5209 		wr32(E1000_VLVF(i), reg);
5210 	}
5211 
5212 	adapter->vf_data[vf].vlans_enabled = 0;
5213 }
5214 
5215 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5216 {
5217 	struct e1000_hw *hw = &adapter->hw;
5218 	u32 reg, i;
5219 
5220 	/* The vlvf table only exists on 82576 hardware and newer */
5221 	if (hw->mac.type < e1000_82576)
5222 		return -1;
5223 
5224 	/* we only need to do this if VMDq is enabled */
5225 	if (!adapter->vfs_allocated_count)
5226 		return -1;
5227 
5228 	/* Find the vlan filter for this id */
5229 	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5230 		reg = rd32(E1000_VLVF(i));
5231 		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5232 		    vid == (reg & E1000_VLVF_VLANID_MASK))
5233 			break;
5234 	}
5235 
5236 	if (add) {
5237 		if (i == E1000_VLVF_ARRAY_SIZE) {
5238 			/* Did not find a matching VLAN ID entry that was
5239 			 * enabled.  Search for a free filter entry, i.e.
5240 			 * one without the enable bit set
5241 			 */
5242 			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5243 				reg = rd32(E1000_VLVF(i));
5244 				if (!(reg & E1000_VLVF_VLANID_ENABLE))
5245 					break;
5246 			}
5247 		}
5248 		if (i < E1000_VLVF_ARRAY_SIZE) {
5249 			/* Found an enabled/available entry */
5250 			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5251 
5252 			/* if !enabled we need to set this up in vfta */
5253 			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5254 				/* add VID to filter table */
5255 				igb_vfta_set(hw, vid, true);
5256 				reg |= E1000_VLVF_VLANID_ENABLE;
5257 			}
5258 			reg &= ~E1000_VLVF_VLANID_MASK;
5259 			reg |= vid;
5260 			wr32(E1000_VLVF(i), reg);
5261 
5262 			/* do not modify RLPML for PF devices */
5263 			if (vf >= adapter->vfs_allocated_count)
5264 				return 0;
5265 
5266 			if (!adapter->vf_data[vf].vlans_enabled) {
5267 				u32 size;
5268 				reg = rd32(E1000_VMOLR(vf));
5269 				size = reg & E1000_VMOLR_RLPML_MASK;
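				/* grow the Rx max packet size by 4 bytes to
				 * make room for a VLAN tag
				 */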
5270 				size += 4;
5271 				reg &= ~E1000_VMOLR_RLPML_MASK;
5272 				reg |= size;
5273 				wr32(E1000_VMOLR(vf), reg);
5274 			}
5275 
5276 			adapter->vf_data[vf].vlans_enabled++;
5277 		}
5278 	} else {
5279 		if (i < E1000_VLVF_ARRAY_SIZE) {
5280 			/* remove vf from the pool */
5281 			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5282 			/* if pool is empty then remove entry from vfta */
5283 			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5284 				reg = 0;
5285 				igb_vfta_set(hw, vid, false);
5286 			}
5287 			wr32(E1000_VLVF(i), reg);
5288 
5289 			/* do not modify RLPML for PF devices */
5290 			if (vf >= adapter->vfs_allocated_count)
5291 				return 0;
5292 
5293 			adapter->vf_data[vf].vlans_enabled--;
5294 			if (!adapter->vf_data[vf].vlans_enabled) {
5295 				u32 size;
5296 				reg = rd32(E1000_VMOLR(vf));
5297 				size = reg & E1000_VMOLR_RLPML_MASK;
5298 				size -= 4;
5299 				reg &= ~E1000_VMOLR_RLPML_MASK;
5300 				reg |= size;
5301 				wr32(E1000_VMOLR(vf), reg);
5302 			}
5303 		}
5304 	}
5305 	return 0;
5306 }
5307 
5308 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5309 {
5310 	struct e1000_hw *hw = &adapter->hw;
5311 
5312 	if (vid)
5313 		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5314 	else
5315 		wr32(E1000_VMVIR(vf), 0);
5316 }
5317 
5318 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5319 			       int vf, u16 vlan, u8 qos)
5320 {
5321 	int err = 0;
5322 	struct igb_adapter *adapter = netdev_priv(netdev);
5323 
5324 	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5325 		return -EINVAL;
5326 	if (vlan || qos) {
5327 		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5328 		if (err)
5329 			goto out;
5330 		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5331 		igb_set_vmolr(adapter, vf, !vlan);
5332 		adapter->vf_data[vf].pf_vlan = vlan;
5333 		adapter->vf_data[vf].pf_qos = qos;
5334 		dev_info(&adapter->pdev->dev,
5335 			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5336 		if (test_bit(__IGB_DOWN, &adapter->state)) {
5337 			dev_warn(&adapter->pdev->dev,
5338 				 "The VF VLAN has been set,"
5339 				 " but the PF device is not up.\n");
5340 			dev_warn(&adapter->pdev->dev,
5341 				 "Bring the PF device up before"
5342 				 " attempting to use the VF device.\n");
5343 		}
5344 	} else {
5345 		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5346 				   false, vf);
5347 		igb_set_vmvir(adapter, vlan, vf);
5348 		igb_set_vmolr(adapter, vf, true);
5349 		adapter->vf_data[vf].pf_vlan = 0;
5350 		adapter->vf_data[vf].pf_qos = 0;
	}
out:
	return err;
5354 }
5355 
5356 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5357 {
5358 	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5359 	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5360 
5361 	return igb_vlvf_set(adapter, vid, add, vf);
5362 }
5363 
5364 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5365 {
5366 	/* clear flags - except flag that indicates PF has set the MAC */
5367 	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5368 	adapter->vf_data[vf].last_nack = jiffies;
5369 
5370 	/* reset offloads to defaults */
5371 	igb_set_vmolr(adapter, vf, true);
5372 
5373 	/* reset vlans for device */
5374 	igb_clear_vf_vfta(adapter, vf);
5375 	if (adapter->vf_data[vf].pf_vlan)
5376 		igb_ndo_set_vf_vlan(adapter->netdev, vf,
5377 				    adapter->vf_data[vf].pf_vlan,
5378 				    adapter->vf_data[vf].pf_qos);
5379 	else
5380 		igb_clear_vf_vfta(adapter, vf);
5381 
5382 	/* reset multicast table array for vf */
5383 	adapter->vf_data[vf].num_vf_mc_hashes = 0;
5384 
5385 	/* Flush and reset the mta with the new values */
5386 	igb_set_rx_mode(adapter->netdev);
5387 }
5388 
5389 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5390 {
5391 	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5392 
5393 	/* generate a new mac address as we were hotplug removed/added */
5394 	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5395 		random_ether_addr(vf_mac);
5396 
5397 	/* process remaining reset events */
5398 	igb_vf_reset(adapter, vf);
5399 }
5400 
5401 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5402 {
5403 	struct e1000_hw *hw = &adapter->hw;
5404 	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5405 	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5406 	u32 reg, msgbuf[3];
5407 	u8 *addr = (u8 *)(&msgbuf[1]);
5408 
5409 	/* process all the same items cleared in a function level reset */
5410 	igb_vf_reset(adapter, vf);
5411 
5412 	/* set vf mac address */
5413 	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5414 
5415 	/* enable transmit and receive for vf */
5416 	reg = rd32(E1000_VFTE);
5417 	wr32(E1000_VFTE, reg | (1 << vf));
5418 	reg = rd32(E1000_VFRE);
5419 	wr32(E1000_VFRE, reg | (1 << vf));
5420 
5421 	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5422 
5423 	/* reply to reset with ack and vf mac address */
5424 	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5425 	memcpy(addr, vf_mac, 6);
5426 	igb_write_mbx(hw, msgbuf, 3, vf);
5427 }
5428 
5429 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5430 {
5431 	/*
5432 	 * The VF MAC Address is stored in a packed array of bytes
5433 	 * starting at the second 32 bit word of the msg array
5434 	 */
	unsigned char *addr = (unsigned char *)&msg[1];
5436 	int err = -1;
5437 
5438 	if (is_valid_ether_addr(addr))
5439 		err = igb_set_vf_mac(adapter, vf, addr);
5440 
5441 	return err;
5442 }
5443 
5444 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5445 {
5446 	struct e1000_hw *hw = &adapter->hw;
5447 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5448 	u32 msg = E1000_VT_MSGTYPE_NACK;
5449 
5450 	/* if device isn't clear to send it shouldn't be reading either */
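	/* rate limit the NACK so a misbehaving VF hears from us
	 * at most once every two seconds
	 */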
5451 	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5452 	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5453 		igb_write_mbx(hw, &msg, 1, vf);
5454 		vf_data->last_nack = jiffies;
5455 	}
5456 }
5457 
5458 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5459 {
5460 	struct pci_dev *pdev = adapter->pdev;
5461 	u32 msgbuf[E1000_VFMAILBOX_SIZE];
5462 	struct e1000_hw *hw = &adapter->hw;
5463 	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5464 	s32 retval;
5465 
5466 	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5467 
5468 	if (retval) {
5469 		/* if receive failed revoke VF CTS stats and restart init */
5470 		dev_err(&pdev->dev, "Error receiving message from VF\n");
5471 		vf_data->flags &= ~IGB_VF_FLAG_CTS;
5472 		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5473 			return;
5474 		goto out;
5475 	}
5476 
5477 	/* this is a message we already processed, do nothing */
5478 	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5479 		return;
5480 
5481 	/*
5482 	 * until the vf completes a reset it should not be
5483 	 * allowed to start any configuration.
5484 	 */
5485 
5486 	if (msgbuf[0] == E1000_VF_RESET) {
5487 		igb_vf_reset_msg(adapter, vf);
5488 		return;
5489 	}
5490 
5491 	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5492 		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5493 			return;
5494 		retval = -1;
5495 		goto out;
5496 	}
5497 
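	/* the low 16 bits of the first mailbox word identify the VF request */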
5498 	switch ((msgbuf[0] & 0xFFFF)) {
5499 	case E1000_VF_SET_MAC_ADDR:
5500 		retval = -EINVAL;
5501 		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5502 			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5503 		else
5504 			dev_warn(&pdev->dev,
5505 				 "VF %d attempted to override administratively "
5506 				 "set MAC address\nReload the VF driver to "
5507 				 "resume operations\n", vf);
5508 		break;
5509 	case E1000_VF_SET_PROMISC:
5510 		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5511 		break;
5512 	case E1000_VF_SET_MULTICAST:
5513 		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5514 		break;
5515 	case E1000_VF_SET_LPE:
5516 		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5517 		break;
5518 	case E1000_VF_SET_VLAN:
5519 		retval = -1;
5520 		if (vf_data->pf_vlan)
5521 			dev_warn(&pdev->dev,
5522 				 "VF %d attempted to override administratively "
5523 				 "set VLAN tag\nReload the VF driver to "
5524 				 "resume operations\n", vf);
5525 		else
5526 			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5527 		break;
5528 	default:
5529 		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5530 		retval = -1;
5531 		break;
5532 	}
5533 
5534 	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5535 out:
5536 	/* notify the VF of the results of what it sent us */
5537 	if (retval)
5538 		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5539 	else
5540 		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5541 
5542 	igb_write_mbx(hw, msgbuf, 1, vf);
5543 }
5544 
5545 static void igb_msg_task(struct igb_adapter *adapter)
5546 {
5547 	struct e1000_hw *hw = &adapter->hw;
5548 	u32 vf;
5549 
5550 	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5551 		/* process any reset requests */
5552 		if (!igb_check_for_rst(hw, vf))
5553 			igb_vf_reset_event(adapter, vf);
5554 
5555 		/* process any messages pending */
5556 		if (!igb_check_for_msg(hw, vf))
5557 			igb_rcv_msg_from_vf(adapter, vf);
5558 
5559 		/* process any acks */
5560 		if (!igb_check_for_ack(hw, vf))
5561 			igb_rcv_ack_from_vf(adapter, vf);
5562 	}
5563 }
5564 
5565 /**
5566  *  igb_set_uta - Set unicast filter table address
5567  *  @adapter: board private structure
5568  *
5569  *  The unicast table address is a register array of 32-bit registers.
 *  The table is meant to be used in a way similar to the MTA; however,
 *  due to certain limitations in the hardware it is necessary to
5572  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5573  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5574  **/
5575 static void igb_set_uta(struct igb_adapter *adapter)
5576 {
5577 	struct e1000_hw *hw = &adapter->hw;
5578 	int i;
5579 
5580 	/* The UTA table only exists on 82576 hardware and newer */
5581 	if (hw->mac.type < e1000_82576)
5582 		return;
5583 
5584 	/* we only need to do this if VMDq is enabled */
5585 	if (!adapter->vfs_allocated_count)
5586 		return;
5587 
5588 	for (i = 0; i < hw->mac.uta_reg_count; i++)
5589 		array_wr32(E1000_UTA, i, ~0);
5590 }
5591 
5592 /**
5593  * igb_intr_msi - Interrupt Handler
5594  * @irq: interrupt number
 * @data: pointer to our adapter private structure
5596  **/
5597 static irqreturn_t igb_intr_msi(int irq, void *data)
5598 {
5599 	struct igb_adapter *adapter = data;
5600 	struct igb_q_vector *q_vector = adapter->q_vector[0];
5601 	struct e1000_hw *hw = &adapter->hw;
5602 	/* read ICR disables interrupts using IAM */
5603 	u32 icr = rd32(E1000_ICR);
5604 
5605 	igb_write_itr(q_vector);
5606 
5607 	if (icr & E1000_ICR_DRSTA)
5608 		schedule_work(&adapter->reset_task);
5609 
5610 	if (icr & E1000_ICR_DOUTSYNC) {
5611 		/* HW is reporting DMA is out of sync */
5612 		adapter->stats.doosync++;
5613 	}
5614 
5615 	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5616 		hw->mac.get_link_status = 1;
5617 		if (!test_bit(__IGB_DOWN, &adapter->state))
5618 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5619 	}
5620 
5621 	napi_schedule(&q_vector->napi);
5622 
5623 	return IRQ_HANDLED;
5624 }
5625 
5626 /**
5627  * igb_intr - Legacy Interrupt Handler
5628  * @irq: interrupt number
 * @data: pointer to our adapter private structure
5630  **/
5631 static irqreturn_t igb_intr(int irq, void *data)
5632 {
5633 	struct igb_adapter *adapter = data;
5634 	struct igb_q_vector *q_vector = adapter->q_vector[0];
5635 	struct e1000_hw *hw = &adapter->hw;
5636 	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5637 	 * need for the IMC write */
5638 	u32 icr = rd32(E1000_ICR);
5639 
5640 	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5641 	 * not set, then the adapter didn't send an interrupt */
5642 	if (!(icr & E1000_ICR_INT_ASSERTED))
5643 		return IRQ_NONE;
5644 
5645 	igb_write_itr(q_vector);
5646 
5647 	if (icr & E1000_ICR_DRSTA)
5648 		schedule_work(&adapter->reset_task);
5649 
5650 	if (icr & E1000_ICR_DOUTSYNC) {
5651 		/* HW is reporting DMA is out of sync */
5652 		adapter->stats.doosync++;
5653 	}
5654 
5655 	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5656 		hw->mac.get_link_status = 1;
5657 		/* guard against interrupt when we're going down */
5658 		if (!test_bit(__IGB_DOWN, &adapter->state))
5659 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5660 	}
5661 
5662 	napi_schedule(&q_vector->napi);
5663 
5664 	return IRQ_HANDLED;
5665 }
5666 
5667 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5668 {
5669 	struct igb_adapter *adapter = q_vector->adapter;
5670 	struct e1000_hw *hw = &adapter->hw;
5671 
5672 	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5673 	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5674 		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5675 			igb_set_itr(q_vector);
5676 		else
5677 			igb_update_ring_itr(q_vector);
5678 	}
5679 
5680 	if (!test_bit(__IGB_DOWN, &adapter->state)) {
5681 		if (adapter->msix_entries)
5682 			wr32(E1000_EIMS, q_vector->eims_value);
5683 		else
5684 			igb_irq_enable(adapter);
5685 	}
5686 }
5687 
5688 /**
5689  * igb_poll - NAPI Rx polling callback
5690  * @napi: napi polling structure
5691  * @budget: count of how many packets we should handle
5692  **/
5693 static int igb_poll(struct napi_struct *napi, int budget)
5694 {
5695 	struct igb_q_vector *q_vector = container_of(napi,
5696 	                                             struct igb_q_vector,
5697 	                                             napi);
5698 	bool clean_complete = true;
5699 
5700 #ifdef CONFIG_IGB_DCA
5701 	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5702 		igb_update_dca(q_vector);
5703 #endif
5704 	if (q_vector->tx.ring)
5705 		clean_complete = igb_clean_tx_irq(q_vector);
5706 
5707 	if (q_vector->rx.ring)
5708 		clean_complete &= igb_clean_rx_irq(q_vector, budget);
5709 
5710 	/* If all work not completed, return budget and keep polling */
5711 	if (!clean_complete)
5712 		return budget;
5713 
5714 	/* If not enough Rx work done, exit the polling mode */
5715 	napi_complete(napi);
5716 	igb_ring_irq_enable(q_vector);
5717 
5718 	return 0;
5719 }
5720 
5721 /**
5722  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5723  * @adapter: board private structure
5724  * @shhwtstamps: timestamp structure to update
5725  * @regval: unsigned 64bit system time value.
5726  *
5727  * We need to convert the system time value stored in the RX/TXSTMP registers
5728  * into a hwtstamp which can be used by the upper level timestamping functions
5729  */
5730 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5731                                    struct skb_shared_hwtstamps *shhwtstamps,
5732                                    u64 regval)
5733 {
5734 	u64 ns;
5735 
5736 	/*
5737 	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5738 	 * 24 to match clock shift we setup earlier.
5739 	 */
5740 	if (adapter->hw.mac.type >= e1000_82580)
5741 		regval <<= IGB_82580_TSYNC_SHIFT;
5742 
5743 	ns = timecounter_cyc2time(&adapter->clock, regval);
5744 	timecompare_update(&adapter->compare, ns);
5745 	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5746 	shhwtstamps->hwtstamp = ns_to_ktime(ns);
5747 	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5748 }
5749 
5750 /**
5751  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5752  * @q_vector: pointer to q_vector containing needed info
 * @buffer_info: pointer to igb_tx_buffer structure
5754  *
5755  * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we
 * allow only one such packet into the queue.
5758  */
5759 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5760 			    struct igb_tx_buffer *buffer_info)
5761 {
5762 	struct igb_adapter *adapter = q_vector->adapter;
5763 	struct e1000_hw *hw = &adapter->hw;
5764 	struct skb_shared_hwtstamps shhwtstamps;
5765 	u64 regval;
5766 
5767 	/* if skb does not support hw timestamp or TX stamp not valid exit */
5768 	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5769 	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5770 		return;
5771 
5772 	regval = rd32(E1000_TXSTMPL);
5773 	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5774 
5775 	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5776 	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5777 }
5778 
5779 /**
5780  * igb_clean_tx_irq - Reclaim resources after transmit completes
5781  * @q_vector: pointer to q_vector containing needed info
5782  * returns true if ring is completely cleaned
5783  **/
5784 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5785 {
5786 	struct igb_adapter *adapter = q_vector->adapter;
5787 	struct igb_ring *tx_ring = q_vector->tx.ring;
5788 	struct igb_tx_buffer *tx_buffer;
5789 	union e1000_adv_tx_desc *tx_desc, *eop_desc;
5790 	unsigned int total_bytes = 0, total_packets = 0;
5791 	unsigned int budget = q_vector->tx.work_limit;
5792 	unsigned int i = tx_ring->next_to_clean;
5793 
5794 	if (test_bit(__IGB_DOWN, &adapter->state))
5795 		return true;
5796 
5797 	tx_buffer = &tx_ring->tx_buffer_info[i];
5798 	tx_desc = IGB_TX_DESC(tx_ring, i);
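	/* bias i negative (relative to the end of the ring) so the
	 * wrap check below reduces to a simple !i test
	 */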
5799 	i -= tx_ring->count;
5800 
5801 	for (; budget; budget--) {
5802 		eop_desc = tx_buffer->next_to_watch;
5803 
5804 		/* prevent any other reads prior to eop_desc */
5805 		rmb();
5806 
5807 		/* if next_to_watch is not set then there is no work pending */
5808 		if (!eop_desc)
5809 			break;
5810 
5811 		/* if DD is not set pending work has not been completed */
5812 		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5813 			break;
5814 
5815 		/* clear next_to_watch to prevent false hangs */
5816 		tx_buffer->next_to_watch = NULL;
5817 
5818 		/* update the statistics for this packet */
5819 		total_bytes += tx_buffer->bytecount;
5820 		total_packets += tx_buffer->gso_segs;
5821 
5822 		/* retrieve hardware timestamp */
5823 		igb_tx_hwtstamp(q_vector, tx_buffer);
5824 
5825 		/* free the skb */
5826 		dev_kfree_skb_any(tx_buffer->skb);
5827 		tx_buffer->skb = NULL;
5828 
5829 		/* unmap skb header data */
5830 		dma_unmap_single(tx_ring->dev,
5831 				 tx_buffer->dma,
5832 				 tx_buffer->length,
5833 				 DMA_TO_DEVICE);
5834 
5835 		/* clear last DMA location and unmap remaining buffers */
5836 		while (tx_desc != eop_desc) {
5837 			tx_buffer->dma = 0;
5838 
5839 			tx_buffer++;
5840 			tx_desc++;
5841 			i++;
5842 			if (unlikely(!i)) {
5843 				i -= tx_ring->count;
5844 				tx_buffer = tx_ring->tx_buffer_info;
5845 				tx_desc = IGB_TX_DESC(tx_ring, 0);
5846 			}
5847 
5848 			/* unmap any remaining paged data */
5849 			if (tx_buffer->dma) {
5850 				dma_unmap_page(tx_ring->dev,
5851 					       tx_buffer->dma,
5852 					       tx_buffer->length,
5853 					       DMA_TO_DEVICE);
5854 			}
5855 		}
5856 
5857 		/* clear last DMA location */
5858 		tx_buffer->dma = 0;
5859 
5860 		/* move us one more past the eop_desc for start of next pkt */
5861 		tx_buffer++;
5862 		tx_desc++;
5863 		i++;
5864 		if (unlikely(!i)) {
5865 			i -= tx_ring->count;
5866 			tx_buffer = tx_ring->tx_buffer_info;
5867 			tx_desc = IGB_TX_DESC(tx_ring, 0);
5868 		}
5869 	}
5870 
5871 	netdev_tx_completed_queue(txring_txq(tx_ring),
5872 				  total_packets, total_bytes);
5873 	i += tx_ring->count;
5874 	tx_ring->next_to_clean = i;
5875 	u64_stats_update_begin(&tx_ring->tx_syncp);
5876 	tx_ring->tx_stats.bytes += total_bytes;
5877 	tx_ring->tx_stats.packets += total_packets;
5878 	u64_stats_update_end(&tx_ring->tx_syncp);
5879 	q_vector->tx.total_bytes += total_bytes;
5880 	q_vector->tx.total_packets += total_packets;
5881 
5882 	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5883 		struct e1000_hw *hw = &adapter->hw;
5884 
5885 		eop_desc = tx_buffer->next_to_watch;
5886 
5887 		/* Detect a transmit hang in hardware, this serializes the
5888 		 * check with the clearing of time_stamp and movement of i */
5889 		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5890 		if (eop_desc &&
5891 		    time_after(jiffies, tx_buffer->time_stamp +
5892 			       (adapter->tx_timeout_factor * HZ)) &&
5893 		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5894 
5895 			/* detected Tx unit hang */
5896 			dev_err(tx_ring->dev,
5897 				"Detected Tx Unit Hang\n"
5898 				"  Tx Queue             <%d>\n"
5899 				"  TDH                  <%x>\n"
5900 				"  TDT                  <%x>\n"
5901 				"  next_to_use          <%x>\n"
5902 				"  next_to_clean        <%x>\n"
5903 				"buffer_info[next_to_clean]\n"
5904 				"  time_stamp           <%lx>\n"
5905 				"  next_to_watch        <%p>\n"
5906 				"  jiffies              <%lx>\n"
5907 				"  desc.status          <%x>\n",
5908 				tx_ring->queue_index,
5909 				rd32(E1000_TDH(tx_ring->reg_idx)),
5910 				readl(tx_ring->tail),
5911 				tx_ring->next_to_use,
5912 				tx_ring->next_to_clean,
5913 				tx_buffer->time_stamp,
5914 				eop_desc,
5915 				jiffies,
5916 				eop_desc->wb.status);
5917 			netif_stop_subqueue(tx_ring->netdev,
5918 					    tx_ring->queue_index);
5919 
5920 			/* we are about to reset, no point in enabling stuff */
5921 			return true;
5922 		}
5923 	}
5924 
5925 	if (unlikely(total_packets &&
5926 		     netif_carrier_ok(tx_ring->netdev) &&
5927 		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5928 		/* Make sure that anybody stopping the queue after this
5929 		 * sees the new next_to_clean.
5930 		 */
5931 		smp_mb();
5932 		if (__netif_subqueue_stopped(tx_ring->netdev,
5933 					     tx_ring->queue_index) &&
5934 		    !(test_bit(__IGB_DOWN, &adapter->state))) {
5935 			netif_wake_subqueue(tx_ring->netdev,
5936 					    tx_ring->queue_index);
5937 
5938 			u64_stats_update_begin(&tx_ring->tx_syncp);
5939 			tx_ring->tx_stats.restart_queue++;
5940 			u64_stats_update_end(&tx_ring->tx_syncp);
5941 		}
5942 	}
5943 
5944 	return !!budget;
5945 }
5946 
5947 static inline void igb_rx_checksum(struct igb_ring *ring,
5948 				   union e1000_adv_rx_desc *rx_desc,
5949 				   struct sk_buff *skb)
5950 {
5951 	skb_checksum_none_assert(skb);
5952 
5953 	/* Ignore Checksum bit is set */
5954 	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5955 		return;
5956 
5957 	/* Rx checksum disabled via ethtool */
5958 	if (!(ring->netdev->features & NETIF_F_RXCSUM))
5959 		return;
5960 
5961 	/* TCP/UDP checksum error bit is set */
5962 	if (igb_test_staterr(rx_desc,
5963 			     E1000_RXDEXT_STATERR_TCPE |
5964 			     E1000_RXDEXT_STATERR_IPE)) {
5965 		/*
5966 		 * work around errata with sctp packets where the TCPE aka
5967 		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets; in that case let the stack verify the crc32c
5969 		 */
5970 		if (!((skb->len == 60) &&
5971 		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5972 			u64_stats_update_begin(&ring->rx_syncp);
5973 			ring->rx_stats.csum_err++;
5974 			u64_stats_update_end(&ring->rx_syncp);
5975 		}
5976 		/* let the stack verify checksum errors */
5977 		return;
5978 	}
5979 	/* It must be a TCP or UDP packet with a valid checksum */
5980 	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5981 				      E1000_RXD_STAT_UDPCS))
5982 		skb->ip_summed = CHECKSUM_UNNECESSARY;
5983 
5984 	dev_dbg(ring->dev, "cksum success: bits %08X\n",
5985 		le32_to_cpu(rx_desc->wb.upper.status_error));
5986 }
5987 
5988 static inline void igb_rx_hash(struct igb_ring *ring,
5989 			       union e1000_adv_rx_desc *rx_desc,
5990 			       struct sk_buff *skb)
5991 {
5992 	if (ring->netdev->features & NETIF_F_RXHASH)
5993 		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5994 }
5995 
5996 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5997 			    union e1000_adv_rx_desc *rx_desc,
5998 			    struct sk_buff *skb)
5999 {
6000 	struct igb_adapter *adapter = q_vector->adapter;
6001 	struct e1000_hw *hw = &adapter->hw;
6002 	u64 regval;
6003 
6004 	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6005 				       E1000_RXDADV_STAT_TS))
6006 		return;
6007 
6008 	/*
6009 	 * If this bit is set, then the RX registers contain the time stamp. No
6010 	 * other packet will be time stamped until we read these registers, so
6011 	 * read the registers to make them available again. Because only one
6012 	 * packet can be time stamped at a time, we know that the register
6013 	 * values must belong to this one here and therefore we don't need to
6014 	 * compare any of the additional attributes stored for it.
6015 	 *
6016 	 * If nothing went wrong, then it should have a shared tx_flags that we
6017 	 * can turn into a skb_shared_hwtstamps.
6018 	 */
6019 	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6020 		u32 *stamp = (u32 *)skb->data;
6021 		regval = le32_to_cpu(*(stamp + 2));
6022 		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6023 		skb_pull(skb, IGB_TS_HDR_LEN);
6024 	} else {
		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6026 			return;
6027 
6028 		regval = rd32(E1000_RXSTMPL);
6029 		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6030 	}
6031 
6032 	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6033 }
6034 
6035 static void igb_rx_vlan(struct igb_ring *ring,
6036 			union e1000_adv_rx_desc *rx_desc,
6037 			struct sk_buff *skb)
6038 {
6039 	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6040 		u16 vid;
6041 		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6042 		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6043 			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6044 		else
6045 			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6046 
6047 		__vlan_hwaccel_put_tag(skb, vid);
6048 	}
6049 }
6050 
6051 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6052 {
6053 	/* HW will not DMA in data larger than the given buffer, even if it
6054 	 * parses the (NFS, of course) header to be larger.  In that case, it
6055 	 * fills the header buffer and spills the rest into the page.
6056 	 */
6057 	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6058 	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6059 	if (hlen > IGB_RX_HDR_LEN)
6060 		hlen = IGB_RX_HDR_LEN;
6061 	return hlen;
6062 }
6063 
6064 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6065 {
6066 	struct igb_ring *rx_ring = q_vector->rx.ring;
6067 	union e1000_adv_rx_desc *rx_desc;
6068 	const int current_node = numa_node_id();
6069 	unsigned int total_bytes = 0, total_packets = 0;
6070 	u16 cleaned_count = igb_desc_unused(rx_ring);
6071 	u16 i = rx_ring->next_to_clean;
6072 
6073 	rx_desc = IGB_RX_DESC(rx_ring, i);
6074 
6075 	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6076 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6077 		struct sk_buff *skb = buffer_info->skb;
6078 		union e1000_adv_rx_desc *next_rxd;
6079 
6080 		buffer_info->skb = NULL;
6081 		prefetch(skb->data);
6082 
6083 		i++;
6084 		if (i == rx_ring->count)
6085 			i = 0;
6086 
6087 		next_rxd = IGB_RX_DESC(rx_ring, i);
6088 		prefetch(next_rxd);
6089 
6090 		/*
6091 		 * This memory barrier is needed to keep us from reading
6092 		 * any other fields out of the rx_desc until we know the
6093 		 * RXD_STAT_DD bit is set
6094 		 */
6095 		rmb();
6096 
6097 		if (!skb_is_nonlinear(skb)) {
6098 			__skb_put(skb, igb_get_hlen(rx_desc));
6099 			dma_unmap_single(rx_ring->dev, buffer_info->dma,
6100 					 IGB_RX_HDR_LEN,
6101 					 DMA_FROM_DEVICE);
6102 			buffer_info->dma = 0;
6103 		}
6104 
6105 		if (rx_desc->wb.upper.length) {
6106 			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6107 
6108 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6109 						buffer_info->page,
6110 						buffer_info->page_offset,
6111 						length);
6112 
6113 			skb->len += length;
6114 			skb->data_len += length;
6115 			skb->truesize += PAGE_SIZE / 2;
6116 
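			/* recycle the half page only if we are its sole
			 * user and it is local to this NUMA node
			 */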
6117 			if ((page_count(buffer_info->page) != 1) ||
6118 			    (page_to_nid(buffer_info->page) != current_node))
6119 				buffer_info->page = NULL;
6120 			else
6121 				get_page(buffer_info->page);
6122 
6123 			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6124 				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
6125 			buffer_info->page_dma = 0;
6126 		}
6127 
6128 		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6129 			struct igb_rx_buffer *next_buffer;
6130 			next_buffer = &rx_ring->rx_buffer_info[i];
6131 			buffer_info->skb = next_buffer->skb;
6132 			buffer_info->dma = next_buffer->dma;
6133 			next_buffer->skb = skb;
6134 			next_buffer->dma = 0;
6135 			goto next_desc;
6136 		}
6137 
6138 		if (unlikely((igb_test_staterr(rx_desc,
6139 					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6140 			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6141 			dev_kfree_skb_any(skb);
6142 			goto next_desc;
6143 		}
6144 
6145 		igb_rx_hwtstamp(q_vector, rx_desc, skb);
6146 		igb_rx_hash(rx_ring, rx_desc, skb);
6147 		igb_rx_checksum(rx_ring, rx_desc, skb);
6148 		igb_rx_vlan(rx_ring, rx_desc, skb);
6149 
6150 		total_bytes += skb->len;
6151 		total_packets++;
6152 
6153 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6154 
6155 		napi_gro_receive(&q_vector->napi, skb);
6156 
6157 		budget--;
6158 next_desc:
6159 		if (!budget)
6160 			break;
6161 
6162 		cleaned_count++;
6163 		/* return some buffers to hardware, one at a time is too slow */
6164 		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6165 			igb_alloc_rx_buffers(rx_ring, cleaned_count);
6166 			cleaned_count = 0;
6167 		}
6168 
6169 		/* use prefetched values */
6170 		rx_desc = next_rxd;
6171 	}
6172 
6173 	rx_ring->next_to_clean = i;
6174 	u64_stats_update_begin(&rx_ring->rx_syncp);
6175 	rx_ring->rx_stats.packets += total_packets;
6176 	rx_ring->rx_stats.bytes += total_bytes;
6177 	u64_stats_update_end(&rx_ring->rx_syncp);
6178 	q_vector->rx.total_packets += total_packets;
6179 	q_vector->rx.total_bytes += total_bytes;
6180 
6181 	if (cleaned_count)
6182 		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6183 
6184 	return !!budget;
6185 }
6186 
6187 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6188 				 struct igb_rx_buffer *bi)
6189 {
6190 	struct sk_buff *skb = bi->skb;
6191 	dma_addr_t dma = bi->dma;
6192 
6193 	if (dma)
6194 		return true;
6195 
6196 	if (likely(!skb)) {
6197 		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6198 						IGB_RX_HDR_LEN);
6199 		bi->skb = skb;
6200 		if (!skb) {
6201 			rx_ring->rx_stats.alloc_failed++;
6202 			return false;
6203 		}
6204 
6205 		/* initialize skb for ring */
6206 		skb_record_rx_queue(skb, rx_ring->queue_index);
6207 	}
6208 
6209 	dma = dma_map_single(rx_ring->dev, skb->data,
6210 			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6211 
6212 	if (dma_mapping_error(rx_ring->dev, dma)) {
6213 		rx_ring->rx_stats.alloc_failed++;
6214 		return false;
6215 	}
6216 
6217 	bi->dma = dma;
6218 	return true;
6219 }
6220 
6221 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6222 				  struct igb_rx_buffer *bi)
6223 {
6224 	struct page *page = bi->page;
6225 	dma_addr_t page_dma = bi->page_dma;
6226 	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6227 
6228 	if (page_dma)
6229 		return true;
6230 
6231 	if (!page) {
6232 		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6233 		bi->page = page;
6234 		if (unlikely(!page)) {
6235 			rx_ring->rx_stats.alloc_failed++;
6236 			return false;
6237 		}
6238 	}
6239 
6240 	page_dma = dma_map_page(rx_ring->dev, page,
6241 				page_offset, PAGE_SIZE / 2,
6242 				DMA_FROM_DEVICE);
6243 
6244 	if (dma_mapping_error(rx_ring->dev, page_dma)) {
6245 		rx_ring->rx_stats.alloc_failed++;
6246 		return false;
6247 	}
6248 
6249 	bi->page_dma = page_dma;
6250 	bi->page_offset = page_offset;
6251 	return true;
6252 }
6253 
6254 /**
6255  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
 * @rx_ring: ring on which to place the new buffers
 * @cleaned_count: number of descriptors to refill
6257  **/
6258 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6259 {
6260 	union e1000_adv_rx_desc *rx_desc;
6261 	struct igb_rx_buffer *bi;
6262 	u16 i = rx_ring->next_to_use;
6263 
6264 	rx_desc = IGB_RX_DESC(rx_ring, i);
6265 	bi = &rx_ring->rx_buffer_info[i];
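	/* as in the clean routines, bias i negative so the wrap test is just !i */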
6266 	i -= rx_ring->count;
6267 
6268 	while (cleaned_count--) {
6269 		if (!igb_alloc_mapped_skb(rx_ring, bi))
6270 			break;
6271 
6272 		/* Refresh the desc even if buffer_addrs didn't change
6273 		 * because each write-back erases this info. */
6274 		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6275 
6276 		if (!igb_alloc_mapped_page(rx_ring, bi))
6277 			break;
6278 
6279 		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6280 
6281 		rx_desc++;
6282 		bi++;
6283 		i++;
6284 		if (unlikely(!i)) {
6285 			rx_desc = IGB_RX_DESC(rx_ring, 0);
6286 			bi = rx_ring->rx_buffer_info;
6287 			i -= rx_ring->count;
6288 		}
6289 
6290 		/* clear the hdr_addr for the next_to_use descriptor */
6291 		rx_desc->read.hdr_addr = 0;
6292 	}
6293 
6294 	i += rx_ring->count;
6295 
6296 	if (rx_ring->next_to_use != i) {
6297 		rx_ring->next_to_use = i;
6298 
6299 		/* Force memory writes to complete before letting h/w
6300 		 * know there are new descriptors to fetch.  (Only
6301 		 * applicable for weak-ordered memory model archs,
6302 		 * such as IA-64). */
6303 		wmb();
6304 		writel(i, rx_ring->tail);
6305 	}
6306 }
6307 
6308 /**
 * igb_mii_ioctl - handle MII related ioctls
 * @netdev: pointer to the net device
 * @ifr: interface request structure
 * @cmd: ioctl command
6313  **/
6314 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6315 {
6316 	struct igb_adapter *adapter = netdev_priv(netdev);
6317 	struct mii_ioctl_data *data = if_mii(ifr);
6318 
6319 	if (adapter->hw.phy.media_type != e1000_media_type_copper)
6320 		return -EOPNOTSUPP;
6321 
6322 	switch (cmd) {
6323 	case SIOCGMIIPHY:
6324 		data->phy_id = adapter->hw.phy.addr;
6325 		break;
6326 	case SIOCGMIIREG:
6327 		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6328 		                     &data->val_out))
6329 			return -EIO;
6330 		break;
6331 	case SIOCSMIIREG:
6332 	default:
6333 		return -EOPNOTSUPP;
6334 	}
6335 	return 0;
6336 }
6337 
6338 /**
6339  * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: pointer to the net device
 * @ifr: interface request structure
 * @cmd: ioctl command
6343  *
6344  * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
6346  * when no packet needs it. At most one packet in the queue may be
6347  * marked for time stamping, otherwise it would be impossible to tell
6348  * for sure to which packet the hardware time stamp belongs.
6349  *
6350  * Incoming time stamping has to be configured via the hardware
6351  * filters. Not all combinations are supported, in particular event
6352  * type has to be specified. Matching the kind of event packet is
6353  * not supported, with the exception of "all V2 events regardless of
 * layer 2 or 4".
6355  *
6356  **/
6357 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6358 			      struct ifreq *ifr, int cmd)
6359 {
6360 	struct igb_adapter *adapter = netdev_priv(netdev);
6361 	struct e1000_hw *hw = &adapter->hw;
6362 	struct hwtstamp_config config;
6363 	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6364 	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6365 	u32 tsync_rx_cfg = 0;
6366 	bool is_l4 = false;
6367 	bool is_l2 = false;
6368 	u32 regval;
6369 
6370 	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6371 		return -EFAULT;
6372 
6373 	/* reserved for future extensions */
6374 	if (config.flags)
6375 		return -EINVAL;
6376 
6377 	switch (config.tx_type) {
6378 	case HWTSTAMP_TX_OFF:
6379 		tsync_tx_ctl = 0;
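		/* fall through */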
6380 	case HWTSTAMP_TX_ON:
6381 		break;
6382 	default:
6383 		return -ERANGE;
6384 	}
6385 
6386 	switch (config.rx_filter) {
6387 	case HWTSTAMP_FILTER_NONE:
6388 		tsync_rx_ctl = 0;
6389 		break;
6390 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6391 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6392 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6393 	case HWTSTAMP_FILTER_ALL:
6394 		/*
6395 		 * register TSYNCRXCFG must be set, therefore it is not
6396 		 * possible to time stamp both Sync and Delay_Req messages
6397 		 * => fall back to time stamping all packets
6398 		 */
6399 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6400 		config.rx_filter = HWTSTAMP_FILTER_ALL;
6401 		break;
6402 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6403 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6404 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6405 		is_l4 = true;
6406 		break;
6407 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6408 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6409 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6410 		is_l4 = true;
6411 		break;
6412 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6413 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6414 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6415 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6416 		is_l2 = true;
6417 		is_l4 = true;
6418 		config.rx_filter = HWTSTAMP_FILTER_SOME;
6419 		break;
6420 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6421 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6422 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6423 		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6424 		is_l2 = true;
6425 		is_l4 = true;
6426 		config.rx_filter = HWTSTAMP_FILTER_SOME;
6427 		break;
6428 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
6429 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
6430 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6431 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6432 		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6433 		is_l2 = true;
6434 		is_l4 = true;
6435 		break;
6436 	default:
6437 		return -ERANGE;
6438 	}
6439 
6440 	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl || tsync_tx_ctl)
6442 			return -EINVAL;
6443 		return 0;
6444 	}
6445 
6446 	/*
6447 	 * Per-packet timestamping only works if all packets are
6448 	 * timestamped, so enable timestamping in all packets as
6449 	 * long as one rx filter was configured.
6450 	 */
6451 	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6452 		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6453 		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6454 	}
6455 
6456 	/* enable/disable TX */
6457 	regval = rd32(E1000_TSYNCTXCTL);
6458 	regval &= ~E1000_TSYNCTXCTL_ENABLED;
6459 	regval |= tsync_tx_ctl;
6460 	wr32(E1000_TSYNCTXCTL, regval);
6461 
6462 	/* enable/disable RX */
6463 	regval = rd32(E1000_TSYNCRXCTL);
6464 	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6465 	regval |= tsync_rx_ctl;
6466 	wr32(E1000_TSYNCRXCTL, regval);
6467 
6468 	/* define which PTP packets are time stamped */
6469 	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6470 
6471 	/* define ethertype filter for timestamped packets */
6472 	if (is_l2)
6473 		wr32(E1000_ETQF(3),
6474 		                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6475 		                 E1000_ETQF_1588 | /* enable timestamping */
6476 		                 ETH_P_1588));     /* 1588 eth protocol type */
6477 	else
6478 		wr32(E1000_ETQF(3), 0);
6479 
6480 #define PTP_PORT 319
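/* UDP port used by PTP (IEEE 1588) event messages */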
6481 	/* L4 Queue Filter[3]: filter by destination port and protocol */
6482 	if (is_l4) {
6483 		u32 ftqf = (IPPROTO_UDP /* UDP */
6484 			| E1000_FTQF_VF_BP /* VF not compared */
6485 			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6486 			| E1000_FTQF_MASK); /* mask all inputs */
6487 		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6488 
6489 		wr32(E1000_IMIR(3), htons(PTP_PORT));
6490 		wr32(E1000_IMIREXT(3),
6491 		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6492 		if (hw->mac.type == e1000_82576) {
6493 			/* enable source port check */
6494 			wr32(E1000_SPQF(3), htons(PTP_PORT));
6495 			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6496 		}
6497 		wr32(E1000_FTQF(3), ftqf);
6498 	} else {
6499 		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6500 	}
6501 	wrfl();
6502 
6503 	adapter->hwtstamp_config = config;
6504 
6505 	/* clear TX/RX time stamp registers, just to be sure */
6506 	regval = rd32(E1000_TXSTMPH);
6507 	regval = rd32(E1000_RXSTMPH);
6508 
6509 	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6510 		-EFAULT : 0;
6511 }
6512 
6513 /**
 * igb_ioctl - dispatch driver-specific ioctls
 * @netdev: pointer to the net device
 * @ifr: interface request structure
 * @cmd: ioctl command
6518  **/
6519 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6520 {
6521 	switch (cmd) {
6522 	case SIOCGMIIPHY:
6523 	case SIOCGMIIREG:
6524 	case SIOCSMIIREG:
6525 		return igb_mii_ioctl(netdev, ifr, cmd);
6526 	case SIOCSHWTSTAMP:
6527 		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6528 	default:
6529 		return -EOPNOTSUPP;
6530 	}
6531 }
6532 
6533 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6534 {
6535 	struct igb_adapter *adapter = hw->back;
6536 	u16 cap_offset;
6537 
6538 	cap_offset = adapter->pdev->pcie_cap;
6539 	if (!cap_offset)
6540 		return -E1000_ERR_CONFIG;
6541 
6542 	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6543 
6544 	return 0;
6545 }
6546 
6547 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6548 {
6549 	struct igb_adapter *adapter = hw->back;
6550 	u16 cap_offset;
6551 
6552 	cap_offset = adapter->pdev->pcie_cap;
6553 	if (!cap_offset)
6554 		return -E1000_ERR_CONFIG;
6555 
6556 	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6557 
6558 	return 0;
6559 }
6560 
6561 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6562 {
6563 	struct igb_adapter *adapter = netdev_priv(netdev);
6564 	struct e1000_hw *hw = &adapter->hw;
6565 	u32 ctrl, rctl;
6566 	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6567 
6568 	if (enable) {
6569 		/* enable VLAN tag insert/strip */
6570 		ctrl = rd32(E1000_CTRL);
6571 		ctrl |= E1000_CTRL_VME;
6572 		wr32(E1000_CTRL, ctrl);
6573 
6574 		/* Disable CFI check */
6575 		rctl = rd32(E1000_RCTL);
6576 		rctl &= ~E1000_RCTL_CFIEN;
6577 		wr32(E1000_RCTL, rctl);
6578 	} else {
6579 		/* disable VLAN tag insert/strip */
6580 		ctrl = rd32(E1000_CTRL);
6581 		ctrl &= ~E1000_CTRL_VME;
6582 		wr32(E1000_CTRL, ctrl);
6583 	}
6584 
6585 	igb_rlpml_set(adapter);
6586 }
6587 
6588 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6589 {
6590 	struct igb_adapter *adapter = netdev_priv(netdev);
6591 	struct e1000_hw *hw = &adapter->hw;
6592 	int pf_id = adapter->vfs_allocated_count;
6593 
6594 	/* attempt to add filter to vlvf array */
6595 	igb_vlvf_set(adapter, vid, true, pf_id);
6596 
6597 	/* add the filter since PF can receive vlans w/o entry in vlvf */
6598 	igb_vfta_set(hw, vid, true);
6599 
6600 	set_bit(vid, adapter->active_vlans);
6601 
6602 	return 0;
6603 }
6604 
6605 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6606 {
6607 	struct igb_adapter *adapter = netdev_priv(netdev);
6608 	struct e1000_hw *hw = &adapter->hw;
6609 	int pf_id = adapter->vfs_allocated_count;
6610 	s32 err;
6611 
6612 	/* remove vlan from VLVF table array */
6613 	err = igb_vlvf_set(adapter, vid, false, pf_id);
6614 
6615 	/* if vid was not present in VLVF just remove it from table */
6616 	if (err)
6617 		igb_vfta_set(hw, vid, false);
6618 
6619 	clear_bit(vid, adapter->active_vlans);
6620 
6621 	return 0;
6622 }
6623 
6624 static void igb_restore_vlan(struct igb_adapter *adapter)
6625 {
6626 	u16 vid;
6627 
6628 	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6629 
6630 	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6631 		igb_vlan_rx_add_vid(adapter->netdev, vid);
6632 }
6633 
6634 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6635 {
6636 	struct pci_dev *pdev = adapter->pdev;
6637 	struct e1000_mac_info *mac = &adapter->hw.mac;
6638 
6639 	mac->autoneg = 0;
6640 
6641 	/* Make sure dplx is at most 1 bit and lsb of speed is not set
6642 	 * for the switch() below to work */
6643 	if ((spd & 1) || (dplx & ~1))
6644 		goto err_inval;
6645 
	/* Fiber NICs only allow 1000 Mbps full duplex */
6647 	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6648 	    spd != SPEED_1000 &&
6649 	    dplx != DUPLEX_FULL)
6650 		goto err_inval;
6651 
6652 	switch (spd + dplx) {
6653 	case SPEED_10 + DUPLEX_HALF:
6654 		mac->forced_speed_duplex = ADVERTISE_10_HALF;
6655 		break;
6656 	case SPEED_10 + DUPLEX_FULL:
6657 		mac->forced_speed_duplex = ADVERTISE_10_FULL;
6658 		break;
6659 	case SPEED_100 + DUPLEX_HALF:
6660 		mac->forced_speed_duplex = ADVERTISE_100_HALF;
6661 		break;
6662 	case SPEED_100 + DUPLEX_FULL:
6663 		mac->forced_speed_duplex = ADVERTISE_100_FULL;
6664 		break;
6665 	case SPEED_1000 + DUPLEX_FULL:
6666 		mac->autoneg = 1;
6667 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6668 		break;
6669 	case SPEED_1000 + DUPLEX_HALF: /* not supported */
6670 	default:
6671 		goto err_inval;
6672 	}
6673 	return 0;
6674 
6675 err_inval:
6676 	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6677 	return -EINVAL;
6678 }
6679 
6680 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6681 			  bool runtime)
6682 {
6683 	struct net_device *netdev = pci_get_drvdata(pdev);
6684 	struct igb_adapter *adapter = netdev_priv(netdev);
6685 	struct e1000_hw *hw = &adapter->hw;
6686 	u32 ctrl, rctl, status;
6687 	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6688 #ifdef CONFIG_PM
6689 	int retval = 0;
6690 #endif
6691 
6692 	netif_device_detach(netdev);
6693 
6694 	if (netif_running(netdev))
6695 		__igb_close(netdev, true);
6696 
6697 	igb_clear_interrupt_scheme(adapter);
6698 
6699 #ifdef CONFIG_PM
6700 	retval = pci_save_state(pdev);
6701 	if (retval)
6702 		return retval;
6703 #endif
6704 
6705 	status = rd32(E1000_STATUS);
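	/* do not arm the link-change wake-up filter while the link is up */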
6706 	if (status & E1000_STATUS_LU)
6707 		wufc &= ~E1000_WUFC_LNKC;
6708 
6709 	if (wufc) {
6710 		igb_setup_rctl(adapter);
6711 		igb_set_rx_mode(netdev);
6712 
6713 		/* turn on all-multi mode if wake on multicast is enabled */
6714 		if (wufc & E1000_WUFC_MC) {
6715 			rctl = rd32(E1000_RCTL);
6716 			rctl |= E1000_RCTL_MPE;
6717 			wr32(E1000_RCTL, rctl);
6718 		}
6719 
6720 		ctrl = rd32(E1000_CTRL);
6721 		/* advertise wake from D3Cold */
6722 		#define E1000_CTRL_ADVD3WUC 0x00100000
6723 		/* phy power management enable */
6724 		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6725 		ctrl |= E1000_CTRL_ADVD3WUC;
6726 		wr32(E1000_CTRL, ctrl);
6727 
6728 		/* Allow time for pending master requests to run */
6729 		igb_disable_pcie_master(hw);
6730 
6731 		wr32(E1000_WUC, E1000_WUC_PME_EN);
6732 		wr32(E1000_WUFC, wufc);
6733 	} else {
6734 		wr32(E1000_WUC, 0);
6735 		wr32(E1000_WUFC, 0);
6736 	}
6737 
6738 	*enable_wake = wufc || adapter->en_mng_pt;
6739 	if (!*enable_wake)
6740 		igb_power_down_link(adapter);
6741 	else
6742 		igb_power_up_link(adapter);
6743 
6744 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6745 	 * would have already happened in close and is redundant. */
6746 	igb_release_hw_control(adapter);
6747 
6748 	pci_disable_device(pdev);
6749 
6750 	return 0;
6751 }
6752 
6753 #ifdef CONFIG_PM
6754 #ifdef CONFIG_PM_SLEEP
6755 static int igb_suspend(struct device *dev)
6756 {
6757 	int retval;
6758 	bool wake;
6759 	struct pci_dev *pdev = to_pci_dev(dev);
6760 
6761 	retval = __igb_shutdown(pdev, &wake, 0);
6762 	if (retval)
6763 		return retval;
6764 
6765 	if (wake) {
6766 		pci_prepare_to_sleep(pdev);
6767 	} else {
6768 		pci_wake_from_d3(pdev, false);
6769 		pci_set_power_state(pdev, PCI_D3hot);
6770 	}
6771 
6772 	return 0;
6773 }
6774 #endif /* CONFIG_PM_SLEEP */
6775 
6776 static int igb_resume(struct device *dev)
6777 {
6778 	struct pci_dev *pdev = to_pci_dev(dev);
6779 	struct net_device *netdev = pci_get_drvdata(pdev);
6780 	struct igb_adapter *adapter = netdev_priv(netdev);
6781 	struct e1000_hw *hw = &adapter->hw;
6782 	u32 err;
6783 
6784 	pci_set_power_state(pdev, PCI_D0);
6785 	pci_restore_state(pdev);
6786 	pci_save_state(pdev);
6787 
6788 	err = pci_enable_device_mem(pdev);
6789 	if (err) {
6790 		dev_err(&pdev->dev,
6791 			"igb: Cannot enable PCI device from suspend\n");
6792 		return err;
6793 	}
6794 	pci_set_master(pdev);
6795 
6796 	pci_enable_wake(pdev, PCI_D3hot, 0);
6797 	pci_enable_wake(pdev, PCI_D3cold, 0);
6798 
6799 	if (!rtnl_is_locked()) {
6800 		/*
6801 		 * shut up ASSERT_RTNL() warning in
6802 		 * netif_set_real_num_tx/rx_queues.
6803 		 */
6804 		rtnl_lock();
6805 		err = igb_init_interrupt_scheme(adapter);
6806 		rtnl_unlock();
6807 	} else {
6808 		err = igb_init_interrupt_scheme(adapter);
6809 	}
6810 	if (err) {
6811 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6812 		return -ENOMEM;
6813 	}
6814 
6815 	igb_reset(adapter);
6816 
6817 	/* let the f/w know that the h/w is now under the control of the
6818 	 * driver. */
6819 	igb_get_hw_control(adapter);
6820 
6821 	wr32(E1000_WUS, ~0);
6822 
6823 	if (netdev->flags & IFF_UP) {
6824 		err = __igb_open(netdev, true);
6825 		if (err)
6826 			return err;
6827 	}
6828 
6829 	netif_device_attach(netdev);
6830 	return 0;
6831 }
6832 
6833 #ifdef CONFIG_PM_RUNTIME
6834 static int igb_runtime_idle(struct device *dev)
6835 {
6836 	struct pci_dev *pdev = to_pci_dev(dev);
6837 	struct net_device *netdev = pci_get_drvdata(pdev);
6838 	struct igb_adapter *adapter = netdev_priv(netdev);
6839 
6840 	if (!igb_has_link(adapter))
6841 		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6842 
6843 	return -EBUSY;
6844 }
6845 
6846 static int igb_runtime_suspend(struct device *dev)
6847 {
6848 	struct pci_dev *pdev = to_pci_dev(dev);
6849 	int retval;
6850 	bool wake;
6851 
6852 	retval = __igb_shutdown(pdev, &wake, 1);
6853 	if (retval)
6854 		return retval;
6855 
6856 	if (wake) {
6857 		pci_prepare_to_sleep(pdev);
6858 	} else {
6859 		pci_wake_from_d3(pdev, false);
6860 		pci_set_power_state(pdev, PCI_D3hot);
6861 	}
6862 
6863 	return 0;
6864 }
6865 
6866 static int igb_runtime_resume(struct device *dev)
6867 {
6868 	return igb_resume(dev);
6869 }
6870 #endif /* CONFIG_PM_RUNTIME */
6871 #endif
6872 
6873 static void igb_shutdown(struct pci_dev *pdev)
6874 {
6875 	bool wake;
6876 
6877 	__igb_shutdown(pdev, &wake, 0);
6878 
6879 	if (system_state == SYSTEM_POWER_OFF) {
6880 		pci_wake_from_d3(pdev, wake);
6881 		pci_set_power_state(pdev, PCI_D3hot);
6882 	}
6883 }
6884 
6885 #ifdef CONFIG_NET_POLL_CONTROLLER
6886 /*
6887  * Polling 'interrupt' - used by things like netconsole to send skbs
6888  * without having to re-enable interrupts. It's not called while
6889  * the interrupt routine is executing.
6890  */
6891 static void igb_netpoll(struct net_device *netdev)
6892 {
6893 	struct igb_adapter *adapter = netdev_priv(netdev);
6894 	struct e1000_hw *hw = &adapter->hw;
6895 	struct igb_q_vector *q_vector;
6896 	int i;
6897 
6898 	for (i = 0; i < adapter->num_q_vectors; i++) {
6899 		q_vector = adapter->q_vector[i];
6900 		if (adapter->msix_entries)
6901 			wr32(E1000_EIMC, q_vector->eims_value);
6902 		else
6903 			igb_irq_disable(adapter);
6904 		napi_schedule(&q_vector->napi);
6905 	}
6906 }
6907 #endif /* CONFIG_NET_POLL_CONTROLLER */
6908 
6909 /**
6910  * igb_io_error_detected - called when PCI error is detected
6911  * @pdev: Pointer to PCI device
6912  * @state: The current pci connection state
6913  *
6914  * This function is called after a PCI bus error affecting
6915  * this device has been detected.
6916  */
6917 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6918 					      pci_channel_state_t state)
6919 {
6920 	struct net_device *netdev = pci_get_drvdata(pdev);
6921 	struct igb_adapter *adapter = netdev_priv(netdev);
6922 
6923 	netif_device_detach(netdev);
6924 
6925 	if (state == pci_channel_io_perm_failure)
6926 		return PCI_ERS_RESULT_DISCONNECT;
6927 
6928 	if (netif_running(netdev))
6929 		igb_down(adapter);
6930 	pci_disable_device(pdev);
6931 
	/* Request a slot reset. */
6933 	return PCI_ERS_RESULT_NEED_RESET;
6934 }
6935 
6936 /**
6937  * igb_io_slot_reset - called after the pci bus has been reset.
6938  * @pdev: Pointer to PCI device
6939  *
6940  * Restart the card from scratch, as if from a cold-boot. Implementation
6941  * resembles the first-half of the igb_resume routine.
6942  */
6943 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6944 {
6945 	struct net_device *netdev = pci_get_drvdata(pdev);
6946 	struct igb_adapter *adapter = netdev_priv(netdev);
6947 	struct e1000_hw *hw = &adapter->hw;
6948 	pci_ers_result_t result;
6949 	int err;
6950 
6951 	if (pci_enable_device_mem(pdev)) {
6952 		dev_err(&pdev->dev,
6953 			"Cannot re-enable PCI device after reset.\n");
6954 		result = PCI_ERS_RESULT_DISCONNECT;
6955 	} else {
6956 		pci_set_master(pdev);
6957 		pci_restore_state(pdev);
6958 		pci_save_state(pdev);
6959 
6960 		pci_enable_wake(pdev, PCI_D3hot, 0);
6961 		pci_enable_wake(pdev, PCI_D3cold, 0);
6962 
6963 		igb_reset(adapter);
6964 		wr32(E1000_WUS, ~0);
6965 		result = PCI_ERS_RESULT_RECOVERED;
6966 	}
6967 
6968 	err = pci_cleanup_aer_uncorrect_error_status(pdev);
6969 	if (err) {
6970 		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6971 		        "failed 0x%0x\n", err);
6972 		/* non-fatal, continue */
6973 	}
6974 
6975 	return result;
6976 }
6977 
6978 /**
6979  * igb_io_resume - called when traffic can start flowing again.
6980  * @pdev: Pointer to PCI device
6981  *
6982  * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
6984  * second-half of the igb_resume routine.
6985  */
6986 static void igb_io_resume(struct pci_dev *pdev)
6987 {
6988 	struct net_device *netdev = pci_get_drvdata(pdev);
6989 	struct igb_adapter *adapter = netdev_priv(netdev);
6990 
6991 	if (netif_running(netdev)) {
6992 		if (igb_up(adapter)) {
6993 			dev_err(&pdev->dev, "igb_up failed after reset\n");
6994 			return;
6995 		}
6996 	}
6997 
6998 	netif_device_attach(netdev);
6999 
7000 	/* let the f/w know that the h/w is now under the control of the
7001 	 * driver. */
7002 	igb_get_hw_control(adapter);
7003 }
7004 
7005 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7006                              u8 qsel)
7007 {
7008 	u32 rar_low, rar_high;
7009 	struct e1000_hw *hw = &adapter->hw;
7010 
7011 	/* HW expects these in little endian so we reverse the byte order
7012 	 * from network order (big endian) to little endian
7013 	 */
7014 	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7015 	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7016 	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
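	/* For an illustrative address 00:1b:21:aa:bb:cc this yields
	 * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
	 * valid and pool bits are ORed in below.
	 */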
7017 
7018 	/* Indicate to hardware the Address is Valid. */
7019 	rar_high |= E1000_RAH_AV;
7020 
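	/* The pool is encoded differently per MAC: 82575 stores the pool
	 * number itself in the RAH pool field, while later MACs use one bit
	 * per pool, hence the multiply vs. shift below.
	 */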
7021 	if (hw->mac.type == e1000_82575)
7022 		rar_high |= E1000_RAH_POOL_1 * qsel;
7023 	else
7024 		rar_high |= E1000_RAH_POOL_1 << qsel;
7025 
7026 	wr32(E1000_RAL(index), rar_low);
7027 	wrfl();
7028 	wr32(E1000_RAH(index), rar_high);
7029 	wrfl();
7030 }
7031 
7032 static int igb_set_vf_mac(struct igb_adapter *adapter,
7033                           int vf, unsigned char *mac_addr)
7034 {
7035 	struct e1000_hw *hw = &adapter->hw;
7036 	/* VF MAC addresses start at the end of the receive addresses and move
7037 	 * towards the first, so a collision should not be possible */
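	/* e.g. VF 0 uses the last RAR entry (rar_entry_count - 1), VF 1 the
	 * entry before it, and so on.
	 */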
7038 	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7039 
7040 	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7041 
7042 	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7043 
7044 	return 0;
7045 }
7046 
7047 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7048 {
7049 	struct igb_adapter *adapter = netdev_priv(netdev);
7050 	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7051 		return -EINVAL;
7052 	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7053 	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7054 	dev_info(&adapter->pdev->dev,
7055 		 "Reload the VF driver to make this change effective.\n");
7056 	if (test_bit(__IGB_DOWN, &adapter->state)) {
7057 		dev_warn(&adapter->pdev->dev,
7058 			 "The VF MAC address has been set, but the PF device is not up.\n");
7059 		dev_warn(&adapter->pdev->dev,
7060 			 "Bring the PF device up before attempting to use the VF device.\n");
7061 	}
7062 	return igb_set_vf_mac(adapter, vf, mac);
7063 }
7064 
7065 static int igb_link_mbps(int internal_link_speed)
7066 {
7067 	switch (internal_link_speed) {
7068 	case SPEED_100:
7069 		return 100;
7070 	case SPEED_1000:
7071 		return 1000;
7072 	default:
7073 		return 0;
7074 	}
7075 }
7076 
7077 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7078 				  int link_speed)
7079 {
7080 	int rf_dec, rf_int;
7081 	u32 bcnrc_val;
7082 
7083 	if (tx_rate != 0) {
7084 		/* Calculate the rate factor values to set */
7085 		rf_int = link_speed / tx_rate;
7086 		rf_dec = (link_speed - (rf_int * tx_rate));
7087 		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
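		/* e.g. link_speed = 1000 and tx_rate = 300: rf_int = 3 with a
		 * 100 Mbps remainder, so rf_dec holds the fractional part of
		 * 1000/300 (~3.33) in fixed point.
		 */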
7088 
7089 		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7090 		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7091 		               E1000_RTTBCNRC_RF_INT_MASK);
7092 		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7093 	} else {
7094 		bcnrc_val = 0;
7095 	}
7096 
7097 	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7098 	wr32(E1000_RTTBCNRC, bcnrc_val);
7099 }
7100 
7101 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7102 {
7103 	int actual_link_speed, i;
7104 	bool reset_rate = false;
7105 
7106 	/* VF TX rate limit was not set or not supported */
7107 	if ((adapter->vf_rate_link_speed == 0) ||
7108 	    (adapter->hw.mac.type != e1000_82576))
7109 		return;
7110 
7111 	actual_link_speed = igb_link_mbps(adapter->link_speed);
7112 	if (actual_link_speed != adapter->vf_rate_link_speed) {
7113 		reset_rate = true;
7114 		adapter->vf_rate_link_speed = 0;
7115 		dev_info(&adapter->pdev->dev,
7116 			 "Link speed has been changed. VF Transmit rate is disabled\n");
7118 	}
7119 
7120 	for (i = 0; i < adapter->vfs_allocated_count; i++) {
7121 		if (reset_rate)
7122 			adapter->vf_data[i].tx_rate = 0;
7123 
7124 		igb_set_vf_rate_limit(&adapter->hw, i,
7125 		                      adapter->vf_data[i].tx_rate,
7126 		                      actual_link_speed);
7127 	}
7128 }
7129 
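/* ndo_set_vf_tx_rate hook: tx_rate is the requested limit in Mbps
 * (0 disables the limit), typically driven by something like
 * "ip link set <dev> vf <N> rate <Mbps>" from userspace.
 */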
7130 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7131 {
7132 	struct igb_adapter *adapter = netdev_priv(netdev);
7133 	struct e1000_hw *hw = &adapter->hw;
7134 	int actual_link_speed;
7135 
7136 	if (hw->mac.type != e1000_82576)
7137 		return -EOPNOTSUPP;
7138 
7139 	actual_link_speed = igb_link_mbps(adapter->link_speed);
7140 	if ((vf >= adapter->vfs_allocated_count) ||
7141 	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7142 	    (tx_rate < 0) || (tx_rate > actual_link_speed))
7143 		return -EINVAL;
7144 
7145 	adapter->vf_rate_link_speed = actual_link_speed;
7146 	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7147 	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7148 
7149 	return 0;
7150 }
7151 
7152 static int igb_ndo_get_vf_config(struct net_device *netdev,
7153 				 int vf, struct ifla_vf_info *ivi)
7154 {
7155 	struct igb_adapter *adapter = netdev_priv(netdev);
7156 	if (vf >= adapter->vfs_allocated_count)
7157 		return -EINVAL;
7158 	ivi->vf = vf;
7159 	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7160 	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7161 	ivi->vlan = adapter->vf_data[vf].pf_vlan;
7162 	ivi->qos = adapter->vf_data[vf].pf_qos;
7163 	return 0;
7164 }
7165 
7166 static void igb_vmm_control(struct igb_adapter *adapter)
7167 {
7168 	struct e1000_hw *hw = &adapter->hw;
7169 	u32 reg;
7170 
7171 	switch (hw->mac.type) {
7172 	case e1000_82575:
7173 	default:
7174 		/* replication is not supported for 82575 */
7175 		return;
7176 	case e1000_82576:
7177 		/* notify HW that the MAC is adding vlan tags */
7178 		reg = rd32(E1000_DTXCTL);
7179 		reg |= E1000_DTXCTL_VLAN_ADDED;
7180 		wr32(E1000_DTXCTL, reg);
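		/* Fall through */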
7181 	case e1000_82580:
7182 		/* enable replication vlan tag stripping */
7183 		reg = rd32(E1000_RPLOLR);
7184 		reg |= E1000_RPLOLR_STRVLAN;
7185 		wr32(E1000_RPLOLR, reg);
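		/* Fall through */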
7186 	case e1000_i350:
7187 		/* none of the above registers are supported by i350 */
7188 		break;
7189 	}
7190 
7191 	if (adapter->vfs_allocated_count) {
7192 		igb_vmdq_set_loopback_pf(hw, true);
7193 		igb_vmdq_set_replication_pf(hw, true);
7194 		igb_vmdq_set_anti_spoofing_pf(hw, true,
7195 						adapter->vfs_allocated_count);
7196 	} else {
7197 		igb_vmdq_set_loopback_pf(hw, false);
7198 		igb_vmdq_set_replication_pf(hw, false);
7199 	}
7200 }
7201 
7202 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7203 {
7204 	struct e1000_hw *hw = &adapter->hw;
7205 	u32 dmac_thr;
7206 	u16 hwm;
7207 
7208 	if (hw->mac.type > e1000_82580) {
7209 		if (adapter->flags & IGB_FLAG_DMAC) {
7210 			u32 reg;
7211 
7212 			/* force threshold to 0. */
7213 			wr32(E1000_DMCTXTH, 0);
7214 
7215 			/*
7216 			 * DMA Coalescing high water mark needs to be greater
7217 			 * than the Rx threshold. Set hwm to PBA - max frame
7218 			 * size in 16B units, but no lower than PBA - 6KB.
7219 			 */
7220 			hwm = 64 * pba - adapter->max_frame_size / 16;
7221 			if (hwm < 64 * (pba - 6))
7222 				hwm = 64 * (pba - 6);
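			/* e.g. pba = 34 (KB) and a 1522 byte max frame:
			 * hwm = 64 * 34 - 1522 / 16 = 2081 (16B units),
			 * above the 64 * (34 - 6) = 1792 floor.
			 */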
7223 			reg = rd32(E1000_FCRTC);
7224 			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7225 			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7226 				& E1000_FCRTC_RTH_COAL_MASK);
7227 			wr32(E1000_FCRTC, reg);
7228 
7229 			/*
7230 			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7231 			 * frame size, but no lower than PBA - 10KB.
7232 			 */
7233 			dmac_thr = pba - adapter->max_frame_size / 512;
7234 			if (dmac_thr < pba - 10)
7235 				dmac_thr = pba - 10;
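			/* e.g. pba = 34 and a 1522 byte max frame:
			 * dmac_thr = 34 - 1522 / 512 = 32 (KB), above the
			 * 34 - 10 = 24 floor.
			 */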
7236 			reg = rd32(E1000_DMACR);
7237 			reg &= ~E1000_DMACR_DMACTHR_MASK;
7238 			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7239 				& E1000_DMACR_DMACTHR_MASK);
7240 
7241 			/* transition to L0s or L1 if available */
7242 			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7243 
7244 			/* watchdog timer = ~1000 usec in 32 usec intervals */
7245 			reg |= (1000 >> 5);
7246 			wr32(E1000_DMACR, reg);
7247 
7248 			/*
7249 			 * no lower threshold to disable
7250 			 * coalescing (smart FIFO) - UTRESH=0
7251 			 */
7252 			wr32(E1000_DMCRTRH, 0);
7253 
7254 			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7255 
7256 			wr32(E1000_DMCTLX, reg);
7257 
7258 			/*
7259 			 * free space in tx packet buffer to wake from
7260 			 * DMA coal
7261 			 */
7262 			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7263 			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7264 
7265 			/*
7266 			 * make low power state decision controlled
7267 			 * by DMA coal
7268 			 */
7269 			reg = rd32(E1000_PCIEMISC);
7270 			reg &= ~E1000_PCIEMISC_LX_DECISION;
7271 			wr32(E1000_PCIEMISC, reg);
7272 		} /* endif adapter->dmac is not disabled */
7273 	} else if (hw->mac.type == e1000_82580) {
7274 		u32 reg = rd32(E1000_PCIEMISC);
7275 		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7276 		wr32(E1000_DMACR, 0);
7277 	}
7278 }
7279 
7280 /* igb_main.c */
7281