/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_que(void *context, int pending);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
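
/*
 * The hardware delay registers count in units of 1.024 usecs, hence the
 * rounding terms above. For example, EM_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66 usecs, and going back,
 * EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks.
 */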

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
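
/*
 * A ring must be sized so that (count * descriptor size) is a multiple
 * of EM_DBA_ALIGN; with 16-byte descriptors and EM_DBA_ALIGN at 128 this
 * works out to a multiple of 8. Out-of-range or misaligned tunable values
 * are replaced with the defaults in em_attach().
 */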

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
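
/*
 * This value is copied into hw->dev_spec.ich8lan.eee_disable in
 * em_attach(), so the default of 1 leaves EEE disabled; set the
 * tunable to 0 to enable it.
 */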

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
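	/*
	 * For reference: ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE is
	 * 1500 + 14 + 4 = 1518 bytes, and ETH_ZLEN + ETHERNET_FCS_SIZE is
	 * 60 + 4 = 64 bytes, the classic Ethernet frame-size limits.
	 */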

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  If the hardware is busy, the driver can queue the request rather
 *  than do an immediate send. It is this queueing, rather than having
 *  multiple tx queues, that is the advantage in this driver.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

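/*
 * Note the trylock in em_mq_start below: if another thread already holds
 * the TX lock, the frame is simply left on the buf_ring to be drained by
 * the current lock holder, so senders never block here.
 */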
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it through the
 *  init entry point in the network interface structure, and the
 *  driver calls it as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest MAC address; the user may have set a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we keep a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
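	/*
	 * These sizes step through the cluster pools: MCLBYTES (2048),
	 * MJUMPAGESIZE (one page, typically 4096) and MJUM9BYTES (9216),
	 * so a received frame always fits in a single cluster.
	 */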

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

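/*
 * The legacy/MSI path is thus two-stage: em_irq_fast() masks interrupts
 * from the filter routine, and em_handle_que() above does the actual
 * RX/TX cleanup from a taskqueue, re-enabling interrupts once no more
 * work is pending.
 */
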
/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

1725 /*********************************************************************
1726  *
1727  *  Media Ioctl callback
1728  *
1729  *  This routine is called when the user changes speed/duplex using
1730  *  media/mediaopt options with ifconfig.
1731  *
1732  **********************************************************************/
1733 static int
1734 em_media_change(struct ifnet *ifp)
1735 {
1736 	struct adapter *adapter = ifp->if_softc;
1737 	struct ifmedia  *ifm = &adapter->media;
1738 
1739 	INIT_DEBUGOUT("em_media_change: begin");
1740 
1741 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1742 		return (EINVAL);
1743 
1744 	EM_CORE_LOCK(adapter);
1745 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1746 	case IFM_AUTO:
1747 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1748 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1749 		break;
1750 	case IFM_1000_LX:
1751 	case IFM_1000_SX:
1752 	case IFM_1000_T:
1753 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1754 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1755 		break;
1756 	case IFM_100_TX:
1757 		adapter->hw.mac.autoneg = FALSE;
1758 		adapter->hw.phy.autoneg_advertised = 0;
1759 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1760 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1761 		else
1762 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1763 		break;
1764 	case IFM_10_T:
1765 		adapter->hw.mac.autoneg = FALSE;
1766 		adapter->hw.phy.autoneg_advertised = 0;
1767 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1768 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1769 		else
1770 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1771 		break;
1772 	default:
1773 		device_printf(adapter->dev, "Unsupported media type\n");
1774 	}
1775 
1776 	em_init_locked(adapter);
1777 	EM_CORE_UNLOCK(adapter);
1778 
1779 	return (0);
1780 }
1781 
1782 /*********************************************************************
1783  *
1784  *  This routine maps the mbufs to tx descriptors.
1785  *
1786  *  return 0 on success, positive on failure
1787  **********************************************************************/
1788 
1789 static int
1790 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1791 {
1792 	struct adapter		*adapter = txr->adapter;
1793 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1794 	bus_dmamap_t		map;
1795 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1796 	struct e1000_tx_desc	*ctxd = NULL;
1797 	struct mbuf		*m_head;
1798 	struct ether_header	*eh;
1799 	struct ip		*ip = NULL;
1800 	struct tcphdr		*tp = NULL;
1801 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1802 	int			ip_off, poff;
1803 	int			nsegs, i, j, first, last = 0;
1804 	int			error, do_tso, tso_desc = 0, remap = 1;
1805 
1806 retry:
1807 	m_head = *m_headp;
1808 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1809 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1810 	ip_off = poff = 0;
1811 
1812 	/*
1813 	 * Intel recommends that the entire IP/TCP header length reside in a
1814 	 * single buffer. If multiple descriptors are used to describe the IP
1815 	 * and TCP header, each descriptor should describe one or more
1816 	 * complete headers; descriptors referencing only parts of headers
1817 	 * are not supported. If all layer headers are not coalesced into
1818 	 * a single buffer, each buffer should not cross a 4KB boundary,
1819 	 * or be larger than the maximum read request size.
1820 	 * The controller also requires modifying the IP/TCP header to make
1821 	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1822 	 * ethernet/IP/TCP header into a single buffer to meet the
1823 	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1824 	 * offloading, which has similar restrictions.
1825 	 */
1826 	if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1827 		if (do_tso || (m_head->m_next != NULL &&
1828 		    (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD))) {
1829 			if (M_WRITABLE(*m_headp) == 0) {
1830 				m_head = m_dup(*m_headp, M_DONTWAIT);
1831 				m_freem(*m_headp);
1832 				if (m_head == NULL) {
1833 					*m_headp = NULL;
1834 					return (ENOBUFS);
1835 				}
1836 				*m_headp = m_head;
1837 			}
1838 		}
1839 		/*
1840 		 * XXX
1841 		 * Assume IPv4, we don't have TSO/checksum offload support
1842 		 * for IPv6 yet.
1843 		 */
1844 		ip_off = sizeof(struct ether_header);
1845 		m_head = m_pullup(m_head, ip_off);
1846 		if (m_head == NULL) {
1847 			*m_headp = NULL;
1848 			return (ENOBUFS);
1849 		}
1850 		eh = mtod(m_head, struct ether_header *);
1851 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1852 			ip_off = sizeof(struct ether_vlan_header);
1853 			m_head = m_pullup(m_head, ip_off);
1854 			if (m_head == NULL) {
1855 				*m_headp = NULL;
1856 				return (ENOBUFS);
1857 			}
1858 		}
1859 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1860 		if (m_head == NULL) {
1861 			*m_headp = NULL;
1862 			return (ENOBUFS);
1863 		}
1864 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1865 		poff = ip_off + (ip->ip_hl << 2);
1866 		if (do_tso) {
1867 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1868 			if (m_head == NULL) {
1869 				*m_headp = NULL;
1870 				return (ENOBUFS);
1871 			}
1872 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1873 			/*
1874 			 * TSO workaround:
1875 			 *   pull 4 more bytes of data into the header mbuf.
1876 			 */
1877 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1878 			if (m_head == NULL) {
1879 				*m_headp = NULL;
1880 				return (ENOBUFS);
1881 			}
1882 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
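			/*
			 * Zero the IP length and checksum; the hardware
			 * recomputes them for each TSO segment it emits.
			 */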
1883 			ip->ip_len = 0;
1884 			ip->ip_sum = 0;
1885 			/*
1886 			 * The TCP pseudo checksum must not include the TCP
1887 			 * payload length, so the driver recomputes the
1888 			 * checksum here as the hardware expects to see it,
1889 			 * per Microsoft's Large Send specification.
1890 			 */
1891 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1893 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1894 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1895 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1896 			if (m_head == NULL) {
1897 				*m_headp = NULL;
1898 				return (ENOBUFS);
1899 			}
1900 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1901 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1902 			if (m_head == NULL) {
1903 				*m_headp = NULL;
1904 				return (ENOBUFS);
1905 			}
1906 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1907 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1908 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1909 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1910 			if (m_head == NULL) {
1911 				*m_headp = NULL;
1912 				return (ENOBUFS);
1913 			}
1914 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1915 		}
1916 		*m_headp = m_head;
1917 	}
1918 
1919 	/*
1920 	 * Map the packet for DMA
1921 	 *
1922 	 * Capture the first descriptor index,
1923 	 * this descriptor will have the index
1924 	 * of the EOP which is the only one that
1925 	 * now gets a DONE bit writeback.
1926 	 */
1927 	first = txr->next_avail_desc;
1928 	tx_buffer = &txr->tx_buffers[first];
1929 	tx_buffer_mapped = tx_buffer;
1930 	map = tx_buffer->map;
1931 
1932 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1933 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1934 
1935 	/*
1936 	 * There are two types of errors we can (try) to handle:
1937 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1938 	 *   out of segments.  Defragment the mbuf chain and try again.
1939 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1940 	 *   at this point in time.  Defer sending and try again later.
1941 	 * All other errors, in particular EINVAL, are fatal and prevent the
1942 	 * mbuf chain from ever going through.  Drop it and report error.
1943 	 */
1944 	if (error == EFBIG && remap) {
1945 		struct mbuf *m;
1946 
1947 		m = m_defrag(*m_headp, M_DONTWAIT);
1948 		if (m == NULL) {
1949 			adapter->mbuf_alloc_failed++;
1950 			m_freem(*m_headp);
1951 			*m_headp = NULL;
1952 			return (ENOBUFS);
1953 		}
1954 		*m_headp = m;
1955 
1956 		/* Try it again, but only once */
1957 		remap = 0;
1958 		goto retry;
1959 	} else if (error == ENOMEM) {
1960 		adapter->no_tx_dma_setup++;
1961 		return (error);
1962 	} else if (error != 0) {
1963 		adapter->no_tx_dma_setup++;
1964 		m_freem(*m_headp);
1965 		*m_headp = NULL;
1966 		return (error);
1967 	}
1968 
1969 	/*
1970 	 * TSO Hardware workaround, if this packet is not
1971 	 * TSO, and is only a single descriptor long, and
1972 	 * it follows a TSO burst, then we need to add a
1973 	 * sentinel descriptor to prevent premature writeback.
1974 	 */
1975 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1976 		if (nsegs == 1)
1977 			tso_desc = TRUE;
1978 		txr->tx_tso = FALSE;
1979 	}
1980 
1981 	if (nsegs > (txr->tx_avail - 2)) {
1982 		txr->no_desc_avail++;
1983 		bus_dmamap_unload(txr->txtag, map);
1984 		return (ENOBUFS);
1985 	}
1986 	m_head = *m_headp;
1987 
1988 	/* Do hardware assists */
1989 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1990 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1991 		    &txd_upper, &txd_lower);
1992 		/* we need to make a final sentinel transmit desc */
1993 		tso_desc = TRUE;
1994 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1995 		em_transmit_checksum_setup(txr, m_head,
1996 		    ip_off, ip, &txd_upper, &txd_lower);
1997 
1998 	if (m_head->m_flags & M_VLANTAG) {
1999 		/* Set the vlan id. */
2000 		txd_upper |=
2001 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2002 		/* Tell hardware to add tag */
2003 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2004 	}
2005 
2006 	i = txr->next_avail_desc;
2007 
2008 	/* Set up our transmit descriptors */
2009 	for (j = 0; j < nsegs; j++) {
2010 		bus_size_t seg_len;
2011 		bus_addr_t seg_addr;
2012 
2013 		tx_buffer = &txr->tx_buffers[i];
2014 		ctxd = &txr->tx_base[i];
2015 		seg_addr = segs[j].ds_addr;
2016 		seg_len  = segs[j].ds_len;
2017 		/*
2018 		** TSO Workaround:
2019 		** If this is the last descriptor, we want to
2020 		** split it so we have a small final sentinel
2021 		*/
2022 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2023 			seg_len -= 4;
2024 			ctxd->buffer_addr = htole64(seg_addr);
2025 			ctxd->lower.data = htole32(
2026 			adapter->txd_cmd | txd_lower | seg_len);
2027 			ctxd->upper.data =
2028 			    htole32(txd_upper);
2029 			if (++i == adapter->num_tx_desc)
2030 				i = 0;
2031 			/* Now make the sentinel */
2032 			++txd_used; /* using an extra txd */
2033 			ctxd = &txr->tx_base[i];
2034 			tx_buffer = &txr->tx_buffers[i];
2035 			ctxd->buffer_addr =
2036 			    htole64(seg_addr + seg_len);
2037 			ctxd->lower.data = htole32(
2038 			adapter->txd_cmd | txd_lower | 4);
2039 			ctxd->upper.data =
2040 			    htole32(txd_upper);
2041 			last = i;
2042 			if (++i == adapter->num_tx_desc)
2043 				i = 0;
2044 		} else {
2045 			ctxd->buffer_addr = htole64(seg_addr);
2046 			ctxd->lower.data = htole32(
2047 			adapter->txd_cmd | txd_lower | seg_len);
2048 			ctxd->upper.data =
2049 			    htole32(txd_upper);
2050 			last = i;
2051 			if (++i == adapter->num_tx_desc)
2052 				i = 0;
2053 		}
2054 		tx_buffer->m_head = NULL;
2055 		tx_buffer->next_eop = -1;
2056 	}
2057 
2058 	txr->next_avail_desc = i;
2059 	txr->tx_avail -= nsegs;
2060 	if (tso_desc) /* TSO used an extra for sentinel */
2061 		txr->tx_avail -= txd_used;
2062 
2063 	tx_buffer->m_head = m_head;
2064 	/*
2065 	** Here we swap the map so the last descriptor,
2066 	** which gets the completion interrupt has the
2067 	** real map, and the first descriptor gets the
2068 	** unused map from this descriptor.
2069 	*/
2070 	tx_buffer_mapped->map = tx_buffer->map;
2071 	tx_buffer->map = map;
2072 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2073 
2074 	/*
2075 	 * Last Descriptor of Packet
2076 	 * needs End Of Packet (EOP)
2077 	 * and Report Status (RS)
2078 	 */
2079 	ctxd->lower.data |=
2080 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2081 	/*
2082 	 * Keep track in the first buffer which
2083 	 * descriptor will be written back
2084 	 */
2085 	tx_buffer = &txr->tx_buffers[first];
2086 	tx_buffer->next_eop = last;
2087 	/* Update the watchdog time early and often */
2088 	txr->watchdog_time = ticks;
2089 
2090 	/*
2091 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2092 	 * that this frame is available to transmit.
2093 	 */
2094 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2095 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2096 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2097 
2098 	return (0);
2099 }
2100 
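/*
** Program promiscuous receive filtering from the interface
** flags (IFF_PROMISC / IFF_ALLMULTI).
*/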
2101 static void
2102 em_set_promisc(struct adapter *adapter)
2103 {
2104 	struct ifnet	*ifp = adapter->ifp;
2105 	u32		reg_rctl;
2106 
2107 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2108 
2109 	if (ifp->if_flags & IFF_PROMISC) {
2110 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2111 		/* Turn this on if you want to see bad packets */
2112 		if (em_debug_sbp)
2113 			reg_rctl |= E1000_RCTL_SBP;
2114 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2116 		reg_rctl |= E1000_RCTL_MPE;
2117 		reg_rctl &= ~E1000_RCTL_UPE;
2118 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2119 	}
2120 }
2121 
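/*
** Clear the unicast/multicast promiscuous and store-bad-packet
** bits in the receive control register.
*/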
2122 static void
2123 em_disable_promisc(struct adapter *adapter)
2124 {
2125 	u32	reg_rctl;
2126 
2127 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128 
2129 	reg_rctl &=  (~E1000_RCTL_UPE);
2130 	reg_rctl &=  (~E1000_RCTL_MPE);
2131 	reg_rctl &=  (~E1000_RCTL_SBP);
2132 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133 }
2134 
2135 
2136 /*********************************************************************
2137  *  Multicast Update
2138  *
2139  *  This routine is called whenever multicast address list is updated.
2140  *
2141  **********************************************************************/
2142 
2143 static void
2144 em_set_multi(struct adapter *adapter)
2145 {
2146 	struct ifnet	*ifp = adapter->ifp;
2147 	struct ifmultiaddr *ifma;
2148 	u32 reg_rctl = 0;
2149 	u8  *mta; /* Multicast array memory */
2150 	int mcnt = 0;
2151 
2152 	IOCTL_DEBUGOUT("em_set_multi: begin");
2153 
2154 	mta = adapter->mta;
2155 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2156 
2157 	if (adapter->hw.mac.type == e1000_82542 &&
2158 	    adapter->hw.revision_id == E1000_REVISION_2) {
2159 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2160 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2161 			e1000_pci_clear_mwi(&adapter->hw);
2162 		reg_rctl |= E1000_RCTL_RST;
2163 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2164 		msec_delay(5);
2165 	}
2166 
2167 #if __FreeBSD_version < 800000
2168 	IF_ADDR_LOCK(ifp);
2169 #else
2170 	if_maddr_rlock(ifp);
2171 #endif
2172 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2173 		if (ifma->ifma_addr->sa_family != AF_LINK)
2174 			continue;
2175 
2176 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2177 			break;
2178 
2179 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2180 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2181 		mcnt++;
2182 	}
2183 #if __FreeBSD_version < 800000
2184 	IF_ADDR_UNLOCK(ifp);
2185 #else
2186 	if_maddr_runlock(ifp);
2187 #endif
2188 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2189 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2190 		reg_rctl |= E1000_RCTL_MPE;
2191 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2192 	} else
2193 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2194 
2195 	if (adapter->hw.mac.type == e1000_82542 &&
2196 	    adapter->hw.revision_id == E1000_REVISION_2) {
2197 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2198 		reg_rctl &= ~E1000_RCTL_RST;
2199 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2200 		msec_delay(5);
2201 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2202 			e1000_pci_set_mwi(&adapter->hw);
2203 	}
2204 }
2205 
2206 
2207 /*********************************************************************
2208  *  Timer routine
2209  *
2210  *  This routine checks for link status and updates statistics.
2211  *
2212  **********************************************************************/
2213 
2214 static void
2215 em_local_timer(void *arg)
2216 {
2217 	struct adapter	*adapter = arg;
2218 	struct ifnet	*ifp = adapter->ifp;
2219 	struct tx_ring	*txr = adapter->tx_rings;
2220 	struct rx_ring	*rxr = adapter->rx_rings;
2221 	u32		trigger;
2222 
2223 	EM_CORE_LOCK_ASSERT(adapter);
2224 
2225 	em_update_link_status(adapter);
2226 	em_update_stats_counters(adapter);
2227 
2228 	/* Reset LAA into RAR[0] on 82571 */
2229 	if ((adapter->hw.mac.type == e1000_82571) &&
2230 	    e1000_get_laa_state_82571(&adapter->hw))
2231 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2232 
2233 	/* Mask to use in the irq trigger */
2234 	if (adapter->msix_mem)
2235 		trigger = rxr->ims; /* RX for 82574 */
2236 	else
2237 		trigger = E1000_ICS_RXDMT0;
2238 
2239 	/*
2240 	** Check on the state of the TX queue(s); this
2241 	** can be done without the lock because it's RO
2242 	** and the HUNG state will be static if set.
2243 	*/
2244 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2245 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2246 		    (adapter->pause_frames == 0))
2247 			goto hung;
2248 		/* Schedule a TX tasklet if needed */
2249 		if (txr->tx_avail <= EM_MAX_SCATTER)
2250 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2251 	}
2252 
2253 	adapter->pause_frames = 0;
2254 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2255 #ifndef DEVICE_POLLING
2256 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2257 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2258 #endif
2259 	return;
2260 hung:
2261 	/* Looks like we're hung */
2262 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2263 	device_printf(adapter->dev,
2264 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2265 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2266 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2267 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2268 	    "Next TX to Clean = %d\n",
2269 	    txr->me, txr->tx_avail, txr->next_to_clean);
2270 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2271 	adapter->watchdog_events++;
2272 	adapter->pause_frames = 0;
2273 	em_init_locked(adapter);
2274 }
2275 
2276 
2277 static void
2278 em_update_link_status(struct adapter *adapter)
2279 {
2280 	struct e1000_hw *hw = &adapter->hw;
2281 	struct ifnet *ifp = adapter->ifp;
2282 	device_t dev = adapter->dev;
2283 	struct tx_ring *txr = adapter->tx_rings;
2284 	u32 link_check = 0;
2285 
2286 	/* Get the cached link value or read phy for real */
2287 	switch (hw->phy.media_type) {
2288 	case e1000_media_type_copper:
2289 		if (hw->mac.get_link_status) {
2290 			/* Do the work to read phy */
2291 			e1000_check_for_link(hw);
2292 			link_check = !hw->mac.get_link_status;
2293 			if (link_check) /* ESB2 fix */
2294 				e1000_cfg_on_link_up(hw);
2295 		} else
2296 			link_check = TRUE;
2297 		break;
2298 	case e1000_media_type_fiber:
2299 		e1000_check_for_link(hw);
2300 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2301 		    E1000_STATUS_LU);
2302 		break;
2303 	case e1000_media_type_internal_serdes:
2304 		e1000_check_for_link(hw);
2305 		link_check = adapter->hw.mac.serdes_has_link;
2306 		break;
2307 	default:
2308 	case e1000_media_type_unknown:
2309 		break;
2310 	}
2311 
2312 	/* Now check for a transition */
2313 	if (link_check && (adapter->link_active == 0)) {
2314 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2315 		    &adapter->link_duplex);
2316 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2317 		if ((adapter->link_speed != SPEED_1000) &&
2318 		    ((hw->mac.type == e1000_82571) ||
2319 		    (hw->mac.type == e1000_82572))) {
2320 			int tarc0;
2321 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2322 			tarc0 &= ~SPEED_MODE_BIT;
2323 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2324 		}
2325 		if (bootverbose)
2326 			device_printf(dev, "Link is up %d Mbps %s\n",
2327 			    adapter->link_speed,
2328 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2329 			    "Full Duplex" : "Half Duplex"));
2330 		adapter->link_active = 1;
2331 		adapter->smartspeed = 0;
2332 		ifp->if_baudrate = adapter->link_speed * 1000000;
2333 		if_link_state_change(ifp, LINK_STATE_UP);
2334 	} else if (!link_check && (adapter->link_active == 1)) {
2335 		ifp->if_baudrate = adapter->link_speed = 0;
2336 		adapter->link_duplex = 0;
2337 		if (bootverbose)
2338 			device_printf(dev, "Link is Down\n");
2339 		adapter->link_active = 0;
2340 		/* Link down, disable watchdog */
2341 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2342 			txr->queue_status = EM_QUEUE_IDLE;
2343 		if_link_state_change(ifp, LINK_STATE_DOWN);
2344 	}
2345 }
2346 
2347 /*********************************************************************
2348  *
2349  *  This routine disables all traffic on the adapter by issuing a
2350  *  global reset on the MAC and deallocates TX/RX buffers.
2351  *
2352  *  This routine should always be called with BOTH the CORE
2353  *  and TX locks.
2354  **********************************************************************/
2355 
2356 static void
2357 em_stop(void *arg)
2358 {
2359 	struct adapter	*adapter = arg;
2360 	struct ifnet	*ifp = adapter->ifp;
2361 	struct tx_ring	*txr = adapter->tx_rings;
2362 
2363 	EM_CORE_LOCK_ASSERT(adapter);
2364 
2365 	INIT_DEBUGOUT("em_stop: begin");
2366 
2367 	em_disable_intr(adapter);
2368 	callout_stop(&adapter->timer);
2369 
2370 	/* Tell the stack that the interface is no longer active */
2371 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2372 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2373 
2374 	/* Unarm watchdog timer. */
2375 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2376 		EM_TX_LOCK(txr);
2377 		txr->queue_status = EM_QUEUE_IDLE;
2378 		EM_TX_UNLOCK(txr);
2379 	}
2380 
2381 	e1000_reset_hw(&adapter->hw);
2382 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2383 
2384 	e1000_led_off(&adapter->hw);
2385 	e1000_cleanup_led(&adapter->hw);
2386 }
2387 
2388 
2389 /*********************************************************************
2390  *
2391  *  Determine hardware revision.
2392  *
2393  **********************************************************************/
2394 static void
2395 em_identify_hardware(struct adapter *adapter)
2396 {
2397 	device_t dev = adapter->dev;
2398 
2399 	/* Make sure our PCI config space has the necessary stuff set */
2400 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2401 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2402 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2403 		device_printf(dev, "Memory Access and/or Bus Master bits "
2404 		    "were not set!\n");
2405 		adapter->hw.bus.pci_cmd_word |=
2406 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2407 		pci_write_config(dev, PCIR_COMMAND,
2408 		    adapter->hw.bus.pci_cmd_word, 2);
2409 	}
2410 
2411 	/* Save off the information about this board */
2412 	adapter->hw.vendor_id = pci_get_vendor(dev);
2413 	adapter->hw.device_id = pci_get_device(dev);
2414 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2415 	adapter->hw.subsystem_vendor_id =
2416 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2417 	adapter->hw.subsystem_device_id =
2418 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2419 
2420 	/* Do Shared Code Init and Setup */
2421 	if (e1000_set_mac_type(&adapter->hw)) {
2422 		device_printf(dev, "Setup init failure\n");
2423 		return;
2424 	}
2425 }
2426 
2427 static int
2428 em_allocate_pci_resources(struct adapter *adapter)
2429 {
2430 	device_t	dev = adapter->dev;
2431 	int		rid;
2432 
2433 	rid = PCIR_BAR(0);
2434 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2435 	    &rid, RF_ACTIVE);
2436 	if (adapter->memory == NULL) {
2437 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2438 		return (ENXIO);
2439 	}
2440 	adapter->osdep.mem_bus_space_tag =
2441 	    rman_get_bustag(adapter->memory);
2442 	adapter->osdep.mem_bus_space_handle =
2443 	    rman_get_bushandle(adapter->memory);
2444 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2445 
2446 	/* Default to a single queue */
2447 	adapter->num_queues = 1;
2448 
2449 	/*
2450 	 * Setup MSI/X or MSI if PCI Express
2451 	 */
2452 	adapter->msix = em_setup_msix(adapter);
2453 
2454 	adapter->hw.back = &adapter->osdep;
2455 
2456 	return (0);
2457 }
2458 
2459 /*********************************************************************
2460  *
2461  *  Setup the Legacy or MSI Interrupt handler
2462  *
2463  **********************************************************************/
2464 int
2465 em_allocate_legacy(struct adapter *adapter)
2466 {
2467 	device_t dev = adapter->dev;
2468 	struct tx_ring	*txr = adapter->tx_rings;
2469 	int error, rid = 0;
2470 
2471 	/* Manually turn off all interrupts */
2472 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2473 
2474 	if (adapter->msix == 1) /* using MSI */
2475 		rid = 1;
2476 	/* We allocate a single interrupt resource */
2477 	adapter->res = bus_alloc_resource_any(dev,
2478 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2479 	if (adapter->res == NULL) {
2480 		device_printf(dev, "Unable to allocate bus resource: "
2481 		    "interrupt\n");
2482 		return (ENXIO);
2483 	}
2484 
2485 	/*
2486 	 * Allocate a fast interrupt and the associated
2487 	 * deferred processing contexts.
2488 	 */
2489 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2490 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2491 	    taskqueue_thread_enqueue, &adapter->tq);
2492 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2493 	    device_get_nameunit(adapter->dev));
2494 	/* Use a TX only tasklet for local timer */
2495 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2496 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2497 	    taskqueue_thread_enqueue, &txr->tq);
2498 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2499 	    device_get_nameunit(adapter->dev));
2500 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2501 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2502 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2503 		device_printf(dev, "Failed to register fast interrupt "
2504 			    "handler: %d\n", error);
2505 		taskqueue_free(adapter->tq);
2506 		adapter->tq = NULL;
2507 		return (error);
2508 	}
2509 
2510 	return (0);
2511 }
2512 
2513 /*********************************************************************
2514  *
2515  *  Setup the MSIX Interrupt handlers
2516  *   This is not really Multiqueue, rather
2517  *   it's just separate interrupt vectors
2518  *   for TX, RX, and Link.
2519  *
2520  **********************************************************************/
2521 int
2522 em_allocate_msix(struct adapter *adapter)
2523 {
2524 	device_t	dev = adapter->dev;
2525 	struct		tx_ring *txr = adapter->tx_rings;
2526 	struct		rx_ring *rxr = adapter->rx_rings;
2527 	int		error, rid, vector = 0;
2528 
2529 
2530 	/* Make sure all interrupts are disabled */
2531 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2532 
2533 	/* First set up ring resources */
2534 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2535 
2536 		/* RX ring */
2537 		rid = vector + 1;
2538 
2539 		rxr->res = bus_alloc_resource_any(dev,
2540 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2541 		if (rxr->res == NULL) {
2542 			device_printf(dev,
2543 			    "Unable to allocate bus resource: "
2544 			    "RX MSIX Interrupt %d\n", i);
2545 			return (ENXIO);
2546 		}
2547 		if ((error = bus_setup_intr(dev, rxr->res,
2548 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2549 		    rxr, &rxr->tag)) != 0) {
2550 			device_printf(dev, "Failed to register RX handler");
2551 			return (error);
2552 		}
2553 #if __FreeBSD_version >= 800504
2554 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2555 #endif
2556 		rxr->msix = vector++; /* NOTE increment vector for TX */
2557 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2558 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2559 		    taskqueue_thread_enqueue, &rxr->tq);
2560 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2561 		    device_get_nameunit(adapter->dev));
2562 		/*
2563 		** Set the bit to enable interrupt
2564 		** in E1000_IMS -- bits 20 and 21
2565 		** are for RX0 and RX1, note this has
2566 		** NOTHING to do with the MSIX vector
2567 		*/
2568 		rxr->ims = 1 << (20 + i);
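		/*
		** IVAR (82574): each 4-bit field maps a cause to an
		** MSIX vector in its low 3 bits; the 0x8 bit marks
		** the entry valid.
		*/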
2569 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2570 
2571 		/* TX ring */
2572 		rid = vector + 1;
2573 		txr->res = bus_alloc_resource_any(dev,
2574 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2575 		if (txr->res == NULL) {
2576 			device_printf(dev,
2577 			    "Unable to allocate bus resource: "
2578 			    "TX MSIX Interrupt %d\n", i);
2579 			return (ENXIO);
2580 		}
2581 		if ((error = bus_setup_intr(dev, txr->res,
2582 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2583 		    txr, &txr->tag)) != 0) {
2584 			device_printf(dev, "Failed to register TX handler");
2585 			return (error);
2586 		}
2587 #if __FreeBSD_version >= 800504
2588 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2589 #endif
2590 		txr->msix = vector++; /* Increment vector for next pass */
2591 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2592 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2593 		    taskqueue_thread_enqueue, &txr->tq);
2594 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2595 		    device_get_nameunit(adapter->dev));
2596 		/*
2597 		** Set the bit to enable interrupt
2598 		** in E1000_IMS -- bits 22 and 23
2599 		** are for TX0 and TX1, note this has
2600 		** NOTHING to do with the MSIX vector
2601 		*/
2602 		txr->ims = 1 << (22 + i);
2603 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2604 	}
2605 
2606 	/* Link interrupt */
2607 	++rid;
2608 	adapter->res = bus_alloc_resource_any(dev,
2609 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2610 	if (!adapter->res) {
2611 		device_printf(dev,"Unable to allocate "
2612 		    "bus resource: Link interrupt [%d]\n", rid);
2613 		return (ENXIO);
2614 	}
2615 	/* Set the link handler function */
2616 	error = bus_setup_intr(dev, adapter->res,
2617 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2618 	    em_msix_link, adapter, &adapter->tag);
2619 	if (error) {
2620 		adapter->res = NULL;
2621 		device_printf(dev, "Failed to register LINK handler");
2622 		return (error);
2623 	}
2624 #if __FreeBSD_version >= 800504
2625 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2626 #endif
2627 	adapter->linkvec = vector;
2628 	adapter->ivars |=  (8 | vector) << 16;
2629 	adapter->ivars |= 0x80000000;
2630 
2631 	return (0);
2632 }
2633 
2634 
2635 static void
2636 em_free_pci_resources(struct adapter *adapter)
2637 {
2638 	device_t	dev = adapter->dev;
2639 	struct tx_ring	*txr;
2640 	struct rx_ring	*rxr;
2641 	int		rid;
2642 
2643 
2644 	/*
2645 	** Release all the queue interrupt resources:
2646 	*/
2647 	for (int i = 0; i < adapter->num_queues; i++) {
2648 		txr = &adapter->tx_rings[i];
2649 		rxr = &adapter->rx_rings[i];
2650 		/* an early abort? */
2651 		if ((txr == NULL) || (rxr == NULL))
2652 			break;
2653 		rid = txr->msix + 1;
2654 		if (txr->tag != NULL) {
2655 			bus_teardown_intr(dev, txr->res, txr->tag);
2656 			txr->tag = NULL;
2657 		}
2658 		if (txr->res != NULL)
2659 			bus_release_resource(dev, SYS_RES_IRQ,
2660 			    rid, txr->res);
2661 		rid = rxr->msix + 1;
2662 		if (rxr->tag != NULL) {
2663 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2664 			rxr->tag = NULL;
2665 		}
2666 		if (rxr->res != NULL)
2667 			bus_release_resource(dev, SYS_RES_IRQ,
2668 			    rid, rxr->res);
2669 	}
2670 
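	/*
	** Pick the rid of the remaining interrupt: linkvec + 1
	** under MSIX, 1 for MSI, 0 for a legacy INTx.
	*/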
2671 	if (adapter->linkvec) /* we are doing MSIX */
2672 		rid = adapter->linkvec + 1;
2673 	else
2674 		rid = (adapter->msix != 0) ? 1 : 0;
2675 
2676 	if (adapter->tag != NULL) {
2677 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2678 		adapter->tag = NULL;
2679 	}
2680 
2681 	if (adapter->res != NULL)
2682 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2683 
2684 
2685 	if (adapter->msix)
2686 		pci_release_msi(dev);
2687 
2688 	if (adapter->msix_mem != NULL)
2689 		bus_release_resource(dev, SYS_RES_MEMORY,
2690 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2691 
2692 	if (adapter->memory != NULL)
2693 		bus_release_resource(dev, SYS_RES_MEMORY,
2694 		    PCIR_BAR(0), adapter->memory);
2695 
2696 	if (adapter->flash != NULL)
2697 		bus_release_resource(dev, SYS_RES_MEMORY,
2698 		    EM_FLASH, adapter->flash);
2699 }
2700 
2701 /*
2702  * Setup MSI or MSI/X
2703  */
2704 static int
2705 em_setup_msix(struct adapter *adapter)
2706 {
2707 	device_t dev = adapter->dev;
2708 	int val = 0;
2709 
2710 	/*
2711 	** Setup MSI/X for Hartwell: tests have shown
2712 	** use of two queues to be unstable, and to
2713 	** provide no great gain anyway, so we simply
2714 	** separate the interrupts and use a single queue.
2715 	*/
2716 	if ((adapter->hw.mac.type == e1000_82574) &&
2717 	    (em_enable_msix == TRUE)) {
2718 		/* Map the MSIX BAR */
2719 		int rid = PCIR_BAR(EM_MSIX_BAR);
2720 		adapter->msix_mem = bus_alloc_resource_any(dev,
2721 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2722 		if (!adapter->msix_mem) {
2723 			/* May not be enabled */
2724 			device_printf(adapter->dev,
2725 			    "Unable to map MSIX table\n");
2726 			goto msi;
2727 		}
2728 		val = pci_msix_count(dev);
2729 		/* We only need 3 vectors */
2730 		if (val > 3)
2731 			val = 3;
2732 		if (val != 3) {	/* val was clamped to at most 3 above */
2733 			bus_release_resource(dev, SYS_RES_MEMORY,
2734 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735 			adapter->msix_mem = NULL;
2736 			device_printf(adapter->dev,
2737 			    "MSIX: incorrect vectors, using MSI\n");
2738 			goto msi;
2739 		}
2740 
2741 		if (pci_alloc_msix(dev, &val) == 0) {
2742 			device_printf(adapter->dev,
2743 			    "Using MSIX interrupts "
2744 			    "with %d vectors\n", val);
2745 		}
2746 
2747 		return (val);
2748 	}
2749 msi:
2750 	val = pci_msi_count(dev);
2751 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2752 		adapter->msix = 1;
2753 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2754 		return (val);
2755 	}
2756 	/* Should only happen due to manual configuration */
2757 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2758 	return (0);
2759 }
2760 
2761 
2762 /*********************************************************************
2763  *
2764  *  Initialize the hardware to a configuration
2765  *  as specified by the adapter structure.
2766  *
2767  **********************************************************************/
2768 static void
2769 em_reset(struct adapter *adapter)
2770 {
2771 	device_t	dev = adapter->dev;
2772 	struct ifnet	*ifp = adapter->ifp;
2773 	struct e1000_hw	*hw = &adapter->hw;
2774 	u16		rx_buffer_size;
2775 	u32		pba;
2776 
2777 	INIT_DEBUGOUT("em_reset: begin");
2778 
2779 	/* Set up smart power down as default off on newer adapters. */
2780 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2781 	    hw->mac.type == e1000_82572)) {
2782 		u16 phy_tmp = 0;
2783 
2784 		/* Speed up time to link by disabling smart power down. */
2785 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2786 		phy_tmp &= ~IGP02E1000_PM_SPD;
2787 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2788 	}
2789 
2790 	/*
2791 	 * Packet Buffer Allocation (PBA)
2792 	 * Writing PBA sets the receive portion of the buffer
2793 	 * the remainder is used for the transmit buffer.
2794 	 */
2795 	switch (hw->mac.type) {
2796 	/* Total Packet Buffer on these is 48K */
2797 	case e1000_82571:
2798 	case e1000_82572:
2799 	case e1000_80003es2lan:
2800 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2801 		break;
2802 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2803 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2804 		break;
2805 	case e1000_82574:
2806 	case e1000_82583:
2807 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2808 		break;
2809 	case e1000_ich8lan:
2810 		pba = E1000_PBA_8K;
2811 		break;
2812 	case e1000_ich9lan:
2813 	case e1000_ich10lan:
2814 		/* Boost Receive side for jumbo frames */
2815 		if (adapter->max_frame_size > 4096)
2816 			pba = E1000_PBA_14K;
2817 		else
2818 			pba = E1000_PBA_10K;
2819 		break;
2820 	case e1000_pchlan:
2821 	case e1000_pch2lan:
2822 		pba = E1000_PBA_26K;
2823 		break;
2824 	default:
2825 		if (adapter->max_frame_size > 8192)
2826 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2827 		else
2828 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2829 	}
2830 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2831 
2832 	/*
2833 	 * These parameters control the automatic generation (Tx) and
2834 	 * response (Rx) to Ethernet PAUSE frames.
2835 	 * - High water mark should allow for at least two frames to be
2836 	 *   received after sending an XOFF.
2837 	 * - Low water mark works best when it is very near the high water mark.
2838 	 *   This allows the receiver to restart by sending XON when it has
2839 	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2840 	 *   restart after one full frame is pulled from the buffer. There
2841 	 *   could be several smaller frames in the buffer, and if so they will
2842 	 *   not trigger the XON until their total size reduces the buffer
2843 	 *   by 1500.
2844 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2845 	 */
2846 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2847 	hw->fc.high_water = rx_buffer_size -
2848 	    roundup2(adapter->max_frame_size, 1024);
2849 	hw->fc.low_water = hw->fc.high_water - 1500;
2850 
2851 	if (adapter->fc) /* locally set flow control value? */
2852 		hw->fc.requested_mode = adapter->fc;
2853 	else
2854 		hw->fc.requested_mode = e1000_fc_full;
2855 
2856 	if (hw->mac.type == e1000_80003es2lan)
2857 		hw->fc.pause_time = 0xFFFF;
2858 	else
2859 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2860 
2861 	hw->fc.send_xon = TRUE;
2862 
2863 	/* Device specific overrides/settings */
2864 	switch (hw->mac.type) {
2865 	case e1000_pchlan:
2866 		/* Workaround: no TX flow ctrl for PCH */
2867 		hw->fc.requested_mode = e1000_fc_rx_pause;
2868 		hw->fc.pause_time = 0xFFFF; /* override */
2869 		if (ifp->if_mtu > ETHERMTU) {
2870 			hw->fc.high_water = 0x3500;
2871 			hw->fc.low_water = 0x1500;
2872 		} else {
2873 			hw->fc.high_water = 0x5000;
2874 			hw->fc.low_water = 0x3000;
2875 		}
2876 		hw->fc.refresh_time = 0x1000;
2877 		break;
2878 	case e1000_pch2lan:
2879 		hw->fc.high_water = 0x5C20;
2880 		hw->fc.low_water = 0x5048;
2881 		hw->fc.pause_time = 0x0650;
2882 		hw->fc.refresh_time = 0x0400;
2883 		/* Jumbos need adjusted PBA */
2884 		if (ifp->if_mtu > ETHERMTU)
2885 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2886 		else
2887 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2888 		break;
2889 	case e1000_ich9lan:
2890 	case e1000_ich10lan:
2891 		if (ifp->if_mtu > ETHERMTU) {
2892 			hw->fc.high_water = 0x2800;
2893 			hw->fc.low_water = hw->fc.high_water - 8;
2894 			break;
2895 		}
2896 		/* else fall thru */
2897 	default:
2898 		if (hw->mac.type == e1000_80003es2lan)
2899 			hw->fc.pause_time = 0xFFFF;
2900 		break;
2901 	}
2902 
2903 	/* Issue a global reset */
2904 	e1000_reset_hw(hw);
2905 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2906 	em_disable_aspm(adapter);
2907 	/* and a re-init */
2908 	if (e1000_init_hw(hw) < 0) {
2909 		device_printf(dev, "Hardware Initialization Failed\n");
2910 		return;
2911 	}
2912 
2913 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2914 	e1000_get_phy_info(hw);
2915 	e1000_check_for_link(hw);
2916 	return;
2917 }
2918 
2919 /*********************************************************************
2920  *
2921  *  Setup networking device structure and register an interface.
2922  *
2923  **********************************************************************/
2924 static int
2925 em_setup_interface(device_t dev, struct adapter *adapter)
2926 {
2927 	struct ifnet   *ifp;
2928 
2929 	INIT_DEBUGOUT("em_setup_interface: begin");
2930 
2931 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2932 	if (ifp == NULL) {
2933 		device_printf(dev, "can not allocate ifnet structure\n");
2934 		return (-1);
2935 	}
2936 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2937 	ifp->if_init =  em_init;
2938 	ifp->if_softc = adapter;
2939 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2940 	ifp->if_ioctl = em_ioctl;
2941 #ifdef EM_MULTIQUEUE
2942 	/* Multiqueue stack interface */
2943 	ifp->if_transmit = em_mq_start;
2944 	ifp->if_qflush = em_qflush;
2945 #else
2946 	ifp->if_start = em_start;
2947 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2948 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2949 	IFQ_SET_READY(&ifp->if_snd);
2950 #endif
2951 
2952 	ether_ifattach(ifp, adapter->hw.mac.addr);
2953 
2954 	ifp->if_capabilities = ifp->if_capenable = 0;
2955 
2956 
2957 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2958 	ifp->if_capabilities |= IFCAP_TSO4;
2959 	/*
2960 	 * Tell the upper layer(s) we
2961 	 * support full VLAN capability
2962 	 */
2963 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2964 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2965 			     |  IFCAP_VLAN_HWTSO
2966 			     |  IFCAP_VLAN_MTU;
2967 	ifp->if_capenable = ifp->if_capabilities;
2968 
2969 	/*
2970 	** Don't turn this on by default, if vlans are
2971 	** created on another pseudo device (eg. lagg)
2972 	** then vlan events are not passed thru, breaking
2973 	** operation, but with HW FILTER off it works. If
2974 	** using vlans directly on the em driver you can
2975 	** enable this and get full hardware tag filtering.
2976 	*/
2977 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2978 
2979 #ifdef DEVICE_POLLING
2980 	ifp->if_capabilities |= IFCAP_POLLING;
2981 #endif
2982 
2983 	/* Enable only WOL MAGIC by default */
2984 	if (adapter->wol) {
2985 		ifp->if_capabilities |= IFCAP_WOL;
2986 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2987 	}
2988 
2989 	/*
2990 	 * Specify the media types supported by this adapter and register
2991 	 * callbacks to update media and link information
2992 	 */
2993 	ifmedia_init(&adapter->media, IFM_IMASK,
2994 	    em_media_change, em_media_status);
2995 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2996 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2997 		u_char fiber_type = IFM_1000_SX;	/* default type */
2998 
2999 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3000 			    0, NULL);
3001 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3002 	} else {
3003 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3004 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3005 			    0, NULL);
3006 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3007 			    0, NULL);
3008 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3009 			    0, NULL);
3010 		if (adapter->hw.phy.type != e1000_phy_ife) {
3011 			ifmedia_add(&adapter->media,
3012 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3013 			ifmedia_add(&adapter->media,
3014 				IFM_ETHER | IFM_1000_T, 0, NULL);
3015 		}
3016 	}
3017 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3018 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3019 	return (0);
3020 }
3021 
3022 
3023 /*
3024  * Manage DMA'able memory.
3025  */
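/* Load callback: record the single DMA segment's bus address. */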
3026 static void
3027 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3028 {
3029 	if (error)
3030 		return;
3031 	*(bus_addr_t *) arg = segs[0].ds_addr;
3032 }
3033 
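/*
** Allocate a DMA-safe region of 'size' bytes: create a tag,
** allocate coherent memory, and load it to obtain the bus address.
*/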
3034 static int
3035 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3036         struct em_dma_alloc *dma, int mapflags)
3037 {
3038 	int error;
3039 
3040 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3041 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3042 				BUS_SPACE_MAXADDR,	/* lowaddr */
3043 				BUS_SPACE_MAXADDR,	/* highaddr */
3044 				NULL, NULL,		/* filter, filterarg */
3045 				size,			/* maxsize */
3046 				1,			/* nsegments */
3047 				size,			/* maxsegsize */
3048 				0,			/* flags */
3049 				NULL,			/* lockfunc */
3050 				NULL,			/* lockarg */
3051 				&dma->dma_tag);
3052 	if (error) {
3053 		device_printf(adapter->dev,
3054 		    "%s: bus_dma_tag_create failed: %d\n",
3055 		    __func__, error);
3056 		goto fail_0;
3057 	}
3058 
3059 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3060 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3061 	if (error) {
3062 		device_printf(adapter->dev,
3063 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3064 		    __func__, (uintmax_t)size, error);
3065 		goto fail_2;
3066 	}
3067 
3068 	dma->dma_paddr = 0;
3069 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3070 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3071 	if (error || dma->dma_paddr == 0) {
3072 		device_printf(adapter->dev,
3073 		    "%s: bus_dmamap_load failed: %d\n",
3074 		    __func__, error);
3075 		goto fail_3;
3076 	}
3077 
3078 	return (0);
3079 
3080 fail_3:
3081 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3082 fail_2:
3083 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3084 	bus_dma_tag_destroy(dma->dma_tag);
3085 fail_0:
3086 	dma->dma_map = NULL;
3087 	dma->dma_tag = NULL;
3088 
3089 	return (error);
3090 }
3091 
3092 static void
3093 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3094 {
3095 	if (dma->dma_tag == NULL)
3096 		return;
3097 	if (dma->dma_map != NULL) {
3098 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3099 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3100 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3101 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3102 		dma->dma_map = NULL;
3103 	}
3104 	bus_dma_tag_destroy(dma->dma_tag);
3105 	dma->dma_tag = NULL;
3106 }
3107 
3108 
3109 /*********************************************************************
3110  *
3111  *  Allocate memory for the transmit and receive rings, and then
3112  *  the descriptors associated with each, called only once at attach.
3113  *
3114  **********************************************************************/
3115 static int
3116 em_allocate_queues(struct adapter *adapter)
3117 {
3118 	device_t		dev = adapter->dev;
3119 	struct tx_ring		*txr = NULL;
3120 	struct rx_ring		*rxr = NULL;
3121 	int rsize, tsize, error = E1000_SUCCESS;
3122 	int txconf = 0, rxconf = 0;
3123 
3124 
3125 	/* Allocate the TX ring struct memory */
3126 	if (!(adapter->tx_rings =
3127 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3128 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3129 		device_printf(dev, "Unable to allocate TX ring memory\n");
3130 		error = ENOMEM;
3131 		goto fail;
3132 	}
3133 
3134 	/* Now allocate the RX */
3135 	if (!(adapter->rx_rings =
3136 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3137 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3138 		device_printf(dev, "Unable to allocate RX ring memory\n");
3139 		error = ENOMEM;
3140 		goto rx_fail;
3141 	}
3142 
3143 	tsize = roundup2(adapter->num_tx_desc *
3144 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3145 	/*
3146 	 * Now set up the TX queues, txconf is needed to handle the
3147 	 * possibility that things fail midcourse and we need to
3148 	 * undo memory gracefully
3149 	 */
3150 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3151 		/* Set up some basics */
3152 		txr = &adapter->tx_rings[i];
3153 		txr->adapter = adapter;
3154 		txr->me = i;
3155 
3156 		/* Initialize the TX lock */
3157 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3158 		    device_get_nameunit(dev), txr->me);
3159 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3160 
3161 		if (em_dma_malloc(adapter, tsize,
3162 			&txr->txdma, BUS_DMA_NOWAIT)) {
3163 			device_printf(dev,
3164 			    "Unable to allocate TX Descriptor memory\n");
3165 			error = ENOMEM;
3166 			goto err_tx_desc;
3167 		}
3168 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3169 		bzero((void *)txr->tx_base, tsize);
3170 
3171 		if (em_allocate_transmit_buffers(txr)) {
3172 			device_printf(dev,
3173 			    "Critical Failure setting up transmit buffers\n");
3174 			error = ENOMEM;
3175 			goto err_tx_desc;
3176 		}
3177 #if __FreeBSD_version >= 800000
3178 		/* Allocate a buf ring */
3179 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3180 		    M_WAITOK, &txr->tx_mtx);
3181 #endif
3182 	}
3183 
3184 	/*
3185 	 * Next the RX queues...
3186 	 */
3187 	rsize = roundup2(adapter->num_rx_desc *
3188 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3189 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3190 		rxr = &adapter->rx_rings[i];
3191 		rxr->adapter = adapter;
3192 		rxr->me = i;
3193 
3194 		/* Initialize the RX lock */
3195 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3196 		    device_get_nameunit(dev), rxr->me);
3197 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3198 
3199 		if (em_dma_malloc(adapter, rsize,
3200 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3201 			device_printf(dev,
3202 			    "Unable to allocate RxDescriptor memory\n");
3203 			error = ENOMEM;
3204 			goto err_rx_desc;
3205 		}
3206 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3207 		bzero((void *)rxr->rx_base, rsize);
3208 
3209 		/* Allocate receive buffers for the ring */
3210 		if (em_allocate_receive_buffers(rxr)) {
3211 			device_printf(dev,
3212 			    "Critical Failure setting up receive buffers\n");
3213 			error = ENOMEM;
3214 			goto err_rx_desc;
3215 		}
3216 	}
3217 
3218 	return (0);
3219 
3220 err_rx_desc:
3221 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3222 		em_dma_free(adapter, &rxr->rxdma);
3223 err_tx_desc:
3224 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3225 		em_dma_free(adapter, &txr->txdma);
3226 	free(adapter->rx_rings, M_DEVBUF);
3227 rx_fail:
3228 #if __FreeBSD_version >= 800000
3229 	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
3230 #endif
3231 	free(adapter->tx_rings, M_DEVBUF);
3232 fail:
3233 	return (error);
3234 }
3235 
3236 
3237 /*********************************************************************
3238  *
3239  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3240  *  the information needed to transmit a packet on the wire. This is
3241  *  called only once at attach, setup is done every reset.
3242  *
3243  **********************************************************************/
3244 static int
3245 em_allocate_transmit_buffers(struct tx_ring *txr)
3246 {
3247 	struct adapter *adapter = txr->adapter;
3248 	device_t dev = adapter->dev;
3249 	struct em_buffer *txbuf;
3250 	int error, i;
3251 
3252 	/*
3253 	 * Setup DMA descriptor areas.
3254 	 */
3255 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3256 			       1, 0,			/* alignment, bounds */
3257 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3258 			       BUS_SPACE_MAXADDR,	/* highaddr */
3259 			       NULL, NULL,		/* filter, filterarg */
3260 			       EM_TSO_SIZE,		/* maxsize */
3261 			       EM_MAX_SCATTER,		/* nsegments */
3262 			       PAGE_SIZE,		/* maxsegsize */
3263 			       0,			/* flags */
3264 			       NULL,			/* lockfunc */
3265 			       NULL,			/* lockfuncarg */
3266 			       &txr->txtag))) {
3267 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3268 		goto fail;
3269 	}
3270 
3271 	if (!(txr->tx_buffers =
3272 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3273 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3274 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3275 		error = ENOMEM;
3276 		goto fail;
3277 	}
3278 
3279         /* Create the descriptor buffer dma maps */
3280 	txbuf = txr->tx_buffers;
3281 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3282 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3283 		if (error != 0) {
3284 			device_printf(dev, "Unable to create TX DMA map\n");
3285 			goto fail;
3286 		}
3287 	}
3288 
3289 	return 0;
3290 fail:
3291 	/* Free everything; this handles the case where we failed midway */
3292 	em_free_transmit_structures(adapter);
3293 	return (error);
3294 }
3295 
3296 /*********************************************************************
3297  *
3298  *  Initialize a transmit ring.
3299  *
3300  **********************************************************************/
3301 static void
3302 em_setup_transmit_ring(struct tx_ring *txr)
3303 {
3304 	struct adapter *adapter = txr->adapter;
3305 	struct em_buffer *txbuf;
3306 	int i;
3307 #ifdef DEV_NETMAP
3308 	struct netmap_adapter *na = NA(adapter->ifp);
3309 	struct netmap_slot *slot;
3310 #endif /* DEV_NETMAP */
3311 
3312 	/* Clear the old descriptor contents */
3313 	EM_TX_LOCK(txr);
3314 #ifdef DEV_NETMAP
3315 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3316 #endif /* DEV_NETMAP */
3317 
3318 	bzero((void *)txr->tx_base,
3319 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3320 	/* Reset indices */
3321 	txr->next_avail_desc = 0;
3322 	txr->next_to_clean = 0;
3323 
3324 	/* Free any existing tx buffers. */
3325 	txbuf = txr->tx_buffers;
3326 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3327 		if (txbuf->m_head != NULL) {
3328 			bus_dmamap_sync(txr->txtag, txbuf->map,
3329 			    BUS_DMASYNC_POSTWRITE);
3330 			bus_dmamap_unload(txr->txtag, txbuf->map);
3331 			m_freem(txbuf->m_head);
3332 			txbuf->m_head = NULL;
3333 		}
3334 #ifdef DEV_NETMAP
3335 		if (slot) {
3336 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3337 			uint64_t paddr;
3338 			void *addr;
3339 
3340 			addr = PNMB(slot + si, &paddr);
3341 			txr->tx_base[i].buffer_addr = htole64(paddr);
3342 			/* reload the map for netmap mode */
3343 			netmap_load_map(txr->txtag, txbuf->map, addr);
3344 		}
3345 #endif /* DEV_NETMAP */
3346 
3347 		/* clear the watch index */
3348 		txbuf->next_eop = -1;
3349 	}
3350 
3351 	/* Set number of descriptors available */
3352 	txr->tx_avail = adapter->num_tx_desc;
3353 	txr->queue_status = EM_QUEUE_IDLE;
3354 
3355 	/* Clear checksum offload context. */
3356 	txr->last_hw_offload = 0;
3357 	txr->last_hw_ipcss = 0;
3358 	txr->last_hw_ipcso = 0;
3359 	txr->last_hw_tucss = 0;
3360 	txr->last_hw_tucso = 0;
3361 
3362 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3363 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3364 	EM_TX_UNLOCK(txr);
3365 }
3366 
3367 /*********************************************************************
3368  *
3369  *  Initialize all transmit rings.
3370  *
3371  **********************************************************************/
3372 static void
3373 em_setup_transmit_structures(struct adapter *adapter)
3374 {
3375 	struct tx_ring *txr = adapter->tx_rings;
3376 
3377 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3378 		em_setup_transmit_ring(txr);
3379 
3380 	return;
3381 }
3382 
3383 /*********************************************************************
3384  *
3385  *  Enable transmit unit.
3386  *
3387  **********************************************************************/
3388 static void
3389 em_initialize_transmit_unit(struct adapter *adapter)
3390 {
3391 	struct tx_ring	*txr = adapter->tx_rings;
3392 	struct e1000_hw	*hw = &adapter->hw;
3393 	u32	tctl, tarc, tipg = 0;
3394 
3395 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3396 
3397 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3398 		u64 bus_addr = txr->txdma.dma_paddr;
3399 		/* Base and Len of TX Ring */
3400 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3401 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3402 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3403 	    	    (u32)(bus_addr >> 32));
3404 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3405 	    	    (u32)bus_addr);
3406 		/* Init the HEAD/TAIL indices */
3407 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3408 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3409 
3410 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3411 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3412 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3413 
3414 		txr->queue_status = EM_QUEUE_IDLE;
3415 	}
3416 
3417 	/* Set the default values for the Tx Inter Packet Gap timer */
3418 	switch (adapter->hw.mac.type) {
3419 	case e1000_80003es2lan:
3420 		tipg = DEFAULT_82543_TIPG_IPGR1;
3421 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3422 		    E1000_TIPG_IPGR2_SHIFT;
3423 		break;
3424 	default:
3425 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3426 		    (adapter->hw.phy.media_type ==
3427 		    e1000_media_type_internal_serdes))
3428 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3429 		else
3430 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3431 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3432 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3433 	}
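	/*
	 * Layout note (per the e1000 register definitions): TIPG packs
	 * IPGT in bits 9:0, IPGR1 in bits 19:10 and IPGR2 in bits
	 * 29:20, which is what the shifts above assemble.
	 */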
3434 
3435 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3436 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3437 
3438 	if (adapter->hw.mac.type >= e1000_82540)
3439 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3440 		    adapter->tx_abs_int_delay.value);
3441 
3442 	if ((adapter->hw.mac.type == e1000_82571) ||
3443 	    (adapter->hw.mac.type == e1000_82572)) {
3444 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3445 		tarc |= SPEED_MODE_BIT;
3446 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3447 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3448 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3449 		tarc |= 1;
3450 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3451 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3452 		tarc |= 1;
3453 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3454 	}
3455 
3456 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3457 	if (adapter->tx_int_delay.value > 0)
3458 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3459 
3460 	/* Program the Transmit Control Register */
3461 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3462 	tctl &= ~E1000_TCTL_CT;
3463 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3464 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3465 
3466 	if (adapter->hw.mac.type >= e1000_82571)
3467 		tctl |= E1000_TCTL_MULR;
3468 
3469 	/* This write will effectively turn on the transmit unit. */
3470 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3471 
3472 }
3473 
3474 
3475 /*********************************************************************
3476  *
3477  *  Free all transmit rings.
3478  *
3479  **********************************************************************/
3480 static void
3481 em_free_transmit_structures(struct adapter *adapter)
3482 {
3483 	struct tx_ring *txr = adapter->tx_rings;
3484 
3485 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3486 		EM_TX_LOCK(txr);
3487 		em_free_transmit_buffers(txr);
3488 		em_dma_free(adapter, &txr->txdma);
3489 		EM_TX_UNLOCK(txr);
3490 		EM_TX_LOCK_DESTROY(txr);
3491 	}
3492 
3493 	free(adapter->tx_rings, M_DEVBUF);
3494 }
3495 
3496 /*********************************************************************
3497  *
3498  *  Free transmit ring related data structures.
3499  *
3500  **********************************************************************/
3501 static void
3502 em_free_transmit_buffers(struct tx_ring *txr)
3503 {
3504 	struct adapter		*adapter = txr->adapter;
3505 	struct em_buffer	*txbuf;
3506 
3507 	INIT_DEBUGOUT("free_transmit_ring: begin");
3508 
3509 	if (txr->tx_buffers == NULL)
3510 		return;
3511 
3512 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3513 		txbuf = &txr->tx_buffers[i];
3514 		if (txbuf->m_head != NULL) {
3515 			bus_dmamap_sync(txr->txtag, txbuf->map,
3516 			    BUS_DMASYNC_POSTWRITE);
3517 			bus_dmamap_unload(txr->txtag,
3518 			    txbuf->map);
3519 			m_freem(txbuf->m_head);
3520 			txbuf->m_head = NULL;
3521 			if (txbuf->map != NULL) {
3522 				bus_dmamap_destroy(txr->txtag,
3523 				    txbuf->map);
3524 				txbuf->map = NULL;
3525 			}
3526 		} else if (txbuf->map != NULL) {
3527 			bus_dmamap_unload(txr->txtag,
3528 			    txbuf->map);
3529 			bus_dmamap_destroy(txr->txtag,
3530 			    txbuf->map);
3531 			txbuf->map = NULL;
3532 		}
3533 	}
3534 #if __FreeBSD_version >= 800000
3535 	if (txr->br != NULL)
3536 		buf_ring_free(txr->br, M_DEVBUF);
3537 #endif
3538 	if (txr->tx_buffers != NULL) {
3539 		free(txr->tx_buffers, M_DEVBUF);
3540 		txr->tx_buffers = NULL;
3541 	}
3542 	if (txr->txtag != NULL) {
3543 		bus_dma_tag_destroy(txr->txtag);
3544 		txr->txtag = NULL;
3545 	}
3546 	return;
3547 }
3548 
3549 
3550 /*********************************************************************
3551  *  The offload context is protocol specific (TCP/UDP) and thus
3552  *  only needs to be set when the protocol changes. A context
3553  *  change can be a performance detriment, however, and might be
3554  *  better just disabled. The reason arises in the way
3555  *  in which the controller supports pipelined requests from the
3556  *  Tx data DMA. Up to four requests can be pipelined, and they may
3557  *  belong to the same packet or to multiple packets. However all
3558  *  requests for one packet are issued before a request is issued
3559  *  for a subsequent packet and if a request for the next packet
3560  *  requires a context change, that request will be stalled
3561  *  until the previous request completes. This means setting up
3562  *  a new context effectively disables pipelined Tx data DMA which
3563  *  in turn greatly slows down performance when sending small-sized
3564  *  frames.
3565  **********************************************************************/
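/*
 * Editorial illustration: for a burst of back-to-back TCP segments with
 * identical headers, only the first call below emits a context
 * descriptor; subsequent calls match the cached ipcss/ipcso/tucss/tucso
 * offsets and return early without consuming a descriptor.
 */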
3566 static void
3567 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3568     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3569 {
3570 	struct adapter			*adapter = txr->adapter;
3571 	struct e1000_context_desc	*TXD = NULL;
3572 	struct em_buffer		*tx_buffer;
3573 	int				cur, hdr_len;
3574 	u32				cmd = 0;
3575 	u16				offload = 0;
3576 	u8				ipcso, ipcss, tucso, tucss;
3577 
3578 	ipcss = ipcso = tucss = tucso = 0;
3579 	hdr_len = ip_off + (ip->ip_hl << 2);
3580 	cur = txr->next_avail_desc;
3581 
3582 	/* Setup of IP header checksum. */
3583 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3584 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3585 		offload |= CSUM_IP;
3586 		ipcss = ip_off;
3587 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3588 		/*
3589 		 * Start offset for header checksum calculation.
3590 		 * End offset for header checksum calculation.
3591 		 * Offset of place to put the checksum.
3592 		 */
3593 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3594 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3595 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3596 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3597 		cmd |= E1000_TXD_CMD_IP;
3598 	}
3599 
3600 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3601  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3602  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3603  		offload |= CSUM_TCP;
3604  		tucss = hdr_len;
3605  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3606  		/*
3607  		 * Setting up a new checksum offload context for every frame
3608  		 * takes a lot of processing time for the hardware. This also
3609  		 * reduces performance a lot for small-sized frames, so avoid
3610  		 * it if the driver can reuse a previously configured checksum
3611  		 * offload context.
3612  		 */
3613  		if (txr->last_hw_offload == offload) {
3614  			if (offload & CSUM_IP) {
3615  				if (txr->last_hw_ipcss == ipcss &&
3616  				    txr->last_hw_ipcso == ipcso &&
3617  				    txr->last_hw_tucss == tucss &&
3618  				    txr->last_hw_tucso == tucso)
3619  					return;
3620  			} else {
3621  				if (txr->last_hw_tucss == tucss &&
3622  				    txr->last_hw_tucso == tucso)
3623  					return;
3624  			}
3625   		}
3626  		txr->last_hw_offload = offload;
3627  		txr->last_hw_tucss = tucss;
3628  		txr->last_hw_tucso = tucso;
3629  		/*
3630  		 * Start offset for payload checksum calculation.
3631  		 * End offset for payload checksum calculation.
3632  		 * Offset of place to put the checksum.
3633  		 */
3634 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3635  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3636  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3637  		TXD->upper_setup.tcp_fields.tucso = tucso;
3638  		cmd |= E1000_TXD_CMD_TCP;
3639  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3640  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3641  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;	/* keep the context-reuse test keyed correctly */
3642  		tucss = hdr_len;
3643  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3644  		/*
3645  		 * Setting up a new checksum offload context for every frame
3646  		 * takes a lot of processing time for the hardware. This also
3647  		 * reduces performance a lot for small-sized frames, so avoid
3648  		 * it if the driver can reuse a previously configured checksum
3649  		 * offload context.
3650  		 */
3651  		if (txr->last_hw_offload == offload) {
3652  			if (offload & CSUM_IP) {
3653  				if (txr->last_hw_ipcss == ipcss &&
3654  				    txr->last_hw_ipcso == ipcso &&
3655  				    txr->last_hw_tucss == tucss &&
3656  				    txr->last_hw_tucso == tucso)
3657  					return;
3658  			} else {
3659  				if (txr->last_hw_tucss == tucss &&
3660  				    txr->last_hw_tucso == tucso)
3661  					return;
3662  			}
3663  		}
3664  		txr->last_hw_offload = offload;
3665  		txr->last_hw_tucss = tucss;
3666  		txr->last_hw_tucso = tucso;
3667  		/*
3668  		 * Start offset for header checksum calculation.
3669  		 * End offset for header checksum calculation.
3670  		 * Offset of place to put the checksum.
3671  		 */
3672 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3673  		TXD->upper_setup.tcp_fields.tucss = tucss;
3674  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3675  		TXD->upper_setup.tcp_fields.tucso = tucso;
3676   	}
3677 
3678  	if (offload & CSUM_IP) {
3679  		txr->last_hw_ipcss = ipcss;
3680  		txr->last_hw_ipcso = ipcso;
3681   	}
3682 
3683 	TXD->tcp_seg_setup.data = htole32(0);
3684 	TXD->cmd_and_length =
3685 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3686 	tx_buffer = &txr->tx_buffers[cur];
3687 	tx_buffer->m_head = NULL;
3688 	tx_buffer->next_eop = -1;
3689 
3690 	if (++cur == adapter->num_tx_desc)
3691 		cur = 0;
3692 
3693 	txr->tx_avail--;
3694 	txr->next_avail_desc = cur;
3695 }
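
/*
 * Editorial sketch (not part of the driver, kept under #if 0): the
 * context-reuse test that em_transmit_checksum_setup() performs inline
 * above could be factored like this.  The helper name is hypothetical;
 * the fields compared are the driver's own.
 */
#if 0
static __inline int
em_csum_ctx_matches(struct tx_ring *txr, u16 offload,
    u8 ipcss, u8 ipcso, u8 tucss, u8 tucso)
{
	/* Different protocol mix always needs a new context */
	if (txr->last_hw_offload != offload)
		return (0);
	/* For IP csum, the IP header offsets must match too */
	if ((offload & CSUM_IP) &&
	    (txr->last_hw_ipcss != ipcss || txr->last_hw_ipcso != ipcso))
		return (0);
	/* TCP/UDP payload offsets must match in either case */
	return (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso);
}
#endif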
3696 
3697 
3698 /**********************************************************************
3699  *
3700  *  Setup work for hardware segmentation offload (TSO)
3701  *
3702  **********************************************************************/
3703 static void
3704 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3705     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3706 {
3707 	struct adapter			*adapter = txr->adapter;
3708 	struct e1000_context_desc	*TXD;
3709 	struct em_buffer		*tx_buffer;
3710 	int cur, hdr_len;
3711 
3712 	/*
3713 	 * In theory we can use the same TSO context if and only if
3714 	 * the frame is the same type (IP/TCP) and has the same MSS.
3715 	 * However, checking whether a frame has the same IP/TCP structure
3716 	 * is a hard thing, so just ignore that and always re-establish a
3717 	 * new TSO context.
3718 	 */
3719 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
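	/*
	 * e.g. (illustrative numbers): for a plain Ethernet/IPv4/TCP
	 * frame with no options, ip_off = 14, ip_hl = 5 (20 bytes) and
	 * th_off = 5 (20 bytes), so hdr_len = 54.
	 */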
3720 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3721 		      E1000_TXD_DTYP_D |	/* Data descr type */
3722 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3723 
3724 	/* IP and/or TCP header checksum calculation and insertion. */
3725 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3726 
3727 	cur = txr->next_avail_desc;
3728 	tx_buffer = &txr->tx_buffers[cur];
3729 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3730 
3731 	/*
3732 	 * Start offset for header checksum calculation.
3733 	 * End offset for header checksum calculation.
3734 	 * Offset of the place to put the checksum.
3735 	 */
3736 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3737 	TXD->lower_setup.ip_fields.ipcse =
3738 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3739 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3740 	/*
3741 	 * Start offset for payload checksum calculation.
3742 	 * End offset for payload checksum calculation.
3743 	 * Offset of place to put the checksum.
3744 	 */
3745 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3746 	TXD->upper_setup.tcp_fields.tucse = 0;
3747 	TXD->upper_setup.tcp_fields.tucso =
3748 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3749 	/*
3750 	 * Payload size per packet w/o any headers.
3751 	 * Length of all headers up to payload.
3752 	 */
3753 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3754 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3755 
3756 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3757 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3758 				E1000_TXD_CMD_TSE |	/* TSE context */
3759 				E1000_TXD_CMD_IP |	/* Do IP csum */
3760 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3761 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3762 
3763 	tx_buffer->m_head = NULL;
3764 	tx_buffer->next_eop = -1;
3765 
3766 	if (++cur == adapter->num_tx_desc)
3767 		cur = 0;
3768 
3769 	txr->tx_avail--;
3770 	txr->next_avail_desc = cur;
3771 	txr->tx_tso = TRUE;
3772 }
3773 
3774 
3775 /**********************************************************************
3776  *
3777  *  Examine each tx_buffer in the used queue. If the hardware is done
3778  *  processing the packet then free associated resources. The
3779  *  tx_buffer is put back on the free queue.
3780  *
3781  **********************************************************************/
3782 static void
3783 em_txeof(struct tx_ring *txr)
3784 {
3785 	struct adapter	*adapter = txr->adapter;
3786         int first, last, done, processed;
3787         struct em_buffer *tx_buffer;
3788         struct e1000_tx_desc   *tx_desc, *eop_desc;
3789 	struct ifnet   *ifp = adapter->ifp;
3790 
3791 	EM_TX_LOCK_ASSERT(txr);
3792 #ifdef DEV_NETMAP
3793 	if (ifp->if_capenable & IFCAP_NETMAP) {
3794 		struct netmap_adapter *na = NA(ifp);
3795 
3796 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3797 		EM_TX_UNLOCK(txr);
3798 		EM_CORE_LOCK(adapter);
3799 		selwakeuppri(&na->tx_si, PI_NET);
3800 		EM_CORE_UNLOCK(adapter);
3801 		EM_TX_LOCK(txr);
3802 		return;
3803 	}
3804 #endif /* DEV_NETMAP */
3805 
3806 	/* No work, make sure watchdog is off */
3807         if (txr->tx_avail == adapter->num_tx_desc) {
3808 		txr->queue_status = EM_QUEUE_IDLE;
3809                 return;
3810 	}
3811 
3812 	processed = 0;
3813         first = txr->next_to_clean;
3814         tx_desc = &txr->tx_base[first];
3815         tx_buffer = &txr->tx_buffers[first];
3816 	last = tx_buffer->next_eop;
3817         eop_desc = &txr->tx_base[last];
3818 
3819 	/*
3820 	 * What this does is get the index of the
3821 	 * first descriptor AFTER the EOP of the
3822 	 * first packet, so that we can do the
3823 	 * simple comparison in the inner while loop.
3824 	 */
3825 	if (++last == adapter->num_tx_desc)
3826  		last = 0;
3827 	done = last;
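	/*
	 * Worked example (illustrative numbers): with first = 100 and
	 * next_eop = 102, 'done' becomes 103, so the inner loop below
	 * cleans descriptors 100..102 and stops at 103.
	 */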
3828 
3829         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3830             BUS_DMASYNC_POSTREAD);
3831 
3832         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3833 		/* We clean the range of the packet */
3834 		while (first != done) {
3835                 	tx_desc->upper.data = 0;
3836                 	tx_desc->lower.data = 0;
3837                 	tx_desc->buffer_addr = 0;
3838                 	++txr->tx_avail;
3839 			++processed;
3840 
3841 			if (tx_buffer->m_head) {
3842 				bus_dmamap_sync(txr->txtag,
3843 				    tx_buffer->map,
3844 				    BUS_DMASYNC_POSTWRITE);
3845 				bus_dmamap_unload(txr->txtag,
3846 				    tx_buffer->map);
3847                         	m_freem(tx_buffer->m_head);
3848                         	tx_buffer->m_head = NULL;
3849                 	}
3850 			tx_buffer->next_eop = -1;
3851 			txr->watchdog_time = ticks;
3852 
3853 	                if (++first == adapter->num_tx_desc)
3854 				first = 0;
3855 
3856 	                tx_buffer = &txr->tx_buffers[first];
3857 			tx_desc = &txr->tx_base[first];
3858 		}
3859 		++ifp->if_opackets;
3860 		/* See if we can continue to the next packet */
3861 		last = tx_buffer->next_eop;
3862 		if (last != -1) {
3863         		eop_desc = &txr->tx_base[last];
3864 			/* Get new done point */
3865 			if (++last == adapter->num_tx_desc)
 				last = 0;
3866 			done = last;
3867 		} else
3868 			break;
3869         }
3870         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3871             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3872 
3873         txr->next_to_clean = first;
3874 
3875 	/*
3876 	** Watchdog calculation: we know there's
3877 	** work outstanding or the first return
3878 	** would have been taken, so nothing processed
3879 	** for too long indicates a hang. The local timer
3880 	** will examine this and do a reset if needed.
3881 	*/
3882 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3883 		txr->queue_status = EM_QUEUE_HUNG;
3884 
3885         /*
3886          * If we have a minimum free, clear IFF_DRV_OACTIVE
3887          * to tell the stack that it is OK to send packets.
3888 	 * Notice that all writes of OACTIVE happen under the
3889 	 * TX lock which, with a single queue, guarantees
3890 	 * sanity.
3891          */
3892         if (txr->tx_avail >= EM_MAX_SCATTER)
3893 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3894 
3895 	/* Disable watchdog if all clean */
3896 	if (txr->tx_avail == adapter->num_tx_desc) {
3897 		txr->queue_status = EM_QUEUE_IDLE;
3898 	}
3899 }
3900 
3901 
3902 /*********************************************************************
3903  *
3904  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3905  *
3906  **********************************************************************/
3907 static void
3908 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3909 {
3910 	struct adapter		*adapter = rxr->adapter;
3911 	struct mbuf		*m;
3912 	bus_dma_segment_t	segs[1];
3913 	struct em_buffer	*rxbuf;
3914 	int			i, j, error, nsegs;
3915 	bool			cleaned = FALSE;
3916 
3917 	i = j = rxr->next_to_refresh;
3918 	/*
3919 	** Get one descriptor beyond
3920 	** our work mark to control
3921 	** the loop.
3922 	*/
3923 	if (++j == adapter->num_rx_desc)
3924 		j = 0;
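	/*
	 * e.g. (illustrative): with next_to_refresh = 5, i starts at 5
	 * and j at 6; the loop refreshes 5, 6, ... and stops one short
	 * of 'limit', which itself is never refreshed.
	 */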
3925 
3926 	while (j != limit) {
3927 		rxbuf = &rxr->rx_buffers[i];
3928 		if (rxbuf->m_head == NULL) {
3929 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3930 			    M_PKTHDR, adapter->rx_mbuf_sz);
3931 			/*
3932 			** If we have a temporary resource shortage
3933 			** that causes a failure, just abort the refresh
3934 			** for now; we will return to this point when
3935 			** reinvoked from em_rxeof.
3936 			*/
3937 			if (m == NULL)
3938 				goto update;
3939 		} else
3940 			m = rxbuf->m_head;
3941 
3942 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3943 		m->m_flags |= M_PKTHDR;
3944 		m->m_data = m->m_ext.ext_buf;
3945 
3946 		/* Use bus_dma machinery to setup the memory mapping  */
3947 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3948 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3949 		if (error != 0) {
3950 			printf("Refresh mbufs: hdr dmamap load"
3951 			    " failure - %d\n", error);
3952 			m_free(m);
3953 			rxbuf->m_head = NULL;
3954 			goto update;
3955 		}
3956 		rxbuf->m_head = m;
3957 		bus_dmamap_sync(rxr->rxtag,
3958 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3959 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3960 		cleaned = TRUE;
3961 
3962 		i = j; /* Next is precalculated for us */
3963 		rxr->next_to_refresh = i;
3964 		/* Calculate next controlling index */
3965 		if (++j == adapter->num_rx_desc)
3966 			j = 0;
3967 	}
3968 update:
3969 	/*
3970 	** Update the tail pointer only if,
3971 	** and only as far as, we have refreshed.
3972 	*/
3973 	if (cleaned)
3974 		E1000_WRITE_REG(&adapter->hw,
3975 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3976 
3977 	return;
3978 }
3979 
3980 
3981 /*********************************************************************
3982  *
3983  *  Allocate memory for rx_buffer structures. Since we use one
3984  *  rx_buffer per received packet, the maximum number of rx_buffer's
3985  *  that we'll need is equal to the number of receive descriptors
3986  *  that we've allocated.
3987  *
3988  **********************************************************************/
3989 static int
3990 em_allocate_receive_buffers(struct rx_ring *rxr)
3991 {
3992 	struct adapter		*adapter = rxr->adapter;
3993 	device_t		dev = adapter->dev;
3994 	struct em_buffer	*rxbuf;
3995 	int			error;
3996 
3997 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3998 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3999 	if (rxr->rx_buffers == NULL) {
4000 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4001 		return (ENOMEM);
4002 	}
4003 
4004 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4005 				1, 0,			/* alignment, bounds */
4006 				BUS_SPACE_MAXADDR,	/* lowaddr */
4007 				BUS_SPACE_MAXADDR,	/* highaddr */
4008 				NULL, NULL,		/* filter, filterarg */
4009 				MJUM9BYTES,		/* maxsize */
4010 				1,			/* nsegments */
4011 				MJUM9BYTES,		/* maxsegsize */
4012 				0,			/* flags */
4013 				NULL,			/* lockfunc */
4014 				NULL,			/* lockfuncarg */
4015 				&rxr->rxtag);
4016 	if (error) {
4017 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4018 		    __func__, error);
4019 		goto fail;
4020 	}
4021 
4022 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4023 		rxbuf = &rxr->rx_buffers[i];
4025 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4026 		    &rxbuf->map);
4027 		if (error) {
4028 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4029 			    __func__, error);
4030 			goto fail;
4031 		}
4032 	}
4033 
4034 	return (0);
4035 
4036 fail:
4037 	em_free_receive_structures(adapter);
4038 	return (error);
4039 }
4040 
4041 
4042 /*********************************************************************
4043  *
4044  *  Initialize a receive ring and its buffers.
4045  *
4046  **********************************************************************/
4047 static int
4048 em_setup_receive_ring(struct rx_ring *rxr)
4049 {
4050 	struct	adapter 	*adapter = rxr->adapter;
4051 	struct em_buffer	*rxbuf;
4052 	bus_dma_segment_t	seg[1];
4053 	int			rsize, nsegs, error = 0;
4054 #ifdef DEV_NETMAP
4055 	struct netmap_adapter *na = NA(adapter->ifp);
4056 	struct netmap_slot *slot;
4057 #endif
4058 
4059 
4060 	/* Clear the ring contents */
4061 	EM_RX_LOCK(rxr);
4062 	rsize = roundup2(adapter->num_rx_desc *
4063 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4064 	bzero((void *)rxr->rx_base, rsize);
4065 #ifdef DEV_NETMAP
4066 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4067 #endif
4068 
4069 	/*
4070 	** Free current RX buffer structs and their mbufs
4071 	*/
4072 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4073 		rxbuf = &rxr->rx_buffers[i];
4074 		if (rxbuf->m_head != NULL) {
4075 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4076 			    BUS_DMASYNC_POSTREAD);
4077 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4078 			m_freem(rxbuf->m_head);
4079 			rxbuf->m_head = NULL; /* mark as freed */
4080 		}
4081 	}
4082 
4083 	/* Now replenish the mbufs */
4084         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4085 		rxbuf = &rxr->rx_buffers[j];
4086 #ifdef DEV_NETMAP
4087 		if (slot) {
4088 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4089 			uint64_t paddr;
4090 			void *addr;
4091 
4092 			addr = PNMB(slot + si, &paddr);
4093 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4094 			/* Update descriptor */
4095 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4096 			continue;
4097 		}
4098 #endif /* DEV_NETMAP */
4099 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4100 		    M_PKTHDR, adapter->rx_mbuf_sz);
4101 		if (rxbuf->m_head == NULL) {
4102 			error = ENOBUFS;
4103 			goto fail;
4104 		}
4105 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4106 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4107 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4108 
4109 		/* Get the memory mapping */
4110 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4111 		    rxbuf->map, rxbuf->m_head, seg,
4112 		    &nsegs, BUS_DMA_NOWAIT);
4113 		if (error != 0) {
4114 			m_freem(rxbuf->m_head);
4115 			rxbuf->m_head = NULL;
4116 			goto fail;
4117 		}
4118 		bus_dmamap_sync(rxr->rxtag,
4119 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4120 
4121 		/* Update descriptor */
4122 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4123 	}
4124 	rxr->next_to_check = 0;
4125 	rxr->next_to_refresh = 0;
4126 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4127 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4128 
4129 fail:
4130 	EM_RX_UNLOCK(rxr);
4131 	return (error);
4132 }
4133 
4134 /*********************************************************************
4135  *
4136  *  Initialize all receive rings.
4137  *
4138  **********************************************************************/
4139 static int
4140 em_setup_receive_structures(struct adapter *adapter)
4141 {
4142 	struct rx_ring *rxr = adapter->rx_rings;
4143 	int q;
4144 
4145 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4146 		if (em_setup_receive_ring(rxr))
4147 			goto fail;
4148 
4149 	return (0);
4150 fail:
4151 	/*
4152 	 * Free the RX buffers allocated so far; we only handle
4153 	 * the rings that completed, the failing ring will have
4154 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4155 	 */
4156 	for (int i = 0; i < q; ++i) {
4157 		rxr = &adapter->rx_rings[i];
4158 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4159 			struct em_buffer *rxbuf;
4160 			rxbuf = &rxr->rx_buffers[n];
4161 			if (rxbuf->m_head != NULL) {
4162 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4163 			  	  BUS_DMASYNC_POSTREAD);
4164 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4165 				m_freem(rxbuf->m_head);
4166 				rxbuf->m_head = NULL;
4167 			}
4168 		}
4169 		rxr->next_to_check = 0;
4170 		rxr->next_to_refresh = 0;
4171 	}
4172 
4173 	return (ENOBUFS);
4174 }
4175 
4176 /*********************************************************************
4177  *
4178  *  Free all receive rings.
4179  *
4180  **********************************************************************/
4181 static void
4182 em_free_receive_structures(struct adapter *adapter)
4183 {
4184 	struct rx_ring *rxr = adapter->rx_rings;
4185 
4186 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4187 		em_free_receive_buffers(rxr);
4188 		/* Free the ring memory as well */
4189 		em_dma_free(adapter, &rxr->rxdma);
4190 		EM_RX_LOCK_DESTROY(rxr);
4191 	}
4192 
4193 	free(adapter->rx_rings, M_DEVBUF);
4194 }
4195 
4196 
4197 /*********************************************************************
4198  *
4199  *  Free receive ring data structures
4200  *
4201  **********************************************************************/
4202 static void
4203 em_free_receive_buffers(struct rx_ring *rxr)
4204 {
4205 	struct adapter		*adapter = rxr->adapter;
4206 	struct em_buffer	*rxbuf = NULL;
4207 
4208 	INIT_DEBUGOUT("free_receive_buffers: begin");
4209 
4210 	if (rxr->rx_buffers != NULL) {
4211 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4212 			rxbuf = &rxr->rx_buffers[i];
4213 			if (rxbuf->map != NULL) {
4214 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4215 				    BUS_DMASYNC_POSTREAD);
4216 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4217 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4218 			}
4219 			if (rxbuf->m_head != NULL) {
4220 				m_freem(rxbuf->m_head);
4221 				rxbuf->m_head = NULL;
4222 			}
4223 		}
4224 		free(rxr->rx_buffers, M_DEVBUF);
4225 		rxr->rx_buffers = NULL;
4226 		rxr->next_to_check = 0;
4227 		rxr->next_to_refresh = 0;
4228 	}
4229 
4230 	if (rxr->rxtag != NULL) {
4231 		bus_dma_tag_destroy(rxr->rxtag);
4232 		rxr->rxtag = NULL;
4233 	}
4234 
4235 	return;
4236 }
4237 
4238 
4239 /*********************************************************************
4240  *
4241  *  Enable receive unit.
4242  *
4243  **********************************************************************/
4244 #define MAX_INTS_PER_SEC	8000
4245 #define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
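/*
 * Worked example: the ITR register counts in 256 ns units, so
 * DEFAULT_ITR = 10^9 / (8000 * 256) = 488, and 1 / (488 * 256 ns) is
 * roughly 8000 interrupts per second.
 */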
4246 
4247 static void
4248 em_initialize_receive_unit(struct adapter *adapter)
4249 {
4250 	struct rx_ring	*rxr = adapter->rx_rings;
4251 	struct ifnet	*ifp = adapter->ifp;
4252 	struct e1000_hw	*hw = &adapter->hw;
4253 	u64	bus_addr;
4254 	u32	rctl, rxcsum;
4255 
4256 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4257 
4258 	/*
4259 	 * Make sure receives are disabled while setting
4260 	 * up the descriptor ring
4261 	 */
4262 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4263 	/* Do not disable if ever enabled on this hardware */
4264 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4265 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4266 
4267 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4268 	    adapter->rx_abs_int_delay.value);
4269 	/*
4270 	 * Set the interrupt throttling rate. Value is calculated
4271 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4272 	 */
4273 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4274 
4275 	/*
4276 	** When using MSIX interrupts we need to throttle
4277 	** using the EITR register (82574 only)
4278 	*/
4279 	if (hw->mac.type == e1000_82574) {
4280 		for (int i = 0; i < 4; i++)
4281 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4282 			    DEFAULT_ITR);
4283 		/* Disable accelerated acknowledge */
4284 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4285 	}
4286 
4287 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4288 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4289 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4290 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4291 	}
4292 
4293 	/*
4294 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4295 	** long latencies are observed, e.g. on the Lenovo X60. This
4296 	** change eliminates the problem, but since having positive
4297 	** values in RDTR is a known source of problems on other
4298 	** platforms, another solution is being sought.
4299 	*/
4300 	if (hw->mac.type == e1000_82573)
4301 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4302 
4303 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4304 		/* Setup the Base and Length of the Rx Descriptor Ring */
4305 		bus_addr = rxr->rxdma.dma_paddr;
4306 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4307 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4308 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4309 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4310 		/* Setup the Head and Tail Descriptor Pointers */
4311 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4312 #ifdef DEV_NETMAP
4313 		/*
4314 		 * an init() while a netmap client is active must
4315 		 * preserve the rx buffers passed to userspace.
4316 		 * In this driver it means we adjust RDT to
4317 		 * something different from na->num_rx_desc - 1.
4318 		 */
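		/*
		 * e.g. (illustrative numbers): with 256 descriptors and
		 * nr_hwavail = 10, RDT is written as 245 instead of 255.
		 */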
4319 		if (ifp->if_capenable & IFCAP_NETMAP) {
4320 			struct netmap_adapter *na = NA(adapter->ifp);
4321 			struct netmap_kring *kring = &na->rx_rings[i];
4322 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4323 
4324 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4325 		} else
4326 #endif /* DEV_NETMAP */
4327 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4328 	}
4329 
4330 	/* Set PTHRESH for improved jumbo performance */
4331 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4332 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4333 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4334 	    (ifp->if_mtu > ETHERMTU)) {
4335 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4336 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4337 	}
4338 
4339 	if (adapter->hw.mac.type == e1000_pch2lan) {
4340 		if (ifp->if_mtu > ETHERMTU)
4341 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4342 		else
4343 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4344 	}
4345 
4346 	/* Setup the Receive Control Register */
4347 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4348 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4349 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4350 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4351 
4352         /* Strip the CRC */
4353         rctl |= E1000_RCTL_SECRC;
4354 
4355         /* Make sure VLAN Filters are off */
4356         rctl &= ~E1000_RCTL_VFE;
4357 	rctl &= ~E1000_RCTL_SBP;
4358 
4359 	if (adapter->rx_mbuf_sz == MCLBYTES)
4360 		rctl |= E1000_RCTL_SZ_2048;
4361 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4362 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4363 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4364 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
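	/*
	 * Editorial note: BSEX selects the extended buffer-size
	 * encoding, so the MJUM9BYTES (9 KB) cluster case above pairs
	 * with an 8 KB hardware buffer, leaving slack in the cluster.
	 */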
4365 
4366 	if (ifp->if_mtu > ETHERMTU)
4367 		rctl |= E1000_RCTL_LPE;
4368 	else
4369 		rctl &= ~E1000_RCTL_LPE;
4370 
4371 	/* Write out the settings */
4372 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4373 
4374 	return;
4375 }
4376 
4377 
4378 /*********************************************************************
4379  *
4380  *  This routine executes in interrupt context. It replenishes
4381  *  the mbufs in the descriptor ring and sends data which has been
4382  *  dma'ed into host memory to the upper layer.
4383  *
4384  *  We loop at most count times if count is > 0, or until done if
4385  *  count < 0.
4386  *
4387  *  For polling we also now return the number of cleaned packets
4388  *********************************************************************/
4389 static bool
4390 em_rxeof(struct rx_ring *rxr, int count, int *done)
4391 {
4392 	struct adapter		*adapter = rxr->adapter;
4393 	struct ifnet		*ifp = adapter->ifp;
4394 	struct mbuf		*mp, *sendmp;
4395 	u8			status = 0;
4396 	u16 			len;
4397 	int			i, processed, rxdone = 0;
4398 	bool			eop;
4399 	struct e1000_rx_desc	*cur;
4400 
4401 	EM_RX_LOCK(rxr);
4402 
4403 #ifdef DEV_NETMAP
4404 	if (ifp->if_capenable & IFCAP_NETMAP) {
4405 		struct netmap_adapter *na = NA(ifp);
4406 
4407 		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4408 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4409 		EM_RX_UNLOCK(rxr);
4410 		EM_CORE_LOCK(adapter);
4411 		selwakeuppri(&na->rx_si, PI_NET);
4412 		EM_CORE_UNLOCK(adapter);
4413 		return (0);
4414 	}
4415 #endif /* DEV_NETMAP */
4416 
4417 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4418 
4419 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4420 			break;
4421 
4422 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4423 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4424 
4425 		cur = &rxr->rx_base[i];
4426 		status = cur->status;
4427 		mp = sendmp = NULL;
4428 
4429 		if ((status & E1000_RXD_STAT_DD) == 0)
4430 			break;
4431 
4432 		len = le16toh(cur->length);
4433 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4434 
4435 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4436 		    (rxr->discard == TRUE)) {
4437 			ifp->if_ierrors++;
4438 			++rxr->rx_discarded;
4439 			if (!eop) /* Catch subsequent segs */
4440 				rxr->discard = TRUE;
4441 			else
4442 				rxr->discard = FALSE;
4443 			em_rx_discard(rxr, i);
4444 			goto next_desc;
4445 		}
4446 
4447 		/* Assign correct length to the current fragment */
4448 		mp = rxr->rx_buffers[i].m_head;
4449 		mp->m_len = len;
4450 
4451 		/* Trigger for refresh */
4452 		rxr->rx_buffers[i].m_head = NULL;
4453 
4454 		/* First segment? */
4455 		if (rxr->fmp == NULL) {
4456 			mp->m_pkthdr.len = len;
4457 			rxr->fmp = rxr->lmp = mp;
4458 		} else {
4459 			/* Chain mbuf's together */
4460 			mp->m_flags &= ~M_PKTHDR;
4461 			rxr->lmp->m_next = mp;
4462 			rxr->lmp = mp;
4463 			rxr->fmp->m_pkthdr.len += len;
4464 		}
4465 
4466 		if (eop) {
4467 			--count;
4468 			sendmp = rxr->fmp;
4469 			sendmp->m_pkthdr.rcvif = ifp;
4470 			ifp->if_ipackets++;
4471 			em_receive_checksum(cur, sendmp);
4472 #ifndef __NO_STRICT_ALIGNMENT
4473 			if (adapter->max_frame_size >
4474 			    (MCLBYTES - ETHER_ALIGN) &&
4475 			    em_fixup_rx(rxr) != 0)
4476 				goto skip;
4477 #endif
4478 			if (status & E1000_RXD_STAT_VP) {
4479 				sendmp->m_pkthdr.ether_vtag =
4480 				    le16toh(cur->special);
4481 				sendmp->m_flags |= M_VLANTAG;
4482 			}
4483 #ifndef __NO_STRICT_ALIGNMENT
4484 skip:
4485 #endif
4486 			rxr->fmp = rxr->lmp = NULL;
4487 		}
4488 next_desc:
4489 		/* Zero out the receive descriptors status. */
4490 		cur->status = 0;
4491 		++rxdone;	/* cumulative for POLL */
4492 		++processed;
4493 
4494 		/* Advance our pointers to the next descriptor. */
4495 		if (++i == adapter->num_rx_desc)
4496 			i = 0;
4497 
4498 		/* Send to the stack */
4499 		if (sendmp != NULL) {
4500 			rxr->next_to_check = i;
4501 			EM_RX_UNLOCK(rxr);
4502 			(*ifp->if_input)(ifp, sendmp);
4503 			EM_RX_LOCK(rxr);
4504 			i = rxr->next_to_check;
4505 		}
4506 
4507 		/* Only refresh mbufs every 8 descriptors */
4508 		if (processed == 8) {
4509 			em_refresh_mbufs(rxr, i);
4510 			processed = 0;
4511 		}
4512 	}
4513 
4514 	/* Catch any remaining refresh work */
4515 	if (e1000_rx_unrefreshed(rxr))
4516 		em_refresh_mbufs(rxr, i);
4517 
4518 	rxr->next_to_check = i;
4519 	if (done != NULL)
4520 		*done = rxdone;
4521 	EM_RX_UNLOCK(rxr);
4522 
4523 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4524 }
4525 
4526 static __inline void
4527 em_rx_discard(struct rx_ring *rxr, int i)
4528 {
4529 	struct em_buffer	*rbuf;
4530 
4531 	rbuf = &rxr->rx_buffers[i];
4532 	/* Free any previous pieces */
4533 	if (rxr->fmp != NULL) {
4534 		rxr->fmp->m_flags |= M_PKTHDR;
4535 		m_freem(rxr->fmp);
4536 		rxr->fmp = NULL;
4537 		rxr->lmp = NULL;
4538 	}
4539 	/*
4540 	** Free the buffer and allow em_refresh_mbufs()
4541 	** to clean up and recharge the buffer.
4542 	*/
4543 	if (rbuf->m_head) {
4544 		m_free(rbuf->m_head);
4545 		rbuf->m_head = NULL;
4546 	}
4547 	return;
4548 }
4549 
4550 #ifndef __NO_STRICT_ALIGNMENT
4551 /*
4552  * When jumbo frames are enabled we should realign the entire payload on
4553  * architectures with strict alignment. This is a serious design mistake
4554  * of the 8254x, as it nullifies the benefit of DMA operations. The 8254x
4555  * only allows the RX buffer size to be 2048/4096/8192/16384; what we
4556  * really want is 2048 - ETHER_ALIGN, to align the payload. On
4557  * architectures without strict alignment restrictions the 8254x still
4558  * performs unaligned memory accesses, which reduce performance too.
4559  * To avoid copying over an entire frame to align it, we allocate a new
4560  * mbuf and copy the ethernet header to the new mbuf. The new mbuf is
4561  * prepended onto the existing mbuf chain.
4562  *
4563  * Be aware, the best performance of the 8254x is achieved only when
4564  * jumbo frames are not used at all on architectures with strict alignment.
4564  */
4565 static int
4566 em_fixup_rx(struct rx_ring *rxr)
4567 {
4568 	struct adapter *adapter = rxr->adapter;
4569 	struct mbuf *m, *n;
4570 	int error;
4571 
4572 	error = 0;
4573 	m = rxr->fmp;
4574 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4575 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4576 		m->m_data += ETHER_HDR_LEN;
4577 	} else {
4578 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4579 		if (n != NULL) {
4580 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4581 			m->m_data += ETHER_HDR_LEN;
4582 			m->m_len -= ETHER_HDR_LEN;
4583 			n->m_len = ETHER_HDR_LEN;
4584 			M_MOVE_PKTHDR(n, m);
4585 			n->m_next = m;
4586 			rxr->fmp = n;
4587 		} else {
4588 			adapter->dropped_pkts++;
4589 			m_freem(rxr->fmp);
4590 			rxr->fmp = NULL;
4591 			error = ENOMEM;
4592 		}
4593 	}
4594 
4595 	return (error);
4596 }
4597 #endif
4598 
4599 /*********************************************************************
4600  *
4601  *  Verify that the hardware indicated that the checksum is valid.
4602  *  Inform the stack about the status of the checksum so that the
4603  *  stack doesn't spend time verifying the checksum.
4604  *
4605  *********************************************************************/
4606 static void
4607 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4608 {
4609 	/* Ignore Checksum bit is set */
4610 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4611 		mp->m_pkthdr.csum_flags = 0;
4612 		return;
4613 	}
4614 
4615 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4616 		/* Did it pass? */
4617 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4618 			/* IP Checksum Good */
4619 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4620 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4621 
4622 		} else {
4623 			mp->m_pkthdr.csum_flags = 0;
4624 		}
4625 	}
4626 
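	/*
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR tells the stack that
	 * csum_data already covers the pseudo-header; the 0xffff value
	 * below means "fully verified, nothing left to check".
	 */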
4627 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4628 		/* Did it pass? */
4629 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4630 			mp->m_pkthdr.csum_flags |=
4631 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4632 			mp->m_pkthdr.csum_data = htons(0xffff);
4633 		}
4634 	}
4635 }
4636 
4637 /*
4638  * This routine is run via a vlan
4639  * config EVENT.
4640  */
4641 static void
4642 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4643 {
4644 	struct adapter	*adapter = ifp->if_softc;
4645 	u32		index, bit;
4646 
4647 	if (ifp->if_softc !=  arg)   /* Not our event */
4648 		return;
4649 
4650 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4651                 return;
4652 
4653 	EM_CORE_LOCK(adapter);
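	/*
	 * The VFTA is 128 32-bit words covering VLAN IDs 0-4095;
	 * e.g. (illustrative) vtag 100 maps to index 3, bit 4.
	 */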
4654 	index = (vtag >> 5) & 0x7F;
4655 	bit = vtag & 0x1F;
4656 	adapter->shadow_vfta[index] |= (1 << bit);
4657 	++adapter->num_vlans;
4658 	/* Re-init to load the changes */
4659 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4660 		em_init_locked(adapter);
4661 	EM_CORE_UNLOCK(adapter);
4662 }
4663 
4664 /*
4665  * This routine is run via a vlan
4666  * unconfig EVENT.
4667  */
4668 static void
4669 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4670 {
4671 	struct adapter	*adapter = ifp->if_softc;
4672 	u32		index, bit;
4673 
4674 	if (ifp->if_softc !=  arg)
4675 		return;
4676 
4677 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4678                 return;
4679 
4680 	EM_CORE_LOCK(adapter);
4681 	index = (vtag >> 5) & 0x7F;
4682 	bit = vtag & 0x1F;
4683 	adapter->shadow_vfta[index] &= ~(1 << bit);
4684 	--adapter->num_vlans;
4685 	/* Re-init to load the changes */
4686 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4687 		em_init_locked(adapter);
4688 	EM_CORE_UNLOCK(adapter);
4689 }
4690 
4691 static void
4692 em_setup_vlan_hw_support(struct adapter *adapter)
4693 {
4694 	struct e1000_hw *hw = &adapter->hw;
4695 	u32             reg;
4696 
4697 	/*
4698 	** We get here thru init_locked, meaning
4699 	** a soft reset; this has already cleared
4700 	** the VFTA and other state, so if no
4701 	** vlans have been registered, do nothing.
4702 	*/
4703 	if (adapter->num_vlans == 0)
4704                 return;
4705 
4706 	/*
4707 	** A soft reset zeroes out the VFTA, so
4708 	** we need to repopulate it now.
4709 	*/
4710 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4711                 if (adapter->shadow_vfta[i] != 0)
4712 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4713                             i, adapter->shadow_vfta[i]);
4714 
4715 	reg = E1000_READ_REG(hw, E1000_CTRL);
4716 	reg |= E1000_CTRL_VME;
4717 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4718 
4719 	/* Enable the Filter Table */
4720 	reg = E1000_READ_REG(hw, E1000_RCTL);
4721 	reg &= ~E1000_RCTL_CFIEN;
4722 	reg |= E1000_RCTL_VFE;
4723 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4724 }
4725 
4726 static void
4727 em_enable_intr(struct adapter *adapter)
4728 {
4729 	struct e1000_hw *hw = &adapter->hw;
4730 	u32 ims_mask = IMS_ENABLE_MASK;
4731 
4732 	if (hw->mac.type == e1000_82574) {
4733 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4734 		ims_mask |= EM_MSIX_MASK;
4735 	}
4736 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4737 }
4738 
4739 static void
4740 em_disable_intr(struct adapter *adapter)
4741 {
4742 	struct e1000_hw *hw = &adapter->hw;
4743 
4744 	if (hw->mac.type == e1000_82574)
4745 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4746 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4747 }
4748 
4749 /*
4750  * Bit of a misnomer: what this really means is
4751  * to enable OS management of the system... aka
4752  * to disable special hardware management features.
4753  */
4754 static void
4755 em_init_manageability(struct adapter *adapter)
4756 {
4757 	/* A shared code workaround */
4758 #define E1000_82542_MANC2H E1000_MANC2H
4759 	if (adapter->has_manage) {
4760 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4761 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4762 
4763 		/* disable hardware interception of ARP */
4764 		manc &= ~(E1000_MANC_ARP_EN);
4765 
4766                 /* enable receiving management packets to the host */
4767 		manc |= E1000_MANC_EN_MNG2HOST;
4768 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4769 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4770 		manc2h |= E1000_MNG2HOST_PORT_623;
4771 		manc2h |= E1000_MNG2HOST_PORT_664;
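		/* UDP ports 623/664 are the ASF RMCP and secure-RMCP ports */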
4772 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4773 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4774 	}
4775 }
4776 
4777 /*
4778  * Give control back to hardware management
4779  * controller if there is one.
4780  */
4781 static void
4782 em_release_manageability(struct adapter *adapter)
4783 {
4784 	if (adapter->has_manage) {
4785 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4786 
4787 		/* re-enable hardware interception of ARP */
4788 		manc |= E1000_MANC_ARP_EN;
4789 		manc &= ~E1000_MANC_EN_MNG2HOST;
4790 
4791 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4792 	}
4793 }
4794 
4795 /*
4796  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4797  * For ASF and Pass Through versions of f/w this means
4798  * that the driver is loaded. For AMT versions of the f/w
4799  * this means that the network i/f is open.
4800  */
4801 static void
4802 em_get_hw_control(struct adapter *adapter)
4803 {
4804 	u32 ctrl_ext, swsm;
4805 
4806 	if (adapter->hw.mac.type == e1000_82573) {
4807 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4808 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4809 		    swsm | E1000_SWSM_DRV_LOAD);
4810 		return;
4811 	}
4812 	/* else */
4813 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4814 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4815 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4816 	return;
4817 }
4818 
4819 /*
4820  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4821  * For ASF and Pass Through versions of f/w this means that
4822  * the driver is no longer loaded. For AMT versions of the
4823  * f/w this means that the network i/f is closed.
4824  */
4825 static void
4826 em_release_hw_control(struct adapter *adapter)
4827 {
4828 	u32 ctrl_ext, swsm;
4829 
4830 	if (!adapter->has_manage)
4831 		return;
4832 
4833 	if (adapter->hw.mac.type == e1000_82573) {
4834 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4835 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4836 		    swsm & ~E1000_SWSM_DRV_LOAD);
4837 		return;
4838 	}
4839 	/* else */
4840 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4841 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4842 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4843 	return;
4844 }
4845 
4846 static int
4847 em_is_valid_ether_addr(u8 *addr)
4848 {
4849 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4850 
4851 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4852 		return (FALSE);
4853 	}
4854 
4855 	return (TRUE);
4856 }
4857 
4858 /*
4859 ** Parse the interface capabilities with regard
4860 ** to both system management and wake-on-lan for
4861 ** later use.
4862 */
4863 static void
4864 em_get_wakeup(device_t dev)
4865 {
4866 	struct adapter	*adapter = device_get_softc(dev);
4867 	u16		eeprom_data = 0, device_id, apme_mask;
4868 
4869 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4870 	apme_mask = EM_EEPROM_APME;
4871 
4872 	switch (adapter->hw.mac.type) {
4873 	case e1000_82573:
4874 	case e1000_82583:
4875 		adapter->has_amt = TRUE;
4876 		/* Falls thru */
4877 	case e1000_82571:
4878 	case e1000_82572:
4879 	case e1000_80003es2lan:
4880 		if (adapter->hw.bus.func == 1) {
4881 			e1000_read_nvm(&adapter->hw,
4882 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4883 			break;
4884 		} else
4885 			e1000_read_nvm(&adapter->hw,
4886 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4887 		break;
4888 	case e1000_ich8lan:
4889 	case e1000_ich9lan:
4890 	case e1000_ich10lan:
4891 	case e1000_pchlan:
4892 	case e1000_pch2lan:
4893 		apme_mask = E1000_WUC_APME;
4894 		adapter->has_amt = TRUE;
4895 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4896 		break;
4897 	default:
4898 		e1000_read_nvm(&adapter->hw,
4899 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4900 		break;
4901 	}
4902 	if (eeprom_data & apme_mask)
4903 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4904 	/*
4905 	 * We have the eeprom settings, now apply the special cases
4906 	 * where the eeprom may be wrong or the board won't support
4907 	 * wake on lan on a particular port.
4908 	 */
4909 	device_id = pci_get_device(dev);
4910         switch (device_id) {
4911 	case E1000_DEV_ID_82571EB_FIBER:
4912 		/* Wake events only supported on port A for dual fiber
4913 		 * regardless of eeprom setting */
4914 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4915 		    E1000_STATUS_FUNC_1)
4916 			adapter->wol = 0;
4917 		break;
4918 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4919 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4920 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4921                 /* if quad port adapter, disable WoL on all but port A */
4922 		if (global_quad_port_a != 0)
4923 			adapter->wol = 0;
4924 		/* Reset for multiple quad port adapters */
4925 		if (++global_quad_port_a == 4)
4926 			global_quad_port_a = 0;
4927                 break;
4928 	}
4929 	return;
4930 }
4931 
4932 
4933 /*
4934  * Enable PCI Wake On Lan capability
4935  */
4936 static void
4937 em_enable_wakeup(device_t dev)
4938 {
4939 	struct adapter	*adapter = device_get_softc(dev);
4940 	struct ifnet	*ifp = adapter->ifp;
4941 	u32		pmc, ctrl, ctrl_ext, rctl;
4942 	u16     	status;
4943 
4944 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4945 		return;
4946 
4947 	/* Advertise the wakeup capability */
4948 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4949 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4950 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4951 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4952 
4953 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4954 	    (adapter->hw.mac.type == e1000_pchlan) ||
4955 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4956 	    (adapter->hw.mac.type == e1000_ich10lan))
4957 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4958 
4959 	/* Keep the laser running on Fiber adapters */
4960 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4961 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4962 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4963 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4964 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4965 	}
4966 
4967 	/*
4968 	** Determine type of Wakeup: note that wol
4969 	** is set with all bits on by default.
4970 	*/
4971 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4972 		adapter->wol &= ~E1000_WUFC_MAG;
4973 
4974 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4975 		adapter->wol &= ~E1000_WUFC_MC;
4976 	else {
4977 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4978 		rctl |= E1000_RCTL_MPE;
4979 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4980 	}
4981 
4982 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4983 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4984 		if (em_enable_phy_wakeup(adapter))
4985 			return;
4986 	} else {
4987 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4988 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4989 	}
4990 
4991 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4992 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4993 
4994         /* Request PME */
4995         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4996 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4997 	if (ifp->if_capenable & IFCAP_WOL)
4998 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4999         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5000 
5001 	return;
5002 }
5003 
5004 /*
5005 ** WOL in the newer chipset interfaces (pchlan)
5006 ** requires things to be copied into the PHY.
5007 */
5008 static int
5009 em_enable_phy_wakeup(struct adapter *adapter)
5010 {
5011 	struct e1000_hw *hw = &adapter->hw;
5012 	u32 mreg, ret = 0;
5013 	u16 preg;
5014 
5015 	/* copy MAC RARs to PHY RARs */
5016 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5017 
5018 	/* copy MAC MTA to PHY MTA */
5019 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5020 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5021 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5022 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5023 		    (u16)((mreg >> 16) & 0xFFFF));
5024 	}
5025 
5026 	/* configure PHY Rx Control register */
5027 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5028 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5029 	if (mreg & E1000_RCTL_UPE)
5030 		preg |= BM_RCTL_UPE;
5031 	if (mreg & E1000_RCTL_MPE)
5032 		preg |= BM_RCTL_MPE;
5033 	preg &= ~(BM_RCTL_MO_MASK);
5034 	if (mreg & E1000_RCTL_MO_3)
5035 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5036 				<< BM_RCTL_MO_SHIFT);
5037 	if (mreg & E1000_RCTL_BAM)
5038 		preg |= BM_RCTL_BAM;
5039 	if (mreg & E1000_RCTL_PMCF)
5040 		preg |= BM_RCTL_PMCF;
5041 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5042 	if (mreg & E1000_CTRL_RFCE)
5043 		preg |= BM_RCTL_RFCE;
5044 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5045 
5046 	/* enable PHY wakeup in MAC register */
5047 	E1000_WRITE_REG(hw, E1000_WUC,
5048 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5049 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5050 
5051 	/* configure and enable PHY wakeup in PHY registers */
5052 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5053 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5054 
5055 	/* activate PHY wakeup */
5056 	ret = hw->phy.ops.acquire(hw);
5057 	if (ret) {
5058 		printf("Could not acquire PHY\n");
5059 		return ret;
5060 	}
5061 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5062 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5063 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5064 	if (ret) {
5065 		printf("Could not read PHY page 769\n");
5066 		goto out;
5067 	}
5068 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5069 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5070 	if (ret)
5071 		printf("Could not set PHY Host Wakeup bit\n");
5072 out:
5073 	hw->phy.ops.release(hw);
5074 
5075 	return ret;
5076 }
5077 
5078 static void
5079 em_led_func(void *arg, int onoff)
5080 {
5081 	struct adapter	*adapter = arg;
5082 
5083 	EM_CORE_LOCK(adapter);
5084 	if (onoff) {
5085 		e1000_setup_led(&adapter->hw);
5086 		e1000_led_on(&adapter->hw);
5087 	} else {
5088 		e1000_led_off(&adapter->hw);
5089 		e1000_cleanup_led(&adapter->hw);
5090 	}
5091 	EM_CORE_UNLOCK(adapter);
5092 }
5093 
5094 /*
5095 ** Disable the L0S and L1 LINK states
5096 */
5097 static void
5098 em_disable_aspm(struct adapter *adapter)
5099 {
5100 	int		base, reg;
5101 	u16		link_cap, link_ctrl;
5102 	device_t	dev = adapter->dev;
5103 
5104 	switch (adapter->hw.mac.type) {
5105 		case e1000_82573:
5106 		case e1000_82574:
5107 		case e1000_82583:
5108 			break;
5109 		default:
5110 			return;
5111 	}
5112 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5113 		return;
5114 	reg = base + PCIR_EXPRESS_LINK_CAP;
5115 	link_cap = pci_read_config(dev, reg, 2);
5116 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5117 		return;
5118 	reg = base + PCIR_EXPRESS_LINK_CTL;
5119 	link_ctrl = pci_read_config(dev, reg, 2);
	link_ctrl &= 0xFFFC; /* clear bits 0 and 1, the L0s/L1 enables */
5121 	pci_write_config(dev, reg, link_ctrl, 2);
5122 	return;
5123 }
5124 
5125 /**********************************************************************
5126  *
5127  *  Update the board statistics counters.
5128  *
5129  **********************************************************************/
5130 static void
5131 em_update_stats_counters(struct adapter *adapter)
5132 {
5133 	struct ifnet   *ifp;
5134 
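	/*
	** Symbol and sequence error counters are only valid
	** on copper, or while the link is up, so gate those reads.
	*/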
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs +=
		    E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5139 	}
5140 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5141 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5142 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5143 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5144 
5145 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5146 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5147 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5148 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5149 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5150 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5151 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5152 	/*
5153 	** For watchdog management we need to know if we have been
5154 	** paused during the last interval, so capture that here.
5155 	*/
5156 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5157 	adapter->stats.xoffrxc += adapter->pause_frames;
5158 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5159 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5160 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5161 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5162 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5163 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5164 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5165 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5166 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5167 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5168 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5169 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5170 
	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
5173 
5174 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5175 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5176 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5177 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5178 
5179 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5180 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5181 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5182 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5183 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5184 
	/* Total octets follow the same 64-bit read rule. */
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5187 
5188 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5189 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5190 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5191 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5192 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5193 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5194 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5195 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5196 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5197 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5198 
5199 	/* Interrupt Counts */
5200 
5201 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5202 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5203 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5204 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5205 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5206 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5207 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5208 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5209 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5210 
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
5225 	ifp = adapter->ifp;
5226 
5227 	ifp->if_collisions = adapter->stats.colc;
5228 
5229 	/* Rx Errors */
5230 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5231 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5232 	    adapter->stats.ruc + adapter->stats.roc +
5233 	    adapter->stats.mpc + adapter->stats.cexterr;
5234 
5235 	/* Tx Errors */
5236 	ifp->if_oerrors = adapter->stats.ecol +
5237 	    adapter->stats.latecol + adapter->watchdog_events;
5238 }
5239 
5240 /* Export a single 32-bit register via a read-only sysctl. */
5241 static int
5242 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5243 {
5244 	struct adapter *adapter;
5245 	u_int val;
5246 
5247 	adapter = oidp->oid_arg1;
5248 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5249 	return (sysctl_handle_int(oidp, &val, 0, req));
5250 }
5251 
5252 /*
5253  * Add sysctl variables, one per statistic, to the system.
5254  */
5255 static void
5256 em_add_hw_stats(struct adapter *adapter)
5257 {
5258 	device_t dev = adapter->dev;
5259 
5260 	struct tx_ring *txr = adapter->tx_rings;
5261 	struct rx_ring *rxr = adapter->rx_rings;
5262 
5263 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5264 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5265 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5266 	struct e1000_hw_stats *stats = &adapter->stats;
5267 
5268 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5269 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5270 
5271 #define QUEUE_NAME_LEN 32
5272 	char namebuf[QUEUE_NAME_LEN];
5273 
5274 	/* Driver Statistics */
5275 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5276 			CTLFLAG_RD, &adapter->link_irq,
			"Link MSI-X IRQ Handled");
5278 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5279 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5280 			 "Std mbuf failed");
5281 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5282 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5283 			 "Std mbuf cluster failed");
5284 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5285 			CTLFLAG_RD, &adapter->dropped_pkts,
5286 			"Driver dropped packets");
5287 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5288 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5289 			"Driver tx dma failure in xmit");
5290 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5291 			CTLFLAG_RD, &adapter->rx_overruns,
5292 			"RX overruns");
5293 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5294 			CTLFLAG_RD, &adapter->watchdog_events,
5295 			"Watchdog timeouts");
5296 
5297 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5298 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5299 			em_sysctl_reg_handler, "IU",
5300 			"Device Control Register");
5301 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5302 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5303 			em_sysctl_reg_handler, "IU",
5304 			"Receiver Control Register");
5305 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5306 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5307 			"Flow Control High Watermark");
5308 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5309 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5310 			"Flow Control Low Watermark");
5311 
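	/* Per-queue nodes: descriptor head/tail and MSI-X interrupt counts */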
5312 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5313 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5314 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5315 					    CTLFLAG_RD, NULL, "Queue Name");
5316 		queue_list = SYSCTL_CHILDREN(queue_node);
5317 
5318 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5319 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5320 				E1000_TDH(txr->me),
5321 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
5323 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5324 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5325 				E1000_TDT(txr->me),
5326 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
5328 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5329 				CTLFLAG_RD, &txr->tx_irq,
5330 				"Queue MSI-X Transmit Interrupts");
5331 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5332 				CTLFLAG_RD, &txr->no_desc_avail,
5333 				"Queue No Descriptor Available");
5334 
5335 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5336 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5337 				E1000_RDH(rxr->me),
5338 				em_sysctl_reg_handler, "IU",
5339 				"Receive Descriptor Head");
5340 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5341 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5342 				E1000_RDT(rxr->me),
5343 				em_sysctl_reg_handler, "IU",
5344 				"Receive Descriptor Tail");
5345 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5346 				CTLFLAG_RD, &rxr->rx_irq,
5347 				"Queue MSI-X Receive Interrupts");
5348 	}
5349 
5350 	/* MAC stats get their own sub node */
5351 
5352 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5353 				    CTLFLAG_RD, NULL, "Statistics");
5354 	stat_list = SYSCTL_CHILDREN(stat_node);
5355 
5356 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5357 			CTLFLAG_RD, &stats->ecol,
5358 			"Excessive collisions");
5359 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5360 			CTLFLAG_RD, &stats->scc,
5361 			"Single collisions");
5362 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5363 			CTLFLAG_RD, &stats->mcc,
5364 			"Multiple collisions");
5365 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5366 			CTLFLAG_RD, &stats->latecol,
5367 			"Late collisions");
5368 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5369 			CTLFLAG_RD, &stats->colc,
5370 			"Collision Count");
5371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5372 			CTLFLAG_RD, &adapter->stats.symerrs,
5373 			"Symbol Errors");
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5375 			CTLFLAG_RD, &adapter->stats.sec,
5376 			"Sequence Errors");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5378 			CTLFLAG_RD, &adapter->stats.dc,
5379 			"Defer Count");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5381 			CTLFLAG_RD, &adapter->stats.mpc,
5382 			"Missed Packets");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5384 			CTLFLAG_RD, &adapter->stats.rnbc,
5385 			"Receive No Buffers");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5387 			CTLFLAG_RD, &adapter->stats.ruc,
5388 			"Receive Undersize");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5390 			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5393 			CTLFLAG_RD, &adapter->stats.roc,
5394 			"Oversized Packets Received");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5396 			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5399 			CTLFLAG_RD, &adapter->stats.rxerrc,
5400 			"Receive Errors");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5402 			CTLFLAG_RD, &adapter->stats.crcerrs,
5403 			"CRC errors");
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5405 			CTLFLAG_RD, &adapter->stats.algnerrc,
5406 			"Alignment Errors");
5407 	/* On 82575 these are collision counts */
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5409 			CTLFLAG_RD, &adapter->stats.cexterr,
5410 			"Collision/Carrier extension errors");
5411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5412 			CTLFLAG_RD, &adapter->stats.xonrxc,
5413 			"XON Received");
5414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5415 			CTLFLAG_RD, &adapter->stats.xontxc,
5416 			"XON Transmitted");
5417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5418 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5419 			"XOFF Received");
5420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5421 			CTLFLAG_RD, &adapter->stats.xofftxc,
5422 			"XOFF Transmitted");
5423 
5424 	/* Packet Reception Stats */
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5426 			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5429 			CTLFLAG_RD, &adapter->stats.gprc,
5430 			"Good Packets Received");
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5432 			CTLFLAG_RD, &adapter->stats.bprc,
5433 			"Broadcast Packets Received");
5434 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5435 			CTLFLAG_RD, &adapter->stats.mprc,
5436 			"Multicast Packets Received");
5437 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5438 			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
5440 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5441 			CTLFLAG_RD, &adapter->stats.prc127,
5442 			"65-127 byte frames received");
5443 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5444 			CTLFLAG_RD, &adapter->stats.prc255,
5445 			"128-255 byte frames received");
5446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5447 			CTLFLAG_RD, &adapter->stats.prc511,
5448 			"256-511 byte frames received");
5449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5450 			CTLFLAG_RD, &adapter->stats.prc1023,
5451 			"512-1023 byte frames received");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5453 			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5464 			CTLFLAG_RD, &adapter->stats.tpt,
5465 			"Total Packets Transmitted");
5466 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5467 			CTLFLAG_RD, &adapter->stats.gptc,
5468 			"Good Packets Transmitted");
5469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5470 			CTLFLAG_RD, &adapter->stats.bptc,
5471 			"Broadcast Packets Transmitted");
5472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5473 			CTLFLAG_RD, &adapter->stats.mptc,
5474 			"Multicast Packets Transmitted");
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5476 			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
5478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5479 			CTLFLAG_RD, &adapter->stats.ptc127,
5480 			"65-127 byte frames transmitted");
5481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5482 			CTLFLAG_RD, &adapter->stats.ptc255,
5483 			"128-255 byte frames transmitted");
5484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5485 			CTLFLAG_RD, &adapter->stats.ptc511,
5486 			"256-511 byte frames transmitted");
5487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5488 			CTLFLAG_RD, &adapter->stats.ptc1023,
5489 			"512-1023 byte frames transmitted");
5490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5491 			CTLFLAG_RD, &adapter->stats.ptc1522,
5492 			"1024-1522 byte frames transmitted");
5493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5494 			CTLFLAG_RD, &adapter->stats.tsctc,
5495 			"TSO Contexts Transmitted");
5496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5497 			CTLFLAG_RD, &adapter->stats.tsctfc,
5498 			"TSO Contexts Failed");
5501 	/* Interrupt Stats */
5502 
5503 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5504 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5505 	int_list = SYSCTL_CHILDREN(int_node);
5506 
5507 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5508 			CTLFLAG_RD, &adapter->stats.iac,
5509 			"Interrupt Assertion Count");
5510 
5511 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5512 			CTLFLAG_RD, &adapter->stats.icrxptc,
5513 			"Interrupt Cause Rx Pkt Timer Expire Count");
5514 
5515 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5516 			CTLFLAG_RD, &adapter->stats.icrxatc,
5517 			"Interrupt Cause Rx Abs Timer Expire Count");
5518 
5519 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5520 			CTLFLAG_RD, &adapter->stats.ictxptc,
5521 			"Interrupt Cause Tx Pkt Timer Expire Count");
5522 
5523 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5524 			CTLFLAG_RD, &adapter->stats.ictxatc,
5525 			"Interrupt Cause Tx Abs Timer Expire Count");
5526 
5527 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5528 			CTLFLAG_RD, &adapter->stats.ictxqec,
5529 			"Interrupt Cause Tx Queue Empty Count");
5530 
5531 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5532 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5533 			"Interrupt Cause Tx Queue Min Thresh Count");
5534 
5535 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5536 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5537 			"Interrupt Cause Rx Desc Min Thresh Count");
5538 
5539 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5540 			CTLFLAG_RD, &adapter->stats.icrxoc,
5541 			"Interrupt Cause Receiver Overrun Count");
5542 }
5543 
5544 /**********************************************************************
5545  *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. Only the first 32 words
 *  are dumped; everything of interest lives in that range.
5549  *
5550  **********************************************************************/
5551 static int
5552 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5553 {
5554 	struct adapter *adapter = (struct adapter *)arg1;
5555 	int error;
5556 	int result;
5557 
5558 	result = -1;
5559 	error = sysctl_handle_int(oidp, &result, 0, req);
5560 
5561 	if (error || !req->newptr)
5562 		return (error);
5563 
5564 	/*
5565 	 * This value will cause a hex dump of the
5566 	 * first 32 16-bit words of the EEPROM to
5567 	 * the screen.
5568 	 */
5569 	if (result == 1)
5570 		em_print_nvm_info(adapter);
5571 
5572 	return (error);
5573 }
5574 
5575 static void
5576 em_print_nvm_info(struct adapter *adapter)
5577 {
5578 	u16	eeprom_data;
5579 	int	i, j, row = 0;
5580 
	/* It's a bit crude, but it gets the job done */
5582 	printf("\nInterface EEPROM Dump:\n");
5583 	printf("Offset\n0x0000  ");
5584 	for (i = 0, j = 0; i < 32; i++, j++) {
5585 		if (j == 8) { /* Make the offset block */
5586 			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5588 		}
5589 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5590 		printf("%04x ", eeprom_data);
5591 	}
5592 	printf("\n");
5593 }
5594 
5595 static int
5596 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5597 {
5598 	struct em_int_delay_info *info;
5599 	struct adapter *adapter;
5600 	u32 regval;
5601 	int error, usecs, ticks;
5602 
5603 	info = (struct em_int_delay_info *)arg1;
5604 	usecs = info->value;
5605 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5606 	if (error != 0 || req->newptr == NULL)
5607 		return (error);
5608 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5609 		return (EINVAL);
5610 	info->value = usecs;
5611 	ticks = EM_USECS_TO_TICKS(usecs);
5612 
5613 	adapter = info->adapter;
5614 
5615 	EM_CORE_LOCK(adapter);
5616 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
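	/* the delay value occupies the low 16 bits of the register */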
5617 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5618 	/* Handle a few special cases. */
5619 	switch (info->offset) {
5620 	case E1000_RDTR:
5621 		break;
5622 	case E1000_TIDV:
5623 		if (ticks == 0) {
5624 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5625 			/* Don't write 0 into the TIDV register. */
5626 			regval++;
5627 		} else
5628 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5629 		break;
5630 	}
5631 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5632 	EM_CORE_UNLOCK(adapter);
5633 	return (0);
5634 }
5635 
5636 static void
5637 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5638 	const char *description, struct em_int_delay_info *info,
5639 	int offset, int value)
5640 {
5641 	info->adapter = adapter;
5642 	info->offset = offset;
5643 	info->value = value;
5644 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5645 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5646 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5647 	    info, 0, em_sysctl_int_delay, "I", description);
5648 }
5649 
5650 static void
5651 em_set_sysctl_value(struct adapter *adapter, const char *name,
5652 	const char *description, int *limit, int value)
5653 {
5654 	*limit = value;
5655 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5656 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5657 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5658 }
5659 
5660 
/*
** Set flow control using sysctl:
** Flow control values (these match the e1000_fc_type enum):
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
5669 static int
5670 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5671 {
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, input;

	/* report the currently configured mode */
	input = adapter->fc;
	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
		case e1000_fc_rx_pause:
		case e1000_fc_tx_pause:
		case e1000_fc_full:
		case e1000_fc_none:
			adapter->hw.fc.requested_mode = input;
			adapter->fc = input;
			break;
		default:
			/* Do nothing */
			return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
5700 }
5701 
5702 /*
5703 ** Manage Energy Efficient Ethernet:
5704 ** Control values:
5705 **     0/1 - enabled/disabled
5706 */
5707 static int
5708 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5709 {
	struct adapter *adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	/* reinit so the new EEE setting takes effect */
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
5722 }
5723 
5724 static int
5725 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5726 {
5727 	struct adapter *adapter;
5728 	int error;
5729 	int result;
5730 
5731 	result = -1;
5732 	error = sysctl_handle_int(oidp, &result, 0, req);
5733 
5734 	if (error || !req->newptr)
5735 		return (error);
5736 
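	/* writing a 1 to this sysctl triggers the debug dump */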
5737 	if (result == 1) {
5738 		adapter = (struct adapter *)arg1;
5739 		em_print_debug_info(adapter);
5740         }
5741 
5742 	return (error);
5743 }
5744 
5745 /*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
5748 */
5749 static void
5750 em_print_debug_info(struct adapter *adapter)
5751 {
5752 	device_t dev = adapter->dev;
5753 	struct tx_ring *txr = adapter->tx_rings;
5754 	struct rx_ring *rxr = adapter->rx_rings;
5755 
	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");
5765 
5766 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5767 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5768 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5769 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5770 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5771 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5772 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5773 	device_printf(dev, "TX descriptors avail = %d\n",
5774 	    txr->tx_avail);
	device_printf(dev, "Tx Descriptors avail failure = %lu\n",
	    txr->no_desc_avail);
	device_printf(dev, "RX discarded packets = %lu\n",
	    rxr->rx_discarded);
5779 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5780 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5781 }
5782