1 /******************************************************************************
2 
3   Copyright (c) 2001-2013, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 
69 #include <net/if_types.h>
70 #include <net/if_vlan_var.h>
71 
72 #include <netinet/in_systm.h>
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79 
80 #include <machine/in_cksum.h>
81 #include <dev/led/led.h>
82 #include <dev/pci/pcivar.h>
83 #include <dev/pci/pcireg.h>
84 
85 #include "e1000_api.h"
86 #include "e1000_82571.h"
87 #include "if_em.h"
88 
89 /*********************************************************************
90  *  Set this to one to display debug statistics
91  *********************************************************************/
92 int	em_display_debug_stats = 0;
93 
94 /*********************************************************************
95  *  Driver version:
96  *********************************************************************/
97 char em_driver_version[] = "7.3.8";
98 
99 /*********************************************************************
100  *  PCI Device ID Table
101  *
102  *  Used by probe to select the devices the driver loads on
103  *  Last field stores an index into em_strings
104  *  Last entry must be all 0s
105  *
106  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107  *********************************************************************/
108 
109 static em_vendor_info_t em_vendor_info_array[] =
110 {
111 	/* Intel(R) PRO/1000 Network Connection */
112 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116 						PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118 						PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131 
132 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137 						PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139 						PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
178 						PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
180 						PCI_ANY_ID, PCI_ANY_ID, 0},
181 	/* required last entry */
182 	{ 0, 0, 0, 0, 0}
183 };
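
/*
 * Note: every entry above uses string index 0, the sole entry in
 * em_strings[] below.
 */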
184 
185 /*********************************************************************
186  *  Table of branding strings for all supported NICs.
187  *********************************************************************/
188 
189 static char *em_strings[] = {
190 	"Intel(R) PRO/1000 Network Connection"
191 };
192 
193 /*********************************************************************
194  *  Function prototypes
195  *********************************************************************/
196 static int	em_probe(device_t);
197 static int	em_attach(device_t);
198 static int	em_detach(device_t);
199 static int	em_shutdown(device_t);
200 static int	em_suspend(device_t);
201 static int	em_resume(device_t);
202 #ifdef EM_MULTIQUEUE
203 static int	em_mq_start(struct ifnet *, struct mbuf *);
204 static int	em_mq_start_locked(struct ifnet *,
205 		    struct tx_ring *, struct mbuf *);
206 static void	em_qflush(struct ifnet *);
207 #else
208 static void	em_start(struct ifnet *);
209 static void	em_start_locked(struct ifnet *, struct tx_ring *);
210 #endif
211 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
212 static void	em_init(void *);
213 static void	em_init_locked(struct adapter *);
214 static void	em_stop(void *);
215 static void	em_media_status(struct ifnet *, struct ifmediareq *);
216 static int	em_media_change(struct ifnet *);
217 static void	em_identify_hardware(struct adapter *);
218 static int	em_allocate_pci_resources(struct adapter *);
219 static int	em_allocate_legacy(struct adapter *);
220 static int	em_allocate_msix(struct adapter *);
221 static int	em_allocate_queues(struct adapter *);
222 static int	em_setup_msix(struct adapter *);
223 static void	em_free_pci_resources(struct adapter *);
224 static void	em_local_timer(void *);
225 static void	em_reset(struct adapter *);
226 static int	em_setup_interface(device_t, struct adapter *);
227 
228 static void	em_setup_transmit_structures(struct adapter *);
229 static void	em_initialize_transmit_unit(struct adapter *);
230 static int	em_allocate_transmit_buffers(struct tx_ring *);
231 static void	em_free_transmit_structures(struct adapter *);
232 static void	em_free_transmit_buffers(struct tx_ring *);
233 
234 static int	em_setup_receive_structures(struct adapter *);
235 static int	em_allocate_receive_buffers(struct rx_ring *);
236 static void	em_initialize_receive_unit(struct adapter *);
237 static void	em_free_receive_structures(struct adapter *);
238 static void	em_free_receive_buffers(struct rx_ring *);
239 
240 static void	em_enable_intr(struct adapter *);
241 static void	em_disable_intr(struct adapter *);
242 static void	em_update_stats_counters(struct adapter *);
243 static void	em_add_hw_stats(struct adapter *adapter);
244 static void	em_txeof(struct tx_ring *);
245 static bool	em_rxeof(struct rx_ring *, int, int *);
246 #ifndef __NO_STRICT_ALIGNMENT
247 static int	em_fixup_rx(struct rx_ring *);
248 #endif
249 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
250 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
251 		    struct ip *, u32 *, u32 *);
252 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
253 		    struct tcphdr *, u32 *, u32 *);
254 static void	em_set_promisc(struct adapter *);
255 static void	em_disable_promisc(struct adapter *);
256 static void	em_set_multi(struct adapter *);
257 static void	em_update_link_status(struct adapter *);
258 static void	em_refresh_mbufs(struct rx_ring *, int);
259 static void	em_register_vlan(void *, struct ifnet *, u16);
260 static void	em_unregister_vlan(void *, struct ifnet *, u16);
261 static void	em_setup_vlan_hw_support(struct adapter *);
262 static int	em_xmit(struct tx_ring *, struct mbuf **);
263 static int	em_dma_malloc(struct adapter *, bus_size_t,
264 		    struct em_dma_alloc *, int);
265 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
266 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
267 static void	em_print_nvm_info(struct adapter *);
268 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
269 static void	em_print_debug_info(struct adapter *);
270 static int 	em_is_valid_ether_addr(u8 *);
271 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
272 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
273 		    const char *, struct em_int_delay_info *, int, int);
274 /* Management and WOL Support */
275 static void	em_init_manageability(struct adapter *);
276 static void	em_release_manageability(struct adapter *);
277 static void     em_get_hw_control(struct adapter *);
278 static void     em_release_hw_control(struct adapter *);
279 static void	em_get_wakeup(device_t);
280 static void     em_enable_wakeup(device_t);
281 static int	em_enable_phy_wakeup(struct adapter *);
282 static void	em_led_func(void *, int);
283 static void	em_disable_aspm(struct adapter *);
284 
285 static int	em_irq_fast(void *);
286 
287 /* MSIX handlers */
288 static void	em_msix_tx(void *);
289 static void	em_msix_rx(void *);
290 static void	em_msix_link(void *);
291 static void	em_handle_tx(void *context, int pending);
292 static void	em_handle_rx(void *context, int pending);
293 static void	em_handle_link(void *context, int pending);
294 
295 static void	em_set_sysctl_value(struct adapter *, const char *,
296 		    const char *, int *, int);
297 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
298 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
299 
300 static __inline void em_rx_discard(struct rx_ring *, int);
301 
302 #ifdef DEVICE_POLLING
303 static poll_handler_t em_poll;
304 #endif /* DEVICE_POLLING */
305 
306 /*********************************************************************
307  *  FreeBSD Device Interface Entry Points
308  *********************************************************************/
309 
310 static device_method_t em_methods[] = {
311 	/* Device interface */
312 	DEVMETHOD(device_probe, em_probe),
313 	DEVMETHOD(device_attach, em_attach),
314 	DEVMETHOD(device_detach, em_detach),
315 	DEVMETHOD(device_shutdown, em_shutdown),
316 	DEVMETHOD(device_suspend, em_suspend),
317 	DEVMETHOD(device_resume, em_resume),
318 	DEVMETHOD_END
319 };
320 
321 static driver_t em_driver = {
322 	"em", em_methods, sizeof(struct adapter),
323 };
324 
325 devclass_t em_devclass;
326 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
327 MODULE_DEPEND(em, pci, 1, 1, 1);
328 MODULE_DEPEND(em, ether, 1, 1, 1);
329 
330 /*********************************************************************
331  *  Tunable default values.
332  *********************************************************************/
333 
334 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
335 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
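/*
 * Worked example: one hardware tick is ~1.024 usecs, so
 * EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks, and
 * EM_TICKS_TO_USECS(98)  = (1024 * 98 + 500) / 1000 = 100 usecs;
 * the two conversions round-trip to within one tick.
 */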
336 #define M_TSO_LEN			66
337 
338 /* Allow common code without TSO */
339 #ifndef CSUM_TSO
340 #define CSUM_TSO	0
341 #endif
342 
343 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
344 
345 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
346 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
347 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
348 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
349 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
350     0, "Default transmit interrupt delay in usecs");
351 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
352     0, "Default receive interrupt delay in usecs");
353 
354 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
355 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
356 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
357 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
358 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
359     &em_tx_abs_int_delay_dflt, 0,
360     "Default transmit interrupt delay limit in usecs");
361 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
362     &em_rx_abs_int_delay_dflt, 0,
363     "Default receive interrupt delay limit in usecs");
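
/*
 * Example (illustrative): these are loader tunables, so they can be set
 * from /boot/loader.conf, e.g.:
 *
 *	hw.em.tx_int_delay="66"
 *	hw.em.rx_int_delay="0"
 *
 * Values are in microseconds; the driver converts them back to hardware
 * ticks (see EM_USECS_TO_TICKS()) when programming the registers.
 */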
364 
365 static int em_rxd = EM_DEFAULT_RXD;
366 static int em_txd = EM_DEFAULT_TXD;
367 TUNABLE_INT("hw.em.rxd", &em_rxd);
368 TUNABLE_INT("hw.em.txd", &em_txd);
369 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
370     "Number of receive descriptors per queue");
371 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
372     "Number of transmit descriptors per queue");
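
/*
 * Example (illustrative): em_attach() rejects values whose ring size in
 * bytes is not a multiple of EM_DBA_ALIGN (128).  With 16-byte
 * descriptors any multiple of 8 qualifies, e.g. hw.em.txd="1024" gives
 * 1024 * 16 = 16384 bytes, which is 128-aligned.
 */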
373 
374 static int em_smart_pwr_down = FALSE;
375 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
376 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
377     0, "Set to true to leave smart power down enabled on newer adapters");
378 
379 /* Controls whether promiscuous also shows bad packets */
380 static int em_debug_sbp = FALSE;
381 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
382 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
383     "Show bad packets in promiscuous mode");
384 
385 static int em_enable_msix = TRUE;
386 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
387 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
388     "Enable MSI-X interrupts");
389 
390 /* How many packets rxeof tries to clean at a time */
391 static int em_rx_process_limit = 100;
392 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
393 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
394     &em_rx_process_limit, 0,
395     "Maximum number of received packets to process "
396     "at a time, -1 means unlimited");
397 
398 /* Energy efficient ethernet - default to OFF */
399 static int eee_setting = 1;
400 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
401 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
402     "Disable Energy Efficient Ethernet");
403 
404 /* Global used in WOL setup with multiport cards */
405 static int global_quad_port_a = 0;
406 
407 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
408 #include <dev/netmap/if_em_netmap.h>
409 #endif /* DEV_NETMAP */
410 
411 /*********************************************************************
412  *  Device identification routine
413  *
414  *  em_probe determines if the driver should be loaded on an
415  *  adapter based on the PCI vendor/device ID of the adapter.
416  *
417  *  return BUS_PROBE_DEFAULT on success, positive on failure
418  *********************************************************************/
419 
420 static int
421 em_probe(device_t dev)
422 {
423 	char		adapter_name[60];
424 	u16		pci_vendor_id = 0;
425 	u16		pci_device_id = 0;
426 	u16		pci_subvendor_id = 0;
427 	u16		pci_subdevice_id = 0;
428 	em_vendor_info_t *ent;
429 
430 	INIT_DEBUGOUT("em_probe: begin");
431 
432 	pci_vendor_id = pci_get_vendor(dev);
433 	if (pci_vendor_id != EM_VENDOR_ID)
434 		return (ENXIO);
435 
436 	pci_device_id = pci_get_device(dev);
437 	pci_subvendor_id = pci_get_subvendor(dev);
438 	pci_subdevice_id = pci_get_subdevice(dev);
439 
440 	ent = em_vendor_info_array;
441 	while (ent->vendor_id != 0) {
442 		if ((pci_vendor_id == ent->vendor_id) &&
443 		    (pci_device_id == ent->device_id) &&
444 
445 		    ((pci_subvendor_id == ent->subvendor_id) ||
446 		    (ent->subvendor_id == PCI_ANY_ID)) &&
447 
448 		    ((pci_subdevice_id == ent->subdevice_id) ||
449 		    (ent->subdevice_id == PCI_ANY_ID))) {
450 			sprintf(adapter_name, "%s %s",
451 				em_strings[ent->index],
452 				em_driver_version);
453 			device_set_desc_copy(dev, adapter_name);
454 			return (BUS_PROBE_DEFAULT);
455 		}
456 		ent++;
457 	}
458 
459 	return (ENXIO);
460 }
461 
462 /*********************************************************************
463  *  Device initialization routine
464  *
465  *  The attach entry point is called when the driver is being loaded.
466  *  This routine identifies the type of hardware, allocates all resources
467  *  and initializes the hardware.
468  *
469  *  return 0 on success, positive on failure
470  *********************************************************************/
471 
472 static int
473 em_attach(device_t dev)
474 {
475 	struct adapter	*adapter;
476 	struct e1000_hw	*hw;
477 	int		error = 0;
478 
479 	INIT_DEBUGOUT("em_attach: begin");
480 
481 	if (resource_disabled("em", device_get_unit(dev))) {
482 		device_printf(dev, "Disabled by device hint\n");
483 		return (ENXIO);
484 	}
485 
486 	adapter = device_get_softc(dev);
487 	adapter->dev = adapter->osdep.dev = dev;
488 	hw = &adapter->hw;
489 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
490 
491 	/* SYSCTL stuff */
492 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
493 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
494 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
495 	    em_sysctl_nvm_info, "I", "NVM Information");
496 
497 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
498 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
499 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
500 	    em_sysctl_debug_info, "I", "Debug Information");
501 
502 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
503 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
504 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
505 	    em_set_flowcntl, "I", "Flow Control");
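
	/*
	 * These handlers live under the device's sysctl tree, so e.g.
	 * (illustrative) "sysctl dev.em.0.debug=1" invokes
	 * em_sysctl_debug_info() for unit 0.
	 */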
506 
507 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
508 
509 	/* Determine hardware and mac info */
510 	em_identify_hardware(adapter);
511 
512 	/* Setup PCI resources */
513 	if (em_allocate_pci_resources(adapter)) {
514 		device_printf(dev, "Allocation of PCI resources failed\n");
515 		error = ENXIO;
516 		goto err_pci;
517 	}
518 
519 	/*
520 	** For ICH8 and family we need to
521 	** map the flash memory, and this
522 	** must happen after the MAC is
523 	** identified
524 	*/
525 	if ((hw->mac.type == e1000_ich8lan) ||
526 	    (hw->mac.type == e1000_ich9lan) ||
527 	    (hw->mac.type == e1000_ich10lan) ||
528 	    (hw->mac.type == e1000_pchlan) ||
529 	    (hw->mac.type == e1000_pch2lan) ||
530 	    (hw->mac.type == e1000_pch_lpt)) {
531 		int rid = EM_BAR_TYPE_FLASH;
532 		adapter->flash = bus_alloc_resource_any(dev,
533 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
534 		if (adapter->flash == NULL) {
535 			device_printf(dev, "Mapping of Flash failed\n");
536 			error = ENXIO;
537 			goto err_pci;
538 		}
539 		/* This is used in the shared code */
540 		hw->flash_address = (u8 *)adapter->flash;
541 		adapter->osdep.flash_bus_space_tag =
542 		    rman_get_bustag(adapter->flash);
543 		adapter->osdep.flash_bus_space_handle =
544 		    rman_get_bushandle(adapter->flash);
545 	}
546 
547 	/* Do Shared Code initialization */
548 	if (e1000_setup_init_funcs(hw, TRUE)) {
549 		device_printf(dev, "Setup of Shared code failed\n");
550 		error = ENXIO;
551 		goto err_pci;
552 	}
553 
554 	e1000_get_bus_info(hw);
555 
556 	/* Set up some sysctls for the tunable interrupt delays */
557 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
558 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
559 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
560 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
561 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
562 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
563 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
564 	    "receive interrupt delay limit in usecs",
565 	    &adapter->rx_abs_int_delay,
566 	    E1000_REGISTER(hw, E1000_RADV),
567 	    em_rx_abs_int_delay_dflt);
568 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
569 	    "transmit interrupt delay limit in usecs",
570 	    &adapter->tx_abs_int_delay,
571 	    E1000_REGISTER(hw, E1000_TADV),
572 	    em_tx_abs_int_delay_dflt);
573 
574 	/* Sysctl for limiting the amount of work done in the taskqueue */
575 	em_set_sysctl_value(adapter, "rx_processing_limit",
576 	    "max number of rx packets to process", &adapter->rx_process_limit,
577 	    em_rx_process_limit);
578 
579 	/*
580 	 * Validate number of transmit and receive descriptors. It
581 	 * must not exceed the hardware maximum, and the resulting
582 	 * ring size in bytes must be a multiple of EM_DBA_ALIGN.
583 	 */
584 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
585 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
586 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
587 		    EM_DEFAULT_TXD, em_txd);
588 		adapter->num_tx_desc = EM_DEFAULT_TXD;
589 	} else
590 		adapter->num_tx_desc = em_txd;
591 
592 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
593 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
594 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
595 		    EM_DEFAULT_RXD, em_rxd);
596 		adapter->num_rx_desc = EM_DEFAULT_RXD;
597 	} else
598 		adapter->num_rx_desc = em_rxd;
599 
600 	hw->mac.autoneg = DO_AUTO_NEG;
601 	hw->phy.autoneg_wait_to_complete = FALSE;
602 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
603 
604 	/* Copper options */
605 	if (hw->phy.media_type == e1000_media_type_copper) {
606 		hw->phy.mdix = AUTO_ALL_MODES;
607 		hw->phy.disable_polarity_correction = FALSE;
608 		hw->phy.ms_type = EM_MASTER_SLAVE;
609 	}
610 
611 	/*
612 	 * Set the frame limits assuming
613 	 * standard ethernet sized frames.
614 	 */
615 	adapter->hw.mac.max_frame_size =
616 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
617 
618 	/*
619 	 * This controls when hardware reports transmit completion
620 	 * status.
621 	 */
622 	hw->mac.report_tx_early = 1;
623 
624 	/*
625 	** Get queue/ring memory
626 	*/
627 	if (em_allocate_queues(adapter)) {
628 		error = ENOMEM;
629 		goto err_pci;
630 	}
631 
632 	/* Allocate multicast array memory. */
633 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
634 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
635 	if (adapter->mta == NULL) {
636 		device_printf(dev, "Can not allocate multicast setup array\n");
637 		error = ENOMEM;
638 		goto err_late;
639 	}
640 
641 	/* Check SOL/IDER usage */
642 	if (e1000_check_reset_block(hw))
643 		device_printf(dev, "PHY reset is blocked"
644 		    " due to SOL/IDER session.\n");
645 
646 	/* Sysctl for setting Energy Efficient Ethernet */
647 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
648 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
649 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
650 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
651 	    adapter, 0, em_sysctl_eee, "I",
652 	    "Disable Energy Efficient Ethernet");
653 
654 	/*
655 	** Start from a known state; this is
656 	** important for reading the NVM and
657 	** MAC address from it.
658 	*/
659 	e1000_reset_hw(hw);
660 
661 
662 	/* Make sure we have a good EEPROM before we read from it */
663 	if (e1000_validate_nvm_checksum(hw) < 0) {
664 		/*
665 		** Some PCI-E parts fail the first check due to
666 		** the link being in a sleep state; call it again,
667 		** and if it fails a second time it's a real issue.
668 		*/
669 		if (e1000_validate_nvm_checksum(hw) < 0) {
670 			device_printf(dev,
671 			    "The EEPROM Checksum Is Not Valid\n");
672 			error = EIO;
673 			goto err_late;
674 		}
675 	}
676 
677 	/* Copy the permanent MAC address out of the EEPROM */
678 	if (e1000_read_mac_addr(hw) < 0) {
679 		device_printf(dev, "EEPROM read error while reading MAC"
680 		    " address\n");
681 		error = EIO;
682 		goto err_late;
683 	}
684 
685 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
686 		device_printf(dev, "Invalid MAC address\n");
687 		error = EIO;
688 		goto err_late;
689 	}
690 
691 	/*
692 	**  Do interrupt configuration
693 	*/
694 	if (adapter->msix > 1) /* Do MSIX */
695 		error = em_allocate_msix(adapter);
696 	else  /* MSI or Legacy */
697 		error = em_allocate_legacy(adapter);
698 	if (error)
699 		goto err_late;
700 
701 	/*
702 	 * Get Wake-on-Lan and Management info for later use
703 	 */
704 	em_get_wakeup(dev);
705 
706 	/* Setup OS specific network interface */
707 	if (em_setup_interface(dev, adapter) != 0)
708 		goto err_late;
709 
710 	em_reset(adapter);
711 
712 	/* Initialize statistics */
713 	em_update_stats_counters(adapter);
714 
715 	hw->mac.get_link_status = 1;
716 	em_update_link_status(adapter);
717 
718 	/* Register for VLAN events */
719 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
720 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
721 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
722 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
723 
724 	em_add_hw_stats(adapter);
725 
726 	/* Non-AMT based hardware can now take control from firmware */
727 	if (adapter->has_manage && !adapter->has_amt)
728 		em_get_hw_control(adapter);
729 
730 	/* Tell the stack that the interface is not active */
731 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
732 	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
733 
734 	adapter->led_dev = led_create(em_led_func, adapter,
735 	    device_get_nameunit(dev));
736 #ifdef DEV_NETMAP
737 	em_netmap_attach(adapter);
738 #endif /* DEV_NETMAP */
739 
740 	INIT_DEBUGOUT("em_attach: end");
741 
742 	return (0);
743 
744 err_late:
745 	em_free_transmit_structures(adapter);
746 	em_free_receive_structures(adapter);
747 	em_release_hw_control(adapter);
748 	if (adapter->ifp != NULL)
749 		if_free(adapter->ifp);
750 err_pci:
751 	em_free_pci_resources(adapter);
752 	free(adapter->mta, M_DEVBUF);
753 	EM_CORE_LOCK_DESTROY(adapter);
754 
755 	return (error);
756 }
757 
758 /*********************************************************************
759  *  Device removal routine
760  *
761  *  The detach entry point is called when the driver is being removed.
762  *  This routine stops the adapter and deallocates all the resources
763  *  that were allocated for driver operation.
764  *
765  *  return 0 on success, positive on failure
766  *********************************************************************/
767 
768 static int
769 em_detach(device_t dev)
770 {
771 	struct adapter	*adapter = device_get_softc(dev);
772 	struct ifnet	*ifp = adapter->ifp;
773 
774 	INIT_DEBUGOUT("em_detach: begin");
775 
776 	/* Make sure VLANS are not using driver */
777 	if (adapter->ifp->if_vlantrunk != NULL) {
778 		device_printf(dev,"Vlan in use, detach first\n");
779 		return (EBUSY);
780 	}
781 
782 #ifdef DEVICE_POLLING
783 	if (ifp->if_capenable & IFCAP_POLLING)
784 		ether_poll_deregister(ifp);
785 #endif
786 
787 	if (adapter->led_dev != NULL)
788 		led_destroy(adapter->led_dev);
789 
790 	EM_CORE_LOCK(adapter);
791 	adapter->in_detach = 1;
792 	em_stop(adapter);
793 	EM_CORE_UNLOCK(adapter);
794 	EM_CORE_LOCK_DESTROY(adapter);
795 
796 	e1000_phy_hw_reset(&adapter->hw);
797 
798 	em_release_manageability(adapter);
799 	em_release_hw_control(adapter);
800 
801 	/* Unregister VLAN events */
802 	if (adapter->vlan_attach != NULL)
803 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
804 	if (adapter->vlan_detach != NULL)
805 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
806 
807 	ether_ifdetach(adapter->ifp);
808 	callout_drain(&adapter->timer);
809 
810 #ifdef DEV_NETMAP
811 	netmap_detach(ifp);
812 #endif /* DEV_NETMAP */
813 
814 	em_free_pci_resources(adapter);
815 	bus_generic_detach(dev);
816 	if_free(ifp);
817 
818 	em_free_transmit_structures(adapter);
819 	em_free_receive_structures(adapter);
820 
821 	em_release_hw_control(adapter);
822 	free(adapter->mta, M_DEVBUF);
823 
824 	return (0);
825 }
826 
827 /*********************************************************************
828  *
829  *  Shutdown entry point
830  *
831  **********************************************************************/
832 
833 static int
834 em_shutdown(device_t dev)
835 {
836 	return em_suspend(dev);
837 }
838 
839 /*
840  * Suspend/resume device methods.
841  */
842 static int
843 em_suspend(device_t dev)
844 {
845 	struct adapter *adapter = device_get_softc(dev);
846 
847 	EM_CORE_LOCK(adapter);
848 
849 	em_release_manageability(adapter);
850 	em_release_hw_control(adapter);
851 	em_enable_wakeup(dev);
852 
853 	EM_CORE_UNLOCK(adapter);
854 
855 	return bus_generic_suspend(dev);
856 }
857 
858 static int
859 em_resume(device_t dev)
860 {
861 	struct adapter *adapter = device_get_softc(dev);
862 	struct tx_ring	*txr = adapter->tx_rings;
863 	struct ifnet *ifp = adapter->ifp;
864 
865 	EM_CORE_LOCK(adapter);
866 	if (adapter->hw.mac.type == e1000_pch2lan)
867 		e1000_resume_workarounds_pchlan(&adapter->hw);
868 	em_init_locked(adapter);
869 	em_init_manageability(adapter);
870 
871 	if ((ifp->if_flags & IFF_UP) &&
872 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
873 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
874 			EM_TX_LOCK(txr);
875 #ifdef EM_MULTIQUEUE
876 			if (!drbr_empty(ifp, txr->br))
877 				em_mq_start_locked(ifp, txr, NULL);
878 #else
879 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
880 				em_start_locked(ifp, txr);
881 #endif
882 			EM_TX_UNLOCK(txr);
883 		}
884 	}
885 	EM_CORE_UNLOCK(adapter);
886 
887 	return bus_generic_resume(dev);
888 }
889 
890 
891 #ifdef EM_MULTIQUEUE
892 /*********************************************************************
893  *  Multiqueue Transmit routines
894  *
895  *  em_mq_start is called by the stack to initiate a transmit.
896  *  However, if the ring is busy the driver can queue the request
897  *  rather than do an immediate send.  That queuing, rather than
898  *  having multiple hardware tx queues, is this driver's advantage.
899  **********************************************************************/
900 static int
901 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
902 {
903 	struct adapter  *adapter = txr->adapter;
904 	struct mbuf	*next;
905 	int		err = 0, enq = 0;
906 
907 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
908 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
909 		if (m != NULL)
910 			err = drbr_enqueue(ifp, txr->br, m);
911 		return (err);
912 	}
913 
914 	enq = 0;
915 	if (m != NULL) {
916 		err = drbr_enqueue(ifp, txr->br, m);
917 		if (err)
918 			return (err);
919 	}
920 
921 	/* Process the queue */
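	/*
	 * drbr_peek() leaves each mbuf on the ring until em_xmit()
	 * accepts it: on success we drbr_advance() past it; on failure
	 * we drbr_putback() the (possibly defragmented) chain for a
	 * later retry, unless em_xmit() already freed it and NULLed the
	 * pointer, in which case we just advance past the slot.
	 */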
922 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
923 		if ((err = em_xmit(txr, &next)) != 0) {
924 			if (next == NULL)
925 				drbr_advance(ifp, txr->br);
926 			else
927 				drbr_putback(ifp, txr->br, next);
928 			break;
929 		}
930 		drbr_advance(ifp, txr->br);
931 		enq++;
932 		ifp->if_obytes += next->m_pkthdr.len;
933 		if (next->m_flags & M_MCAST)
934 			ifp->if_omcasts++;
935 		ETHER_BPF_MTAP(ifp, next);
936 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
937 			break;
938 	}
939 
940 	if (enq > 0) {
941 		/* Set the watchdog */
942 		txr->queue_status = EM_QUEUE_WORKING;
943 		txr->watchdog_time = ticks;
944 	}
945 
946 	if (txr->tx_avail < EM_MAX_SCATTER)
947 		em_txeof(txr);
948 	if (txr->tx_avail < EM_MAX_SCATTER)
949 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
950 	return (err);
951 }
952 
953 /*
954 ** Multiqueue capable stack interface
955 */
956 static int
957 em_mq_start(struct ifnet *ifp, struct mbuf *m)
958 {
959 	struct adapter	*adapter = ifp->if_softc;
960 	struct tx_ring	*txr = adapter->tx_rings;
961 	int 		error;
962 
963 	if (EM_TX_TRYLOCK(txr)) {
964 		error = em_mq_start_locked(ifp, txr, m);
965 		EM_TX_UNLOCK(txr);
966 	} else
967 		error = drbr_enqueue(ifp, txr->br, m);
968 
969 	return (error);
970 }
971 
972 /*
973 ** Flush all ring buffers
974 */
975 static void
976 em_qflush(struct ifnet *ifp)
977 {
978 	struct adapter  *adapter = ifp->if_softc;
979 	struct tx_ring  *txr = adapter->tx_rings;
980 	struct mbuf     *m;
981 
982 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
983 		EM_TX_LOCK(txr);
984 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
985 			m_freem(m);
986 		EM_TX_UNLOCK(txr);
987 	}
988 	if_qflush(ifp);
989 }
990 #else  /* !EM_MULTIQUEUE */
991 
992 static void
993 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
994 {
995 	struct adapter	*adapter = ifp->if_softc;
996 	struct mbuf	*m_head;
997 
998 	EM_TX_LOCK_ASSERT(txr);
999 
1000 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1001 	    IFF_DRV_RUNNING)
1002 		return;
1003 
1004 	if (!adapter->link_active)
1005 		return;
1006 
1007 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1008 		/* Call cleanup if number of TX descriptors low */
1009 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1010 			em_txeof(txr);
1011 		if (txr->tx_avail < EM_MAX_SCATTER) {
1012 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1013 			break;
1014 		}
1015 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1016 		if (m_head == NULL)
1017 			break;
1018 		/*
1019 		 *  Encapsulation can modify our pointer and/or make it
1020 		 *  NULL on failure.  In that event, we can't requeue.
1021 		 */
1022 		if (em_xmit(txr, &m_head)) {
1023 			if (m_head == NULL)
1024 				break;
1025 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1026 			break;
1027 		}
1028 
1029 		/* Send a copy of the frame to the BPF listener */
1030 		ETHER_BPF_MTAP(ifp, m_head);
1031 
1032 		/* Set timeout in case hardware has problems transmitting. */
1033 		txr->watchdog_time = ticks;
1034 		txr->queue_status = EM_QUEUE_WORKING;
1035 	}
1036 
1037 	return;
1038 }
1039 
1040 static void
1041 em_start(struct ifnet *ifp)
1042 {
1043 	struct adapter	*adapter = ifp->if_softc;
1044 	struct tx_ring	*txr = adapter->tx_rings;
1045 
1046 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1047 		EM_TX_LOCK(txr);
1048 		em_start_locked(ifp, txr);
1049 		EM_TX_UNLOCK(txr);
1050 	}
1051 	return;
1052 }
1053 #endif /* EM_MULTIQUEUE */
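
#if 0	/* Illustrative sketch (not compiled): how the two transmit paths
	 * above are handed to the stack; the real assignments live in
	 * em_setup_interface(), later in this file. */
#ifdef EM_MULTIQUEUE
	ifp->if_transmit = em_mq_start;	/* per-packet entry point */
	ifp->if_qflush = em_qflush;	/* flush all ring buffers */
#else
	ifp->if_start = em_start;	/* legacy if_snd queue path */
#endif
#endif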
1054 
1055 /*********************************************************************
1056  *  Ioctl entry point
1057  *
1058  *  em_ioctl is called when the user wants to configure the
1059  *  interface.
1060  *
1061  *  return 0 on success, positive on failure
1062  **********************************************************************/
1063 
1064 static int
1065 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1066 {
1067 	struct adapter	*adapter = ifp->if_softc;
1068 	struct ifreq	*ifr = (struct ifreq *)data;
1069 #if defined(INET) || defined(INET6)
1070 	struct ifaddr	*ifa = (struct ifaddr *)data;
1071 #endif
1072 	bool		avoid_reset = FALSE;
1073 	int		error = 0;
1074 
1075 	if (adapter->in_detach)
1076 		return (error);
1077 
1078 	switch (command) {
1079 	case SIOCSIFADDR:
1080 #ifdef INET
1081 		if (ifa->ifa_addr->sa_family == AF_INET)
1082 			avoid_reset = TRUE;
1083 #endif
1084 #ifdef INET6
1085 		if (ifa->ifa_addr->sa_family == AF_INET6)
1086 			avoid_reset = TRUE;
1087 #endif
1088 		/*
1089 		** Calling init results in link renegotiation,
1090 		** so we avoid doing it when possible.
1091 		*/
1092 		if (avoid_reset) {
1093 			ifp->if_flags |= IFF_UP;
1094 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1095 				em_init(adapter);
1096 #ifdef INET
1097 			if (!(ifp->if_flags & IFF_NOARP))
1098 				arp_ifinit(ifp, ifa);
1099 #endif
1100 		} else
1101 			error = ether_ioctl(ifp, command, data);
1102 		break;
1103 	case SIOCSIFMTU:
1104 	    {
1105 		int max_frame_size;
1106 
1107 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1108 
1109 		EM_CORE_LOCK(adapter);
1110 		switch (adapter->hw.mac.type) {
1111 		case e1000_82571:
1112 		case e1000_82572:
1113 		case e1000_ich9lan:
1114 		case e1000_ich10lan:
1115 		case e1000_pch2lan:
1116 		case e1000_pch_lpt:
1117 		case e1000_82574:
1118 		case e1000_82583:
1119 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1120 			max_frame_size = 9234;
1121 			break;
1122 		case e1000_pchlan:
1123 			max_frame_size = 4096;
1124 			break;
1125 			/* Adapters that do not support jumbo frames */
1126 		case e1000_ich8lan:
1127 			max_frame_size = ETHER_MAX_LEN;
1128 			break;
1129 		default:
1130 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1131 		}
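		/*
		 * Example: on 82571 the 9234-byte hardware limit allows
		 * an MTU of up to 9234 - 14 (header) - 4 (CRC) = 9216
		 * bytes.
		 */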
1132 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1133 		    ETHER_CRC_LEN) {
1134 			EM_CORE_UNLOCK(adapter);
1135 			error = EINVAL;
1136 			break;
1137 		}
1138 
1139 		ifp->if_mtu = ifr->ifr_mtu;
1140 		adapter->hw.mac.max_frame_size =
1141 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1142 		em_init_locked(adapter);
1143 		EM_CORE_UNLOCK(adapter);
1144 		break;
1145 	    }
1146 	case SIOCSIFFLAGS:
1147 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS "
1148 		    "(Set Interface Flags)");
1149 		EM_CORE_LOCK(adapter);
1150 		if (ifp->if_flags & IFF_UP) {
1151 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1152 				if ((ifp->if_flags ^ adapter->if_flags) &
1153 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1154 					em_disable_promisc(adapter);
1155 					em_set_promisc(adapter);
1156 				}
1157 			} else
1158 				em_init_locked(adapter);
1159 		} else
1160 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1161 				em_stop(adapter);
1162 		adapter->if_flags = ifp->if_flags;
1163 		EM_CORE_UNLOCK(adapter);
1164 		break;
1165 	case SIOCADDMULTI:
1166 	case SIOCDELMULTI:
1167 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1168 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1169 			EM_CORE_LOCK(adapter);
1170 			em_disable_intr(adapter);
1171 			em_set_multi(adapter);
1172 #ifdef DEVICE_POLLING
1173 			if (!(ifp->if_capenable & IFCAP_POLLING))
1174 #endif
1175 				em_enable_intr(adapter);
1176 			EM_CORE_UNLOCK(adapter);
1177 		}
1178 		break;
1179 	case SIOCSIFMEDIA:
1180 		/* Check SOL/IDER usage */
1181 		EM_CORE_LOCK(adapter);
1182 		if (e1000_check_reset_block(&adapter->hw)) {
1183 			EM_CORE_UNLOCK(adapter);
1184 			device_printf(adapter->dev, "Media change is"
1185 			    " blocked due to SOL/IDER session.\n");
1186 			break;
1187 		}
1188 		EM_CORE_UNLOCK(adapter);
1189 		/* FALLTHROUGH */
1190 	case SIOCGIFMEDIA:
1191 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA "
1192 		    "(Get/Set Interface Media)");
1193 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1194 		break;
1195 	case SIOCSIFCAP:
1196 	    {
1197 		int mask, reinit;
1198 
1199 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1200 		reinit = 0;
1201 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1202 #ifdef DEVICE_POLLING
1203 		if (mask & IFCAP_POLLING) {
1204 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1205 				error = ether_poll_register(em_poll, ifp);
1206 				if (error)
1207 					return (error);
1208 				EM_CORE_LOCK(adapter);
1209 				em_disable_intr(adapter);
1210 				ifp->if_capenable |= IFCAP_POLLING;
1211 				EM_CORE_UNLOCK(adapter);
1212 			} else {
1213 				error = ether_poll_deregister(ifp);
1214 				/* Enable interrupt even in error case */
1215 				EM_CORE_LOCK(adapter);
1216 				em_enable_intr(adapter);
1217 				ifp->if_capenable &= ~IFCAP_POLLING;
1218 				EM_CORE_UNLOCK(adapter);
1219 			}
1220 		}
1221 #endif
1222 		if (mask & IFCAP_HWCSUM) {
1223 			ifp->if_capenable ^= IFCAP_HWCSUM;
1224 			reinit = 1;
1225 		}
1226 		if (mask & IFCAP_TSO4) {
1227 			ifp->if_capenable ^= IFCAP_TSO4;
1228 			reinit = 1;
1229 		}
1230 		if (mask & IFCAP_VLAN_HWTAGGING) {
1231 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1232 			reinit = 1;
1233 		}
1234 		if (mask & IFCAP_VLAN_HWFILTER) {
1235 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1236 			reinit = 1;
1237 		}
1238 		if (mask & IFCAP_VLAN_HWTSO) {
1239 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1240 			reinit = 1;
1241 		}
1242 		if ((mask & IFCAP_WOL) &&
1243 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1244 			if (mask & IFCAP_WOL_MCAST)
1245 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1246 			if (mask & IFCAP_WOL_MAGIC)
1247 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1248 		}
1249 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1250 			em_init(adapter);
1251 		VLAN_CAPABILITIES(ifp);
1252 		break;
1253 	    }
1254 
1255 	default:
1256 		error = ether_ioctl(ifp, command, data);
1257 		break;
1258 	}
1259 
1260 	return (error);
1261 }
1262 
1263 
1264 /*********************************************************************
1265  *  Init entry point
1266  *
1267  *  This routine is used in two ways. It is used by the stack as
1268  *  init entry point in network interface structure. It is also used
1269  *  by the driver as a hw/sw initialization routine to get to a
1270  *  consistent state.
1271  *
1272  *  return 0 on success, positive on failure
1273  **********************************************************************/
1274 
1275 static void
1276 em_init_locked(struct adapter *adapter)
1277 {
1278 	struct ifnet	*ifp = adapter->ifp;
1279 	device_t	dev = adapter->dev;
1280 
1281 	INIT_DEBUGOUT("em_init: begin");
1282 
1283 	EM_CORE_LOCK_ASSERT(adapter);
1284 
1285 	em_disable_intr(adapter);
1286 	callout_stop(&adapter->timer);
1287 
1288 	/* Get the latest mac address, User can use a LAA */
1289 	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1290 	    ETHER_ADDR_LEN);
1291 
1292 	/* Put the address into the Receive Address Array */
1293 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1294 
1295 	/*
1296 	 * With the 82571 adapter, RAR[0] may be overwritten
1297 	 * when the other port is reset, so we make a duplicate
1298 	 * in RAR[14] for that eventuality; this assures
1299 	 * the interface continues to function.
1300 	 */
1301 	if (adapter->hw.mac.type == e1000_82571) {
1302 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1303 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1304 		    E1000_RAR_ENTRIES - 1);
1305 	}
1306 
1307 	/* Initialize the hardware */
1308 	em_reset(adapter);
1309 	em_update_link_status(adapter);
1310 
1311 	/* Setup VLAN support, basic and offload if available */
1312 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1313 
1314 	/* Set hardware offload abilities */
1315 	ifp->if_hwassist = 0;
1316 	if (ifp->if_capenable & IFCAP_TXCSUM)
1317 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1318 	if (ifp->if_capenable & IFCAP_TSO4)
1319 		ifp->if_hwassist |= CSUM_TSO;
1320 
1321 	/* Configure for OS presence */
1322 	em_init_manageability(adapter);
1323 
1324 	/* Prepare transmit descriptors and buffers */
1325 	em_setup_transmit_structures(adapter);
1326 	em_initialize_transmit_unit(adapter);
1327 
1328 	/* Setup Multicast table */
1329 	em_set_multi(adapter);
1330 
1331 	/*
1332 	** Figure out the desired mbuf
1333 	** pool for doing jumbos
1334 	*/
1335 	if (adapter->hw.mac.max_frame_size <= 2048)
1336 		adapter->rx_mbuf_sz = MCLBYTES;
1337 	else if (adapter->hw.mac.max_frame_size <= 4096)
1338 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1339 	else
1340 		adapter->rx_mbuf_sz = MJUM9BYTES;
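	/* e.g. a 9000-byte MTU yields a 9018-byte max frame, selecting
	 * MJUM9BYTES (9k) clusters. */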
1341 
1342 	/* Prepare receive descriptors and buffers */
1343 	if (em_setup_receive_structures(adapter)) {
1344 		device_printf(dev, "Could not setup receive structures\n");
1345 		em_stop(adapter);
1346 		return;
1347 	}
1348 	em_initialize_receive_unit(adapter);
1349 
1350 	/* Use real VLAN Filter support? */
1351 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1352 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1353 			/* Use real VLAN Filter support */
1354 			em_setup_vlan_hw_support(adapter);
1355 		else {
1356 			u32 ctrl;
1357 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1358 			ctrl |= E1000_CTRL_VME;
1359 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1360 		}
1361 	}
1362 
1363 	/* Don't lose promiscuous settings */
1364 	em_set_promisc(adapter);
1365 
1366 	/* Set the interface as ACTIVE */
1367 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1368 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1369 
1370 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1371 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1372 
1373 	/* MSI/X configuration for 82574 */
1374 	if (adapter->hw.mac.type == e1000_82574) {
1375 		int tmp;
1376 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1377 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1378 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1379 		/* Set the IVAR - interrupt vector routing. */
1380 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1381 	}
1382 
1383 #ifdef DEVICE_POLLING
1384 	/*
1385 	 * Only enable interrupts if we are not polling, make sure
1386 	 * they are off otherwise.
1387 	 */
1388 	if (ifp->if_capenable & IFCAP_POLLING)
1389 		em_disable_intr(adapter);
1390 	else
1391 #endif /* DEVICE_POLLING */
1392 		em_enable_intr(adapter);
1393 
1394 	/* AMT based hardware can now take control from firmware */
1395 	if (adapter->has_manage && adapter->has_amt)
1396 		em_get_hw_control(adapter);
1397 }
1398 
1399 static void
1400 em_init(void *arg)
1401 {
1402 	struct adapter *adapter = arg;
1403 
1404 	EM_CORE_LOCK(adapter);
1405 	em_init_locked(adapter);
1406 	EM_CORE_UNLOCK(adapter);
1407 }
1408 
1409 
1410 #ifdef DEVICE_POLLING
1411 /*********************************************************************
1412  *
1413  *  Legacy polling routine: note this only works with a single queue
1414  *
1415  *********************************************************************/
1416 static int
1417 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1418 {
1419 	struct adapter *adapter = ifp->if_softc;
1420 	struct tx_ring	*txr = adapter->tx_rings;
1421 	struct rx_ring	*rxr = adapter->rx_rings;
1422 	u32		reg_icr;
1423 	int		rx_done;
1424 
1425 	EM_CORE_LOCK(adapter);
1426 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1427 		EM_CORE_UNLOCK(adapter);
1428 		return (0);
1429 	}
1430 
1431 	if (cmd == POLL_AND_CHECK_STATUS) {
1432 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1433 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1434 			callout_stop(&adapter->timer);
1435 			adapter->hw.mac.get_link_status = 1;
1436 			em_update_link_status(adapter);
1437 			callout_reset(&adapter->timer, hz,
1438 			    em_local_timer, adapter);
1439 		}
1440 	}
1441 	EM_CORE_UNLOCK(adapter);
1442 
1443 	em_rxeof(rxr, count, &rx_done);
1444 
1445 	EM_TX_LOCK(txr);
1446 	em_txeof(txr);
1447 #ifdef EM_MULTIQUEUE
1448 	if (!drbr_empty(ifp, txr->br))
1449 		em_mq_start_locked(ifp, txr, NULL);
1450 #else
1451 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1452 		em_start_locked(ifp, txr);
1453 #endif
1454 	EM_TX_UNLOCK(txr);
1455 
1456 	return (rx_done);
1457 }
1458 #endif /* DEVICE_POLLING */
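
/*
 * With DEVICE_POLLING compiled in, polling is enabled per interface,
 * e.g. (illustrative) "ifconfig em0 polling"; the SIOCSIFCAP handler
 * above then calls ether_poll_register(em_poll, ifp).
 */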
1459 
1460 
1461 /*********************************************************************
1462  *
1463  *  Fast Legacy/MSI Combined Interrupt Service routine
1464  *
1465  *********************************************************************/
1466 static int
1467 em_irq_fast(void *arg)
1468 {
1469 	struct adapter	*adapter = arg;
1470 	struct ifnet	*ifp;
1471 	u32		reg_icr;
1472 
1473 	ifp = adapter->ifp;
1474 
1475 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1476 
1477 	/* Hot eject?  */
1478 	if (reg_icr == 0xffffffff)
1479 		return FILTER_STRAY;
1480 
1481 	/* Definitely not our interrupt.  */
1482 	if (reg_icr == 0x0)
1483 		return FILTER_STRAY;
1484 
1485 	/*
1486 	 * Starting with the 82571 chip, bit 31 should be used to
1487 	 * determine whether the interrupt belongs to us.
1488 	 */
1489 	if (adapter->hw.mac.type >= e1000_82571 &&
1490 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1491 		return FILTER_STRAY;
1492 
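	/*
	 * The filter runs in interrupt context and must stay short: mask
	 * further interrupts and defer the RX/TX work to em_handle_que()
	 * via the que_task taskqueue.
	 */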
1493 	em_disable_intr(adapter);
1494 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1495 
1496 	/* Link status change */
1497 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1498 		adapter->hw.mac.get_link_status = 1;
1499 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1500 	}
1501 
1502 	if (reg_icr & E1000_ICR_RXO)
1503 		adapter->rx_overruns++;
1504 	return FILTER_HANDLED;
1505 }
1506 
1507 /* Combined RX/TX handler, used by Legacy and MSI */
1508 static void
1509 em_handle_que(void *context, int pending)
1510 {
1511 	struct adapter	*adapter = context;
1512 	struct ifnet	*ifp = adapter->ifp;
1513 	struct tx_ring	*txr = adapter->tx_rings;
1514 	struct rx_ring	*rxr = adapter->rx_rings;
1515 
1516 
1517 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1518 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1519 		EM_TX_LOCK(txr);
1520 		em_txeof(txr);
1521 #ifdef EM_MULTIQUEUE
1522 		if (!drbr_empty(ifp, txr->br))
1523 			em_mq_start_locked(ifp, txr, NULL);
1524 #else
1525 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1526 			em_start_locked(ifp, txr);
1527 #endif
1528 		EM_TX_UNLOCK(txr);
1529 		if (more) {
1530 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1531 			return;
1532 		}
1533 	}
1534 
1535 	em_enable_intr(adapter);
1536 	return;
1537 }
1538 
1539 
1540 /*********************************************************************
1541  *
1542  *  MSIX Interrupt Service Routines
1543  *
1544  **********************************************************************/
1545 static void
1546 em_msix_tx(void *arg)
1547 {
1548 	struct tx_ring *txr = arg;
1549 	struct adapter *adapter = txr->adapter;
1550 	struct ifnet	*ifp = adapter->ifp;
1551 
1552 	++txr->tx_irq;
1553 	EM_TX_LOCK(txr);
1554 	em_txeof(txr);
1555 #ifdef EM_MULTIQUEUE
1556 	if (!drbr_empty(ifp, txr->br))
1557 		em_mq_start_locked(ifp, txr, NULL);
1558 #else
1559 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1560 		em_start_locked(ifp, txr);
1561 #endif
1562 	/* Reenable this interrupt */
1563 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1564 	EM_TX_UNLOCK(txr);
1565 	return;
1566 }
1567 
1568 /*********************************************************************
1569  *
1570  *  MSIX RX Interrupt Service routine
1571  *
1572  **********************************************************************/
1573 
1574 static void
1575 em_msix_rx(void *arg)
1576 {
1577 	struct rx_ring	*rxr = arg;
1578 	struct adapter	*adapter = rxr->adapter;
1579 	bool		more;
1580 
1581 	++rxr->rx_irq;
1582 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1583 		return;
1584 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1585 	if (more)
1586 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1587 	else
1588 		/* Reenable this interrupt */
1589 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1590 	return;
1591 }
1592 
1593 /*********************************************************************
1594  *
1595  *  MSIX Link Fast Interrupt Service routine
1596  *
1597  **********************************************************************/
1598 static void
1599 em_msix_link(void *arg)
1600 {
1601 	struct adapter	*adapter = arg;
1602 	u32		reg_icr;
1603 
1604 	++adapter->link_irq;
1605 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1606 
1607 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1608 		adapter->hw.mac.get_link_status = 1;
1609 		em_handle_link(adapter, 0);
1610 	} else
1611 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1612 		    EM_MSIX_LINK | E1000_IMS_LSC);
1613 	return;
1614 }
1615 
1616 static void
1617 em_handle_rx(void *context, int pending)
1618 {
1619 	struct rx_ring	*rxr = context;
1620 	struct adapter	*adapter = rxr->adapter;
1621 	bool		more;
1622 
1623 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1624 	if (more)
1625 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1626 	else
1627 		/* Reenable this interrupt */
1628 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1629 }
1630 
1631 static void
1632 em_handle_tx(void *context, int pending)
1633 {
1634 	struct tx_ring	*txr = context;
1635 	struct adapter	*adapter = txr->adapter;
1636 	struct ifnet	*ifp = adapter->ifp;
1637 
1638 	EM_TX_LOCK(txr);
1639 	em_txeof(txr);
1640 #ifdef EM_MULTIQUEUE
1641 	if (!drbr_empty(ifp, txr->br))
1642 		em_mq_start_locked(ifp, txr, NULL);
1643 #else
1644 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1645 		em_start_locked(ifp, txr);
1646 #endif
1647 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1648 	EM_TX_UNLOCK(txr);
1649 }
1650 
1651 static void
1652 em_handle_link(void *context, int pending)
1653 {
1654 	struct adapter	*adapter = context;
1655 	struct tx_ring	*txr = adapter->tx_rings;
1656 	struct ifnet *ifp = adapter->ifp;
1657 
1658 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1659 		return;
1660 
1661 	EM_CORE_LOCK(adapter);
1662 	callout_stop(&adapter->timer);
1663 	em_update_link_status(adapter);
1664 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1665 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1666 	    EM_MSIX_LINK | E1000_IMS_LSC);
1667 	if (adapter->link_active) {
1668 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1669 			EM_TX_LOCK(txr);
1670 #ifdef EM_MULTIQUEUE
1671 			if (!drbr_empty(ifp, txr->br))
1672 				em_mq_start_locked(ifp, txr, NULL);
1673 #else
1674 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1675 				em_start_locked(ifp, txr);
1676 #endif
1677 			EM_TX_UNLOCK(txr);
1678 		}
1679 	}
1680 	EM_CORE_UNLOCK(adapter);
1681 }
1682 
1683 
1684 /*********************************************************************
1685  *
1686  *  Media Ioctl callback
1687  *
1688  *  This routine is called whenever the user queries the status of
1689  *  the interface using ifconfig.
1690  *
1691  **********************************************************************/
1692 static void
1693 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1694 {
1695 	struct adapter *adapter = ifp->if_softc;
1696 	u_char fiber_type = IFM_1000_SX;
1697 
1698 	INIT_DEBUGOUT("em_media_status: begin");
1699 
1700 	EM_CORE_LOCK(adapter);
1701 	em_update_link_status(adapter);
1702 
1703 	ifmr->ifm_status = IFM_AVALID;
1704 	ifmr->ifm_active = IFM_ETHER;
1705 
1706 	if (!adapter->link_active) {
1707 		EM_CORE_UNLOCK(adapter);
1708 		return;
1709 	}
1710 
1711 	ifmr->ifm_status |= IFM_ACTIVE;
1712 
1713 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1714 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1715 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1716 	} else {
1717 		switch (adapter->link_speed) {
1718 		case 10:
1719 			ifmr->ifm_active |= IFM_10_T;
1720 			break;
1721 		case 100:
1722 			ifmr->ifm_active |= IFM_100_TX;
1723 			break;
1724 		case 1000:
1725 			ifmr->ifm_active |= IFM_1000_T;
1726 			break;
1727 		}
1728 		if (adapter->link_duplex == FULL_DUPLEX)
1729 			ifmr->ifm_active |= IFM_FDX;
1730 		else
1731 			ifmr->ifm_active |= IFM_HDX;
1732 	}
1733 	EM_CORE_UNLOCK(adapter);
1734 }
1735 
1736 /*********************************************************************
1737  *
1738  *  Media Ioctl callback
1739  *
1740  *  This routine is called when the user changes speed/duplex using
1741  *  media/mediopt option with ifconfig.
1742 	 *  the media/mediaopt options with ifconfig.
1743  **********************************************************************/
1744 static int
1745 em_media_change(struct ifnet *ifp)
1746 {
1747 	struct adapter *adapter = ifp->if_softc;
1748 	struct ifmedia  *ifm = &adapter->media;
1749 
1750 	INIT_DEBUGOUT("em_media_change: begin");
1751 
1752 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1753 		return (EINVAL);
1754 
1755 	EM_CORE_LOCK(adapter);
1756 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1757 	case IFM_AUTO:
1758 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1760 		break;
1761 	case IFM_1000_LX:
1762 	case IFM_1000_SX:
1763 	case IFM_1000_T:
1764 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1765 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1766 		break;
1767 	case IFM_100_TX:
1768 		adapter->hw.mac.autoneg = FALSE;
1769 		adapter->hw.phy.autoneg_advertised = 0;
1770 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1772 		else
1773 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1774 		break;
1775 	case IFM_10_T:
1776 		adapter->hw.mac.autoneg = FALSE;
1777 		adapter->hw.phy.autoneg_advertised = 0;
1778 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1780 		else
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1782 		break;
1783 	default:
1784 		device_printf(adapter->dev, "Unsupported media type\n");
1785 	}
1786 
1787 	em_init_locked(adapter);
1788 	EM_CORE_UNLOCK(adapter);
1789 
1790 	return (0);
1791 }
1792 
1793 /*********************************************************************
1794  *
1795  *  This routine maps the mbufs to tx descriptors.
1796  *
1797  *  return 0 on success, positive on failure
1798  **********************************************************************/
1799 
1800 static int
1801 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1802 {
1803 	struct adapter		*adapter = txr->adapter;
1804 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1805 	bus_dmamap_t		map;
1806 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1807 	struct e1000_tx_desc	*ctxd = NULL;
1808 	struct mbuf		*m_head;
1809 	struct ether_header	*eh;
1810 	struct ip		*ip = NULL;
1811 	struct tcphdr		*tp = NULL;
1812 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1813 	int			ip_off, poff;
1814 	int			nsegs, i, j, first, last = 0;
1815 	int			error, do_tso, tso_desc = 0, remap = 1;
1816 
1817 retry:
1818 	m_head = *m_headp;
1819 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1820 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1821 	ip_off = poff = 0;
1822 
1823 	/*
1824 	 * Intel recommends entire IP/TCP header length reside in a single
1825 	 * buffer. If multiple descriptors are used to describe the IP and
1826 	 * TCP header, each descriptor should describe one or more
1827 	 * complete headers; descriptors referencing only parts of headers
1828 	 * are not supported. If all layer headers are not coalesced into
1829 	 * a single buffer, each buffer should not cross a 4KB boundary,
1830 	 * or be larger than the maximum read request size.
1831 	 * The controller also requires the IP/TCP header to be modified
1832 	 * for TSO to work, so we first obtain a writable mbuf chain and
1833 	 * then coalesce the ethernet/IP/TCP headers into a single buffer
1834 	 * to meet the controller's requirement. This also simplifies
1835 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1836 	 */
1837 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1838 		if (do_tso || (m_head->m_next != NULL &&
1839 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1840 			if (M_WRITABLE(*m_headp) == 0) {
1841 				m_head = m_dup(*m_headp, M_NOWAIT);
1842 				m_freem(*m_headp);
1843 				if (m_head == NULL) {
1844 					*m_headp = NULL;
1845 					return (ENOBUFS);
1846 				}
1847 				*m_headp = m_head;
1848 			}
1849 		}
1850 		/*
1851 		 * XXX
1852 		 * Assume IPv4, we don't have TSO/checksum offload support
1853 		 * for IPv6 yet.
1854 		 */
1855 		ip_off = sizeof(struct ether_header);
1856 		m_head = m_pullup(m_head, ip_off);
1857 		if (m_head == NULL) {
1858 			*m_headp = NULL;
1859 			return (ENOBUFS);
1860 		}
1861 		eh = mtod(m_head, struct ether_header *);
1862 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1863 			ip_off = sizeof(struct ether_vlan_header);
1864 			m_head = m_pullup(m_head, ip_off);
1865 			if (m_head == NULL) {
1866 				*m_headp = NULL;
1867 				return (ENOBUFS);
1868 			}
1869 		}
1870 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1871 		if (m_head == NULL) {
1872 			*m_headp = NULL;
1873 			return (ENOBUFS);
1874 		}
1875 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1876 		poff = ip_off + (ip->ip_hl << 2);
1877 		if (do_tso) {
1878 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1879 			if (m_head == NULL) {
1880 				*m_headp = NULL;
1881 				return (ENOBUFS);
1882 			}
1883 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1884 			/*
1885 			 * TSO workaround:
1886 			 *   pull 4 more bytes of data into the header mbuf.
1887 			 */
1888 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1889 			if (m_head == NULL) {
1890 				*m_headp = NULL;
1891 				return (ENOBUFS);
1892 			}
1893 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1894 			ip->ip_len = 0;
1895 			ip->ip_sum = 0;
1896 			/*
1897 			 * The TCP pseudo-header checksum must not include
1898 			 * the TCP payload length, so the driver recomputes
1899 			 * it here to match what the hardware expects. This
1900 			 * follows Microsoft's Large Send specification.
1901 			 */
1902 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1904 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1905 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1906 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1907 			if (m_head == NULL) {
1908 				*m_headp = NULL;
1909 				return (ENOBUFS);
1910 			}
1911 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1913 			if (m_head == NULL) {
1914 				*m_headp = NULL;
1915 				return (ENOBUFS);
1916 			}
1917 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1918 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1919 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1920 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1921 			if (m_head == NULL) {
1922 				*m_headp = NULL;
1923 				return (ENOBUFS);
1924 			}
1925 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926 		}
1927 		*m_headp = m_head;
1928 	}
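	/*
	 * At this point, for the common untagged IPv4 case, ip_off is
	 * 14 (sizeof(struct ether_header); 18 with a VLAN header) and
	 * poff = ip_off + (ip_hl << 2), e.g. 14 + 20 = 34 for a
	 * standard 20-byte IP header. For the TCP/UDP offload cases
	 * the full layer headers now reside in the first mbuf.
	 */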
1929 
1930 	/*
1931 	 * Map the packet for DMA
1932 	 *
1933 	 * Capture the first descriptor index;
1934 	 * this descriptor will store the index
1935 	 * of the EOP descriptor, which is the
1936 	 * only one that gets a DONE bit writeback.
1937 	 */
1938 	first = txr->next_avail_desc;
1939 	tx_buffer = &txr->tx_buffers[first];
1940 	tx_buffer_mapped = tx_buffer;
1941 	map = tx_buffer->map;
1942 
1943 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1944 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1945 
1946 	/*
1947 	 * There are two types of errors we can (try) to handle:
1948 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1949 	 *   out of segments.  Defragment the mbuf chain and try again.
1950 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1951 	 *   at this point in time.  Defer sending and try again later.
1952 	 * All other errors, in particular EINVAL, are fatal and prevent the
1953 	 * mbuf chain from ever going through.  Drop it and report error.
1954 	 */
1955 	if (error == EFBIG && remap) {
1956 		struct mbuf *m;
1957 
1958 		m = m_defrag(*m_headp, M_NOWAIT);
1959 		if (m == NULL) {
1960 			adapter->mbuf_alloc_failed++;
1961 			m_freem(*m_headp);
1962 			*m_headp = NULL;
1963 			return (ENOBUFS);
1964 		}
1965 		*m_headp = m;
1966 
1967 		/* Try it again, but only once */
1968 		remap = 0;
1969 		goto retry;
1970 	} else if (error == ENOMEM) {
1971 		adapter->no_tx_dma_setup++;
1972 		return (error);
1973 	} else if (error != 0) {
1974 		adapter->no_tx_dma_setup++;
1975 		m_freem(*m_headp);
1976 		*m_headp = NULL;
1977 		return (error);
1978 	}
1979 
1980 	/*
1981 	 * TSO hardware workaround: if this packet is not
1982 	 * TSO, is only a single descriptor long, and
1983 	 * follows a TSO burst, we need to add a sentinel
1984 	 * descriptor to prevent premature writeback.
1985 	 */
1986 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1987 		if (nsegs == 1)
1988 			tso_desc = TRUE;
1989 		txr->tx_tso = FALSE;
1990 	}
1991 
1992 	if (nsegs > (txr->tx_avail - 2)) {
1993 		txr->no_desc_avail++;
1994 		bus_dmamap_unload(txr->txtag, map);
1995 		return (ENOBUFS);
1996 	}
1997 	m_head = *m_headp;
1998 
1999 	/* Do hardware assists */
2000 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2001 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2002 		    &txd_upper, &txd_lower);
2003 		/* we need to make a final sentinel transmit desc */
2004 		tso_desc = TRUE;
2005 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2006 		em_transmit_checksum_setup(txr, m_head,
2007 		    ip_off, ip, &txd_upper, &txd_lower);
2008 
2009 	if (m_head->m_flags & M_VLANTAG) {
2010 		/* Set the vlan id. */
2011 		txd_upper |=
2012 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2013 		/* Tell hardware to add tag */
2014 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2015 	}
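	/*
	 * In the legacy TX descriptor layout the upper dword's
	 * "special" field (bits 31:16) carries the 802.1Q tag, which
	 * is why the vtag is shifted left 16 above; VLE then tells
	 * the MAC to insert that tag into the outgoing frame.
	 */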
2016 
2017 	i = txr->next_avail_desc;
2018 
2019 	/* Set up our transmit descriptors */
2020 	for (j = 0; j < nsegs; j++) {
2021 		bus_size_t seg_len;
2022 		bus_addr_t seg_addr;
2023 
2024 		tx_buffer = &txr->tx_buffers[i];
2025 		ctxd = &txr->tx_base[i];
2026 		seg_addr = segs[j].ds_addr;
2027 		seg_len  = segs[j].ds_len;
2028 		/*
2029 		** TSO Workaround:
2030 		** If this is the last descriptor, we want to
2031 		** split it so we have a small final sentinel
2032 		*/
2033 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2034 			seg_len -= 4;
2035 			ctxd->buffer_addr = htole64(seg_addr);
2036 			ctxd->lower.data = htole32(
2037 			    adapter->txd_cmd | txd_lower | seg_len);
2038 			ctxd->upper.data =
2039 			    htole32(txd_upper);
2040 			if (++i == adapter->num_tx_desc)
2041 				i = 0;
2042 			/* Now make the sentinel */
2043 			++txd_used; /* using an extra txd */
2044 			ctxd = &txr->tx_base[i];
2045 			tx_buffer = &txr->tx_buffers[i];
2046 			ctxd->buffer_addr =
2047 			    htole64(seg_addr + seg_len);
2048 			ctxd->lower.data = htole32(
2049 			    adapter->txd_cmd | txd_lower | 4);
2050 			ctxd->upper.data =
2051 			    htole32(txd_upper);
2052 			last = i;
2053 			if (++i == adapter->num_tx_desc)
2054 				i = 0;
2055 		} else {
2056 			ctxd->buffer_addr = htole64(seg_addr);
2057 			ctxd->lower.data = htole32(
2058 			    adapter->txd_cmd | txd_lower | seg_len);
2059 			ctxd->upper.data =
2060 			    htole32(txd_upper);
2061 			last = i;
2062 			if (++i == adapter->num_tx_desc)
2063 				i = 0;
2064 		}
2065 		tx_buffer->m_head = NULL;
2066 		tx_buffer->next_eop = -1;
2067 	}
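	/*
	 * Example of the sentinel split above: a final 1448-byte TSO
	 * segment is emitted as a 1444-byte descriptor followed by a
	 * 4-byte sentinel at seg_addr + 1444. That consumes one extra
	 * descriptor (counted in txd_used) but ensures writeback only
	 * happens when the small trailing descriptor completes.
	 */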
2068 
2069 	txr->next_avail_desc = i;
2070 	txr->tx_avail -= nsegs;
2071 	if (tso_desc) /* TSO used an extra for sentinel */
2072 		txr->tx_avail -= txd_used;
2073 
2074 	tx_buffer->m_head = m_head;
2075 	/*
2076 	** Here we swap the maps so the last descriptor,
2077 	** which gets the completion interrupt, holds the
2078 	** real (loaded) map, and the first descriptor
2079 	** gets the unused map from this descriptor.
2080 	*/
2081 	tx_buffer_mapped->map = tx_buffer->map;
2082 	tx_buffer->map = map;
2083 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2084 
2085 	/*
2086 	 * The last descriptor of the packet
2087 	 * needs End Of Packet (EOP)
2088 	 * and Report Status (RS).
2089 	 */
2090 	ctxd->lower.data |=
2091 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2092 	/*
2093 	 * Keep track in the first buffer which
2094 	 * descriptor will be written back
2095 	 */
2096 	tx_buffer = &txr->tx_buffers[first];
2097 	tx_buffer->next_eop = last;
2098 	/* Update the watchdog time early and often */
2099 	txr->watchdog_time = ticks;
2100 
2101 	/*
2102 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2103 	 * that this frame is available to transmit.
2104 	 */
2105 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2106 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2107 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2108 
2109 	return (0);
2110 }
2111 
2112 static void
2113 em_set_promisc(struct adapter *adapter)
2114 {
2115 	struct ifnet	*ifp = adapter->ifp;
2116 	u32		reg_rctl;
2117 
2118 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2119 
2120 	if (ifp->if_flags & IFF_PROMISC) {
2121 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2122 		/* Turn this on if you want to see bad packets */
2123 		if (em_debug_sbp)
2124 			reg_rctl |= E1000_RCTL_SBP;
2125 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2126 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2127 		reg_rctl |= E1000_RCTL_MPE;
2128 		reg_rctl &= ~E1000_RCTL_UPE;
2129 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2130 	}
2131 }
2132 
2133 static void
2134 em_disable_promisc(struct adapter *adapter)
2135 {
2136 	struct ifnet	*ifp = adapter->ifp;
2137 	u32		reg_rctl;
2138 	int		mcnt = 0;
2139 
2140 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2141 	reg_rctl &=  (~E1000_RCTL_UPE);
2142 	if (ifp->if_flags & IFF_ALLMULTI)
2143 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2144 	else {
2145 		struct  ifmultiaddr *ifma;
2146 #if __FreeBSD_version < 800000
2147 		IF_ADDR_LOCK(ifp);
2148 #else
2149 		if_maddr_rlock(ifp);
2150 #endif
2151 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2152 			if (ifma->ifma_addr->sa_family != AF_LINK)
2153 				continue;
2154 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2155 				break;
2156 			mcnt++;
2157 		}
2158 #if __FreeBSD_version < 800000
2159 		IF_ADDR_UNLOCK(ifp);
2160 #else
2161 		if_maddr_runlock(ifp);
2162 #endif
2163 	}
2164 	/* Don't disable if in MAX groups */
2165 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2166 		reg_rctl &=  (~E1000_RCTL_MPE);
2167 	reg_rctl &=  (~E1000_RCTL_SBP);
2168 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2169 }
2170 
2171 
2172 /*********************************************************************
2173  *  Multicast Update
2174  *
2175  *  This routine is called whenever multicast address list is updated.
2176  *
2177  **********************************************************************/
2178 
2179 static void
2180 em_set_multi(struct adapter *adapter)
2181 {
2182 	struct ifnet	*ifp = adapter->ifp;
2183 	struct ifmultiaddr *ifma;
2184 	u32 reg_rctl = 0;
2185 	u8  *mta; /* Multicast array memory */
2186 	int mcnt = 0;
2187 
2188 	IOCTL_DEBUGOUT("em_set_multi: begin");
2189 
2190 	mta = adapter->mta;
2191 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2192 
2193 	if (adapter->hw.mac.type == e1000_82542 &&
2194 	    adapter->hw.revision_id == E1000_REVISION_2) {
2195 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2196 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2197 			e1000_pci_clear_mwi(&adapter->hw);
2198 		reg_rctl |= E1000_RCTL_RST;
2199 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2200 		msec_delay(5);
2201 	}
2202 
2203 #if __FreeBSD_version < 800000
2204 	IF_ADDR_LOCK(ifp);
2205 #else
2206 	if_maddr_rlock(ifp);
2207 #endif
2208 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2209 		if (ifma->ifma_addr->sa_family != AF_LINK)
2210 			continue;
2211 
2212 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2213 			break;
2214 
2215 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2216 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2217 		mcnt++;
2218 	}
2219 #if __FreeBSD_version < 800000
2220 	IF_ADDR_UNLOCK(ifp);
2221 #else
2222 	if_maddr_runlock(ifp);
2223 #endif
2224 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2225 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2226 		reg_rctl |= E1000_RCTL_MPE;
2227 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2228 	} else
2229 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2230 
2231 	if (adapter->hw.mac.type == e1000_82542 &&
2232 	    adapter->hw.revision_id == E1000_REVISION_2) {
2233 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2234 		reg_rctl &= ~E1000_RCTL_RST;
2235 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2236 		msec_delay(5);
2237 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2238 			e1000_pci_set_mwi(&adapter->hw);
2239 	}
2240 }
2241 
2242 
2243 /*********************************************************************
2244  *  Timer routine
2245  *
2246  *  This routine checks for link status and updates statistics.
2247  *
2248  **********************************************************************/
2249 
2250 static void
2251 em_local_timer(void *arg)
2252 {
2253 	struct adapter	*adapter = arg;
2254 	struct ifnet	*ifp = adapter->ifp;
2255 	struct tx_ring	*txr = adapter->tx_rings;
2256 	struct rx_ring	*rxr = adapter->rx_rings;
2257 	u32		trigger;
2258 
2259 	EM_CORE_LOCK_ASSERT(adapter);
2260 
2261 	em_update_link_status(adapter);
2262 	em_update_stats_counters(adapter);
2263 
2264 	/* Reset LAA into RAR[0] on 82571 */
2265 	if ((adapter->hw.mac.type == e1000_82571) &&
2266 	    e1000_get_laa_state_82571(&adapter->hw))
2267 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2268 
2269 	/* Mask to use in the irq trigger */
2270 	if (adapter->msix_mem)
2271 		trigger = rxr->ims; /* RX for 82574 */
2272 	else
2273 		trigger = E1000_ICS_RXDMT0;
2274 
2275 	/*
2276 	** Check on the state of the TX queue(s); this
2277 	** can be done without the lock because it is
2278 	** read-only and the HUNG state is static once set.
2279 	*/
2280 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2281 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2282 		    (adapter->pause_frames == 0))
2283 			goto hung;
2284 		/* Schedule a TX tasklet if needed */
2285 		if (txr->tx_avail <= EM_MAX_SCATTER)
2286 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2287 	}
2288 
2289 	adapter->pause_frames = 0;
2290 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2291 #ifndef DEVICE_POLLING
2292 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2293 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2294 #endif
2295 	return;
2296 hung:
2297 	/* Looks like we're hung */
2298 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2299 	device_printf(adapter->dev,
2300 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2301 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2302 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2303 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2304 	    "Next TX to Clean = %d\n",
2305 	    txr->me, txr->tx_avail, txr->next_to_clean);
2306 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2307 	adapter->watchdog_events++;
2308 	adapter->pause_frames = 0;
2309 	em_init_locked(adapter);
2310 }
2311 
2312 
2313 static void
2314 em_update_link_status(struct adapter *adapter)
2315 {
2316 	struct e1000_hw *hw = &adapter->hw;
2317 	struct ifnet *ifp = adapter->ifp;
2318 	device_t dev = adapter->dev;
2319 	struct tx_ring *txr = adapter->tx_rings;
2320 	u32 link_check = 0;
2321 
2322 	/* Get the cached link value or read phy for real */
2323 	switch (hw->phy.media_type) {
2324 	case e1000_media_type_copper:
2325 		if (hw->mac.get_link_status) {
2326 			/* Do the work to read phy */
2327 			e1000_check_for_link(hw);
2328 			link_check = !hw->mac.get_link_status;
2329 			if (link_check) /* ESB2 fix */
2330 				e1000_cfg_on_link_up(hw);
2331 		} else
2332 			link_check = TRUE;
2333 		break;
2334 	case e1000_media_type_fiber:
2335 		e1000_check_for_link(hw);
2336 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2337                                  E1000_STATUS_LU);
2338 		break;
2339 	case e1000_media_type_internal_serdes:
2340 		e1000_check_for_link(hw);
2341 		link_check = adapter->hw.mac.serdes_has_link;
2342 		break;
2343 	default:
2344 	case e1000_media_type_unknown:
2345 		break;
2346 	}
2347 
2348 	/* Now check for a transition */
2349 	if (link_check && (adapter->link_active == 0)) {
2350 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2351 		    &adapter->link_duplex);
2352 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2353 		if ((adapter->link_speed != SPEED_1000) &&
2354 		    ((hw->mac.type == e1000_82571) ||
2355 		    (hw->mac.type == e1000_82572))) {
2356 			int tarc0;
2357 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2358 			tarc0 &= ~SPEED_MODE_BIT;
2359 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2360 		}
2361 		if (bootverbose)
2362 			device_printf(dev, "Link is up %d Mbps %s\n",
2363 			    adapter->link_speed,
2364 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2365 			    "Full Duplex" : "Half Duplex"));
2366 		adapter->link_active = 1;
2367 		adapter->smartspeed = 0;
2368 		ifp->if_baudrate = adapter->link_speed * 1000000;
2369 		if_link_state_change(ifp, LINK_STATE_UP);
2370 	} else if (!link_check && (adapter->link_active == 1)) {
2371 		ifp->if_baudrate = adapter->link_speed = 0;
2372 		adapter->link_duplex = 0;
2373 		if (bootverbose)
2374 			device_printf(dev, "Link is Down\n");
2375 		adapter->link_active = 0;
2376 		/* Link down, disable watchdog */
2377 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2378 			txr->queue_status = EM_QUEUE_IDLE;
2379 		if_link_state_change(ifp, LINK_STATE_DOWN);
2380 	}
2381 }
2382 
2383 /*********************************************************************
2384  *
2385  *  This routine disables all traffic on the adapter by issuing a
2386  *  global reset on the MAC and deallocates TX/RX buffers.
2387  *
2388  *  This routine should always be called with BOTH the CORE
2389  *  and TX locks.
2390  **********************************************************************/
2391 
2392 static void
2393 em_stop(void *arg)
2394 {
2395 	struct adapter	*adapter = arg;
2396 	struct ifnet	*ifp = adapter->ifp;
2397 	struct tx_ring	*txr = adapter->tx_rings;
2398 
2399 	EM_CORE_LOCK_ASSERT(adapter);
2400 
2401 	INIT_DEBUGOUT("em_stop: begin");
2402 
2403 	em_disable_intr(adapter);
2404 	callout_stop(&adapter->timer);
2405 
2406 	/* Tell the stack that the interface is no longer active */
2407 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2408 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2409 
2410 	/* Unarm watchdog timer. */
2411 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2412 		EM_TX_LOCK(txr);
2413 		txr->queue_status = EM_QUEUE_IDLE;
2414 		EM_TX_UNLOCK(txr);
2415 	}
2416 
2417 	e1000_reset_hw(&adapter->hw);
2418 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2419 
2420 	e1000_led_off(&adapter->hw);
2421 	e1000_cleanup_led(&adapter->hw);
2422 }
2423 
2424 
2425 /*********************************************************************
2426  *
2427  *  Determine hardware revision.
2428  *
2429  **********************************************************************/
2430 static void
2431 em_identify_hardware(struct adapter *adapter)
2432 {
2433 	device_t dev = adapter->dev;
2434 
2435 	/* Make sure our PCI config space has the necessary stuff set */
2436 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2437 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2438 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2439 		device_printf(dev, "Memory Access and/or Bus Master bits "
2440 		    "were not set!\n");
2441 		adapter->hw.bus.pci_cmd_word |=
2442 		    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2443 		pci_write_config(dev, PCIR_COMMAND,
2444 		    adapter->hw.bus.pci_cmd_word, 2);
2445 	}
2446 
2447 	/* Save off the information about this board */
2448 	adapter->hw.vendor_id = pci_get_vendor(dev);
2449 	adapter->hw.device_id = pci_get_device(dev);
2450 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2451 	adapter->hw.subsystem_vendor_id =
2452 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2453 	adapter->hw.subsystem_device_id =
2454 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2455 
2456 	/* Do Shared Code Init and Setup */
2457 	if (e1000_set_mac_type(&adapter->hw)) {
2458 		device_printf(dev, "Setup init failure\n");
2459 		return;
2460 	}
2461 }
2462 
2463 static int
2464 em_allocate_pci_resources(struct adapter *adapter)
2465 {
2466 	device_t	dev = adapter->dev;
2467 	int		rid;
2468 
2469 	rid = PCIR_BAR(0);
2470 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2471 	    &rid, RF_ACTIVE);
2472 	if (adapter->memory == NULL) {
2473 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2474 		return (ENXIO);
2475 	}
2476 	adapter->osdep.mem_bus_space_tag =
2477 	    rman_get_bustag(adapter->memory);
2478 	adapter->osdep.mem_bus_space_handle =
2479 	    rman_get_bushandle(adapter->memory);
2480 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2481 
2482 	/* Default to a single queue */
2483 	adapter->num_queues = 1;
2484 
2485 	/*
2486 	 * Setup MSI/X or MSI if PCI Express
2487 	 */
2488 	adapter->msix = em_setup_msix(adapter);
2489 
2490 	adapter->hw.back = &adapter->osdep;
2491 
2492 	return (0);
2493 }
2494 
2495 /*********************************************************************
2496  *
2497  *  Setup the Legacy or MSI Interrupt handler
2498  *
2499  **********************************************************************/
2500 int
2501 em_allocate_legacy(struct adapter *adapter)
2502 {
2503 	device_t dev = adapter->dev;
2504 	struct tx_ring	*txr = adapter->tx_rings;
2505 	int error, rid = 0;
2506 
2507 	/* Manually turn off all interrupts */
2508 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2509 
2510 	if (adapter->msix == 1) /* using MSI */
2511 		rid = 1;
2512 	/* We allocate a single interrupt resource */
2513 	adapter->res = bus_alloc_resource_any(dev,
2514 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2515 	if (adapter->res == NULL) {
2516 		device_printf(dev, "Unable to allocate bus resource: "
2517 		    "interrupt\n");
2518 		return (ENXIO);
2519 	}
2520 
2521 	/*
2522 	 * Allocate a fast interrupt and the associated
2523 	 * deferred processing contexts.
2524 	 */
2525 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2526 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2527 	    taskqueue_thread_enqueue, &adapter->tq);
2528 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2529 	    device_get_nameunit(adapter->dev));
2530 	/* Use a TX only tasklet for local timer */
2531 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2532 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2533 	    taskqueue_thread_enqueue, &txr->tq);
2534 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2535 	    device_get_nameunit(adapter->dev));
2536 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2537 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2538 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2539 		device_printf(dev, "Failed to register fast interrupt "
2540 			    "handler: %d\n", error);
2541 		taskqueue_free(adapter->tq);
2542 		adapter->tq = NULL;
2543 		return (error);
2544 	}
2545 
2546 	return (0);
2547 }
2548 
2549 /*********************************************************************
2550  *
2551  *  Setup the MSIX Interrupt handlers
2552  *   This is not really Multiqueue, rather
2553  *   it is just separate interrupt vectors
2554  *   for TX, RX, and Link.
2555  *
2556  **********************************************************************/
2557 int
2558 em_allocate_msix(struct adapter *adapter)
2559 {
2560 	device_t	dev = adapter->dev;
2561 	struct		tx_ring *txr = adapter->tx_rings;
2562 	struct		rx_ring *rxr = adapter->rx_rings;
2563 	int		error, rid, vector = 0;
2564 
2565 
2566 	/* Make sure all interrupts are disabled */
2567 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2568 
2569 	/* First set up ring resources */
2570 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2571 
2572 		/* RX ring */
2573 		rid = vector + 1;
2574 
2575 		rxr->res = bus_alloc_resource_any(dev,
2576 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2577 		if (rxr->res == NULL) {
2578 			device_printf(dev,
2579 			    "Unable to allocate bus resource: "
2580 			    "RX MSIX Interrupt %d\n", i);
2581 			return (ENXIO);
2582 		}
2583 		if ((error = bus_setup_intr(dev, rxr->res,
2584 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2585 		    rxr, &rxr->tag)) != 0) {
2586 			device_printf(dev, "Failed to register RX handler");
2587 			return (error);
2588 		}
2589 #if __FreeBSD_version >= 800504
2590 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2591 #endif
2592 		rxr->msix = vector++; /* NOTE increment vector for TX */
2593 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2594 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2595 		    taskqueue_thread_enqueue, &rxr->tq);
2596 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2597 		    device_get_nameunit(adapter->dev));
2598 		/*
2599 		** Set the bit to enable interrupt
2600 		** in E1000_IMS -- bits 20 and 21
2601 		** are for RX0 and RX1, note this has
2602 		** NOTHING to do with the MSIX vector
2603 		*/
2604 		rxr->ims = 1 << (20 + i);
2605 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2606 
2607 		/* TX ring */
2608 		rid = vector + 1;
2609 		txr->res = bus_alloc_resource_any(dev,
2610 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2611 		if (txr->res == NULL) {
2612 			device_printf(dev,
2613 			    "Unable to allocate bus resource: "
2614 			    "TX MSIX Interrupt %d\n", i);
2615 			return (ENXIO);
2616 		}
2617 		if ((error = bus_setup_intr(dev, txr->res,
2618 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2619 		    txr, &txr->tag)) != 0) {
2620 			device_printf(dev, "Failed to register TX handler");
2621 			return (error);
2622 		}
2623 #if __FreeBSD_version >= 800504
2624 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2625 #endif
2626 		txr->msix = vector++; /* Increment vector for next pass */
2627 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2628 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2629 		    taskqueue_thread_enqueue, &txr->tq);
2630 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2631 		    device_get_nameunit(adapter->dev));
2632 		/*
2633 		** Set the bit to enable interrupt
2634 		** in E1000_IMS -- bits 22 and 23
2635 		** are for TX0 and TX1, note this has
2636 		** NOTHING to do with the MSIX vector
2637 		*/
2638 		txr->ims = 1 << (22 + i);
2639 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2640 	}
2641 
2642 	/* Link interrupt */
2643 	++rid;
2644 	adapter->res = bus_alloc_resource_any(dev,
2645 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2646 	if (!adapter->res) {
2647 		device_printf(dev, "Unable to allocate "
2648 		    "bus resource: Link interrupt [%d]\n", rid);
2649 		return (ENXIO);
2650 	}
2651 	/* Set the link handler function */
2652 	error = bus_setup_intr(dev, adapter->res,
2653 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2654 	    em_msix_link, adapter, &adapter->tag);
2655 	if (error) {
2656 		adapter->res = NULL;
2657 		device_printf(dev, "Failed to register LINK handler");
2658 		return (error);
2659 	}
2660 #if __FreeBSD_version >= 800504
2661 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2662 #endif
2663 	adapter->linkvec = vector;
2664 	adapter->ivars |=  (8 | vector) << 16;
2665 	adapter->ivars |= 0x80000000;
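	/*
	 * The ivars word built above reflects the 82574 IVAR layout as
	 * used by this driver: one 4-bit field per interrupt cause,
	 * the low 3 bits holding the MSIX vector number and bit 3
	 * (the "8") acting as the valid bit -- nibbles 0-1 for the RX
	 * queues, nibbles 2-3 for the TX queues, bits 16-19 for link,
	 * plus the high control bit (0x80000000) set last.
	 */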
2666 
2667 	return (0);
2668 }
2669 
2670 
2671 static void
2672 em_free_pci_resources(struct adapter *adapter)
2673 {
2674 	device_t	dev = adapter->dev;
2675 	struct tx_ring	*txr;
2676 	struct rx_ring	*rxr;
2677 	int		rid;
2678 
2679 
2680 	/*
2681 	** Release all the queue interrupt resources:
2682 	*/
2683 	for (int i = 0; i < adapter->num_queues; i++) {
2684 		txr = &adapter->tx_rings[i];
2685 		rxr = &adapter->rx_rings[i];
2686 		/* an early abort? */
2687 		if ((txr == NULL) || (rxr == NULL))
2688 			break;
2689 		rid = txr->msix + 1;
2690 		if (txr->tag != NULL) {
2691 			bus_teardown_intr(dev, txr->res, txr->tag);
2692 			txr->tag = NULL;
2693 		}
2694 		if (txr->res != NULL)
2695 			bus_release_resource(dev, SYS_RES_IRQ,
2696 			    rid, txr->res);
2697 		rid = rxr->msix + 1;
2698 		if (rxr->tag != NULL) {
2699 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2700 			rxr->tag = NULL;
2701 		}
2702 		if (rxr->res != NULL)
2703 			bus_release_resource(dev, SYS_RES_IRQ,
2704 			    rid, rxr->res);
2705 	}
2706 
2707 	if (adapter->linkvec) /* we are doing MSIX */
2708 		rid = adapter->linkvec + 1;
2709 	else
2710 		rid = (adapter->msix != 0) ? 1 : 0;
2711 
2712 	if (adapter->tag != NULL) {
2713 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2714 		adapter->tag = NULL;
2715 	}
2716 
2717 	if (adapter->res != NULL)
2718 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2719 
2720 
2721 	if (adapter->msix)
2722 		pci_release_msi(dev);
2723 
2724 	if (adapter->msix_mem != NULL)
2725 		bus_release_resource(dev, SYS_RES_MEMORY,
2726 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2727 
2728 	if (adapter->memory != NULL)
2729 		bus_release_resource(dev, SYS_RES_MEMORY,
2730 		    PCIR_BAR(0), adapter->memory);
2731 
2732 	if (adapter->flash != NULL)
2733 		bus_release_resource(dev, SYS_RES_MEMORY,
2734 		    EM_FLASH, adapter->flash);
2735 }
2736 
2737 /*
2738  * Setup MSI or MSI/X
2739  */
2740 static int
2741 em_setup_msix(struct adapter *adapter)
2742 {
2743 	device_t dev = adapter->dev;
2744 	int val = 0;
2745 
2746 	/*
2747 	** Setup MSI/X for Hartwell: tests have shown
2748 	** use of two queues to be unstable, and to
2749 	** provide no great gain anyway, so we simply
2750 	** separate the interrupts and use a single queue.
2751 	*/
2752 	if ((adapter->hw.mac.type == e1000_82574) &&
2753 	    (em_enable_msix == TRUE)) {
2754 		/* Map the MSIX BAR */
2755 		int rid = PCIR_BAR(EM_MSIX_BAR);
2756 		adapter->msix_mem = bus_alloc_resource_any(dev,
2757 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2758 		if (!adapter->msix_mem) {
2759 			/* May not be enabled */
2760 			device_printf(adapter->dev,
2761 			    "Unable to map MSIX table\n");
2762 			goto msi;
2763 		}
2764 		val = pci_msix_count(dev);
2765 		/* We only need 3 vectors */
2766 		if (val > 3)
2767 			val = 3;
2768 		if (val != 3) {
2769 			bus_release_resource(dev, SYS_RES_MEMORY,
2770 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2771 			adapter->msix_mem = NULL;
2772 			device_printf(adapter->dev,
2773 			    "MSIX: incorrect vectors, using MSI\n");
2774 			goto msi;
2775 		}
2776 
2777 		if (pci_alloc_msix(dev, &val) == 0) {
2778 			device_printf(adapter->dev,
2779 			    "Using MSIX interrupts "
2780 			    "with %d vectors\n", val);
2781 		}
2782 
2783 		return (val);
2784 	}
2785 msi:
2786 	val = pci_msi_count(dev);
2787 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2788 		adapter->msix = 1;
2789 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2790 		return (val);
2791 	}
2792 	/* Should only happen due to manual configuration */
2793 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2794 	return (0);
2795 }
2796 
2797 
2798 /*********************************************************************
2799  *
2800  *  Initialize the hardware to a configuration
2801  *  as specified by the adapter structure.
2802  *
2803  **********************************************************************/
2804 static void
2805 em_reset(struct adapter *adapter)
2806 {
2807 	device_t	dev = adapter->dev;
2808 	struct ifnet	*ifp = adapter->ifp;
2809 	struct e1000_hw	*hw = &adapter->hw;
2810 	u16		rx_buffer_size;
2811 	u32		pba;
2812 
2813 	INIT_DEBUGOUT("em_reset: begin");
2814 
2815 	/* Set up smart power down as default off on newer adapters. */
2816 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2817 	    hw->mac.type == e1000_82572)) {
2818 		u16 phy_tmp = 0;
2819 
2820 		/* Speed up time to link by disabling smart power down. */
2821 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2822 		phy_tmp &= ~IGP02E1000_PM_SPD;
2823 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2824 	}
2825 
2826 	/*
2827 	 * Packet Buffer Allocation (PBA)
2828 	 * Writing PBA sets the receive portion of the buffer;
2829 	 * the remainder is used for the transmit buffer.
2830 	 */
2831 	switch (hw->mac.type) {
2832 	/* Total Packet Buffer on these is 48K */
2833 	case e1000_82571:
2834 	case e1000_82572:
2835 	case e1000_80003es2lan:
2836 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2837 		break;
2838 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2839 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2840 		break;
2841 	case e1000_82574:
2842 	case e1000_82583:
2843 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2844 		break;
2845 	case e1000_ich8lan:
2846 		pba = E1000_PBA_8K;
2847 		break;
2848 	case e1000_ich9lan:
2849 	case e1000_ich10lan:
2850 		/* Boost Receive side for jumbo frames */
2851 		if (adapter->hw.mac.max_frame_size > 4096)
2852 			pba = E1000_PBA_14K;
2853 		else
2854 			pba = E1000_PBA_10K;
2855 		break;
2856 	case e1000_pchlan:
2857 	case e1000_pch2lan:
2858 	case e1000_pch_lpt:
2859 		pba = E1000_PBA_26K;
2860 		break;
2861 	default:
2862 		if (adapter->hw.mac.max_frame_size > 8192)
2863 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2864 		else
2865 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2866 	}
2867 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2868 
2869 	/*
2870 	 * These parameters control the automatic generation (Tx) and
2871 	 * response (Rx) to Ethernet PAUSE frames.
2872 	 * - High water mark should allow for at least two frames to be
2873 	 *   received after sending an XOFF.
2874 	 * - Low water mark works best when it is very near the high water mark.
2875 	 *   This allows the receiver to restart by sending XON when it has
2876 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2877 	 *   restart after one full frame is pulled from the buffer. There
2878 	 *   could be several smaller frames in the buffer and if so they will
2879 	 *   not trigger the XON until their total number reduces the buffer
2880 	 *   by 1500.
2881 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2882 	 */
2883 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2884 	hw->fc.high_water = rx_buffer_size -
2885 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2886 	hw->fc.low_water = hw->fc.high_water - 1500;
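	/*
	 * Worked example: with a 32K receive allocation (E1000_PBA_32K)
	 * and a standard 1518-byte max frame, rx_buffer_size = 32768,
	 * high_water = 32768 - roundup2(1518, 1024) = 30720, and
	 * low_water = 30720 - 1500 = 29220 bytes.
	 */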
2887 
2888 	if (adapter->fc) /* locally set flow control value? */
2889 		hw->fc.requested_mode = adapter->fc;
2890 	else
2891 		hw->fc.requested_mode = e1000_fc_full;
2892 
2893 	if (hw->mac.type == e1000_80003es2lan)
2894 		hw->fc.pause_time = 0xFFFF;
2895 	else
2896 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2897 
2898 	hw->fc.send_xon = TRUE;
2899 
2900 	/* Device specific overrides/settings */
2901 	switch (hw->mac.type) {
2902 	case e1000_pchlan:
2903 		/* Workaround: no TX flow ctrl for PCH */
2904 		hw->fc.requested_mode = e1000_fc_rx_pause;
2905 		hw->fc.pause_time = 0xFFFF; /* override */
2906 		if (ifp->if_mtu > ETHERMTU) {
2907 			hw->fc.high_water = 0x3500;
2908 			hw->fc.low_water = 0x1500;
2909 		} else {
2910 			hw->fc.high_water = 0x5000;
2911 			hw->fc.low_water = 0x3000;
2912 		}
2913 		hw->fc.refresh_time = 0x1000;
2914 		break;
2915 	case e1000_pch2lan:
2916 	case e1000_pch_lpt:
2917 		hw->fc.high_water = 0x5C20;
2918 		hw->fc.low_water = 0x5048;
2919 		hw->fc.pause_time = 0x0650;
2920 		hw->fc.refresh_time = 0x0400;
2921 		/* Jumbos need adjusted PBA */
2922 		if (ifp->if_mtu > ETHERMTU)
2923 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2924 		else
2925 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2926 		break;
2927 	case e1000_ich9lan:
2928 	case e1000_ich10lan:
2929 		if (ifp->if_mtu > ETHERMTU) {
2930 			hw->fc.high_water = 0x2800;
2931 			hw->fc.low_water = hw->fc.high_water - 8;
2932 			break;
2933 		}
2934 		/* else fall thru */
2935 	default:
2936 		if (hw->mac.type == e1000_80003es2lan)
2937 			hw->fc.pause_time = 0xFFFF;
2938 		break;
2939 	}
2940 
2941 	/* Issue a global reset */
2942 	e1000_reset_hw(hw);
2943 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2944 	em_disable_aspm(adapter);
2945 	/* and a re-init */
2946 	if (e1000_init_hw(hw) < 0) {
2947 		device_printf(dev, "Hardware Initialization Failed\n");
2948 		return;
2949 	}
2950 
2951 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2952 	e1000_get_phy_info(hw);
2953 	e1000_check_for_link(hw);
2954 	return;
2955 }
2956 
2957 /*********************************************************************
2958  *
2959  *  Setup networking device structure and register an interface.
2960  *
2961  **********************************************************************/
2962 static int
2963 em_setup_interface(device_t dev, struct adapter *adapter)
2964 {
2965 	struct ifnet   *ifp;
2966 
2967 	INIT_DEBUGOUT("em_setup_interface: begin");
2968 
2969 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2970 	if (ifp == NULL) {
2971 		device_printf(dev, "can not allocate ifnet structure\n");
2972 		return (-1);
2973 	}
2974 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2975 	ifp->if_init =  em_init;
2976 	ifp->if_softc = adapter;
2977 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2978 	ifp->if_ioctl = em_ioctl;
2979 #ifdef EM_MULTIQUEUE
2980 	/* Multiqueue stack interface */
2981 	ifp->if_transmit = em_mq_start;
2982 	ifp->if_qflush = em_qflush;
2983 #else
2984 	ifp->if_start = em_start;
2985 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2986 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2987 	IFQ_SET_READY(&ifp->if_snd);
2988 #endif
2989 
2990 	ether_ifattach(ifp, adapter->hw.mac.addr);
2991 
2992 	ifp->if_capabilities = ifp->if_capenable = 0;
2993 
2994 
2995 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2996 	ifp->if_capabilities |= IFCAP_TSO4;
2997 	/*
2998 	 * Tell the upper layer(s) we
2999 	 * support full VLAN capability
3000 	 */
3001 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3002 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3003 			     |  IFCAP_VLAN_HWTSO
3004 			     |  IFCAP_VLAN_MTU;
3005 	ifp->if_capenable = ifp->if_capabilities;
3006 
3007 	/*
3008 	** Don't enable this by default: if vlans are
3009 	** created on another pseudo device (e.g. lagg),
3010 	** vlan events are not passed through, breaking
3011 	** operation, but with HW FILTER off it works. If
3012 	** you use vlans directly on the em driver you can
3013 	** enable this and get full hardware tag filtering.
3014 	*/
3015 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3016 
3017 #ifdef DEVICE_POLLING
3018 	ifp->if_capabilities |= IFCAP_POLLING;
3019 #endif
3020 
3021 	/* Enable only WOL MAGIC by default */
3022 	if (adapter->wol) {
3023 		ifp->if_capabilities |= IFCAP_WOL;
3024 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3025 	}
3026 
3027 	/*
3028 	 * Specify the media types supported by this adapter and register
3029 	 * callbacks to update media and link information
3030 	 */
3031 	ifmedia_init(&adapter->media, IFM_IMASK,
3032 	    em_media_change, em_media_status);
3033 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3034 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3035 		u_char fiber_type = IFM_1000_SX;	/* default type */
3036 
3037 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3038 			    0, NULL);
3039 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3040 	} else {
3041 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3042 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3043 			    0, NULL);
3044 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3045 			    0, NULL);
3046 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3047 			    0, NULL);
3048 		if (adapter->hw.phy.type != e1000_phy_ife) {
3049 			ifmedia_add(&adapter->media,
3050 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3051 			ifmedia_add(&adapter->media,
3052 				IFM_ETHER | IFM_1000_T, 0, NULL);
3053 		}
3054 	}
3055 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3056 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3057 	return (0);
3058 }
3059 
3060 
3061 /*
3062  * Manage DMA'able memory.
3063  */
3064 static void
3065 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3066 {
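	/*
	 * The tag created in em_dma_malloc() allows exactly one
	 * segment (nsegments = 1), so segs[0] always covers the whole
	 * allocation and nseg can safely be ignored here.
	 */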
3067 	if (error)
3068 		return;
3069 	*(bus_addr_t *) arg = segs[0].ds_addr;
3070 }
3071 
3072 static int
3073 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3074         struct em_dma_alloc *dma, int mapflags)
3075 {
3076 	int error;
3077 
3078 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3079 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3080 				BUS_SPACE_MAXADDR,	/* lowaddr */
3081 				BUS_SPACE_MAXADDR,	/* highaddr */
3082 				NULL, NULL,		/* filter, filterarg */
3083 				size,			/* maxsize */
3084 				1,			/* nsegments */
3085 				size,			/* maxsegsize */
3086 				0,			/* flags */
3087 				NULL,			/* lockfunc */
3088 				NULL,			/* lockarg */
3089 				&dma->dma_tag);
3090 	if (error) {
3091 		device_printf(adapter->dev,
3092 		    "%s: bus_dma_tag_create failed: %d\n",
3093 		    __func__, error);
3094 		goto fail_0;
3095 	}
3096 
3097 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3098 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3099 	if (error) {
3100 		device_printf(adapter->dev,
3101 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3102 		    __func__, (uintmax_t)size, error);
3103 		goto fail_2;
3104 	}
3105 
3106 	dma->dma_paddr = 0;
3107 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3108 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3109 	if (error || dma->dma_paddr == 0) {
3110 		device_printf(adapter->dev,
3111 		    "%s: bus_dmamap_load failed: %d\n",
3112 		    __func__, error);
3113 		goto fail_3;
3114 	}
3115 
3116 	return (0);
3117 
3118 fail_3:
3119 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3120 fail_2:
3121 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3122 	bus_dma_tag_destroy(dma->dma_tag);
3123 fail_0:
3124 	dma->dma_map = NULL;
3125 	dma->dma_tag = NULL;
3126 
3127 	return (error);
3128 }
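
/*
 * A minimal usage sketch for the helper above (hypothetical caller,
 * not taken from this driver):
 *
 *	struct em_dma_alloc dma;
 *
 *	if (em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
 *		/- dma.dma_vaddr is the KVA, dma.dma_paddr the bus address -/
 *		em_dma_free(adapter, &dma);
 *	}
 */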
3129 
3130 static void
3131 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3132 {
3133 	if (dma->dma_tag == NULL)
3134 		return;
3135 	if (dma->dma_map != NULL) {
3136 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3137 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3138 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3139 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3140 		dma->dma_map = NULL;
3141 	}
3142 	bus_dma_tag_destroy(dma->dma_tag);
3143 	dma->dma_tag = NULL;
3144 }
3145 
3146 
3147 /*********************************************************************
3148  *
3149  *  Allocate memory for the transmit and receive rings, and then
3150  *  the descriptors associated with each, called only once at attach.
3151  *
3152  **********************************************************************/
3153 static int
3154 em_allocate_queues(struct adapter *adapter)
3155 {
3156 	device_t		dev = adapter->dev;
3157 	struct tx_ring		*txr = NULL;
3158 	struct rx_ring		*rxr = NULL;
3159 	int rsize, tsize, error = E1000_SUCCESS;
3160 	int txconf = 0, rxconf = 0;
3161 
3162 
3163 	/* Allocate the TX ring struct memory */
3164 	if (!(adapter->tx_rings =
3165 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3166 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3167 		device_printf(dev, "Unable to allocate TX ring memory\n");
3168 		error = ENOMEM;
3169 		goto fail;
3170 	}
3171 
3172 	/* Now allocate the RX */
3173 	if (!(adapter->rx_rings =
3174 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3175 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3176 		device_printf(dev, "Unable to allocate RX ring memory\n");
3177 		error = ENOMEM;
3178 		goto rx_fail;
3179 	}
3180 
3181 	tsize = roundup2(adapter->num_tx_desc *
3182 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3183 	/*
3184 	 * Now set up the TX queues, txconf is needed to handle the
3185 	 * possibility that things fail midcourse and we need to
3186 	 * undo memory gracefully
3187 	 */
3188 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3189 		/* Set up some basics */
3190 		txr = &adapter->tx_rings[i];
3191 		txr->adapter = adapter;
3192 		txr->me = i;
3193 
3194 		/* Initialize the TX lock */
3195 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3196 		    device_get_nameunit(dev), txr->me);
3197 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3198 
3199 		if (em_dma_malloc(adapter, tsize,
3200 			&txr->txdma, BUS_DMA_NOWAIT)) {
3201 			device_printf(dev,
3202 			    "Unable to allocate TX Descriptor memory\n");
3203 			error = ENOMEM;
3204 			goto err_tx_desc;
3205 		}
3206 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3207 		bzero((void *)txr->tx_base, tsize);
3208 
3209 		if (em_allocate_transmit_buffers(txr)) {
3210 			device_printf(dev,
3211 			    "Critical Failure setting up transmit buffers\n");
3212 			error = ENOMEM;
3213 			goto err_tx_desc;
3214 		}
3215 #if __FreeBSD_version >= 800000
3216 		/* Allocate a buf ring */
3217 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3218 		    M_WAITOK, &txr->tx_mtx);
3219 #endif
3220 	}
3221 
3222 	/*
3223 	 * Next the RX queues...
3224 	 */
3225 	rsize = roundup2(adapter->num_rx_desc *
3226 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3227 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3228 		rxr = &adapter->rx_rings[i];
3229 		rxr->adapter = adapter;
3230 		rxr->me = i;
3231 
3232 		/* Initialize the RX lock */
3233 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3234 		    device_get_nameunit(dev), rxr->me);
3235 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3236 
3237 		if (em_dma_malloc(adapter, rsize,
3238 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3239 			device_printf(dev,
3240 			    "Unable to allocate RX Descriptor memory\n");
3241 			error = ENOMEM;
3242 			goto err_rx_desc;
3243 		}
3244 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3245 		bzero((void *)rxr->rx_base, rsize);
3246 
3247 		/* Allocate receive buffers for the ring */
3248 		if (em_allocate_receive_buffers(rxr)) {
3249 			device_printf(dev,
3250 			    "Critical Failure setting up receive buffers\n");
3251 			error = ENOMEM;
3252 			goto err_rx_desc;
3253 		}
3254 	}
3255 
3256 	return (0);
3257 
3258 err_rx_desc:
3259 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3260 		em_dma_free(adapter, &rxr->rxdma);
3261 err_tx_desc:
3262 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3263 		em_dma_free(adapter, &txr->txdma);
3264 	free(adapter->rx_rings, M_DEVBUF);
3265 rx_fail:
3266 #if __FreeBSD_version >= 800000
3267 	buf_ring_free(txr->br, M_DEVBUF);
3268 #endif
3269 	free(adapter->tx_rings, M_DEVBUF);
3270 fail:
3271 	return (error);
3272 }
3273 
3274 
3275 /*********************************************************************
3276  *
3277  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3278  *  the information needed to transmit a packet on the wire. This is
3279  *  called only once at attach, setup is done every reset.
3280  *
3281  **********************************************************************/
3282 static int
3283 em_allocate_transmit_buffers(struct tx_ring *txr)
3284 {
3285 	struct adapter *adapter = txr->adapter;
3286 	device_t dev = adapter->dev;
3287 	struct em_buffer *txbuf;
3288 	int error, i;
3289 
3290 	/*
3291 	 * Setup DMA descriptor areas.
3292 	 */
3293 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3294 			       1, 0,			/* alignment, bounds */
3295 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3296 			       BUS_SPACE_MAXADDR,	/* highaddr */
3297 			       NULL, NULL,		/* filter, filterarg */
3298 			       EM_TSO_SIZE,		/* maxsize */
3299 			       EM_MAX_SCATTER,		/* nsegments */
3300 			       PAGE_SIZE,		/* maxsegsize */
3301 			       0,			/* flags */
3302 			       NULL,			/* lockfunc */
3303 			       NULL,			/* lockfuncarg */
3304 			       &txr->txtag))) {
3305 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3306 		goto fail;
3307 	}
3308 
3309 	if (!(txr->tx_buffers =
3310 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3311 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3312 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3313 		error = ENOMEM;
3314 		goto fail;
3315 	}
3316 
3317 	/* Create the descriptor buffer dma maps */
3318 	txbuf = txr->tx_buffers;
3319 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3320 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3321 		if (error != 0) {
3322 			device_printf(dev, "Unable to create TX DMA map\n");
3323 			goto fail;
3324 		}
3325 	}
3326 
3327 	return 0;
3328 fail:
3329 	/* Free everything; this handles a failure partway through setup */
3330 	em_free_transmit_structures(adapter);
3331 	return (error);
3332 }
3333 
3334 /*********************************************************************
3335  *
3336  *  Initialize a transmit ring.
3337  *
3338  **********************************************************************/
3339 static void
3340 em_setup_transmit_ring(struct tx_ring *txr)
3341 {
3342 	struct adapter *adapter = txr->adapter;
3343 	struct em_buffer *txbuf;
3344 	int i;
3345 #ifdef DEV_NETMAP
3346 	struct netmap_adapter *na = NA(adapter->ifp);
3347 	struct netmap_slot *slot;
3348 #endif /* DEV_NETMAP */
3349 
3350 	/* Clear the old descriptor contents */
3351 	EM_TX_LOCK(txr);
3352 #ifdef DEV_NETMAP
3353 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3354 #endif /* DEV_NETMAP */
3355 
3356 	bzero((void *)txr->tx_base,
3357 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3358 	/* Reset indices */
3359 	txr->next_avail_desc = 0;
3360 	txr->next_to_clean = 0;
3361 
3362 	/* Free any existing tx buffers. */
3363 	txbuf = txr->tx_buffers;
3364 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3365 		if (txbuf->m_head != NULL) {
3366 			bus_dmamap_sync(txr->txtag, txbuf->map,
3367 			    BUS_DMASYNC_POSTWRITE);
3368 			bus_dmamap_unload(txr->txtag, txbuf->map);
3369 			m_freem(txbuf->m_head);
3370 			txbuf->m_head = NULL;
3371 		}
3372 #ifdef DEV_NETMAP
3373 		if (slot) {
3374 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3375 			uint64_t paddr;
3376 			void *addr;
3377 
3378 			addr = PNMB(slot + si, &paddr);
3379 			txr->tx_base[i].buffer_addr = htole64(paddr);
3380 			/* reload the map for netmap mode */
3381 			netmap_load_map(txr->txtag, txbuf->map, addr);
3382 		}
3383 #endif /* DEV_NETMAP */
3384 
3385 		/* clear the watch index */
3386 		txbuf->next_eop = -1;
3387         }
3388 
3389 	/* Set number of descriptors available */
3390 	txr->tx_avail = adapter->num_tx_desc;
3391 	txr->queue_status = EM_QUEUE_IDLE;
3392 
3393 	/* Clear checksum offload context. */
3394 	txr->last_hw_offload = 0;
3395 	txr->last_hw_ipcss = 0;
3396 	txr->last_hw_ipcso = 0;
3397 	txr->last_hw_tucss = 0;
3398 	txr->last_hw_tucso = 0;
3399 
3400 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3401 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3402 	EM_TX_UNLOCK(txr);
3403 }
3404 
3405 /*********************************************************************
3406  *
3407  *  Initialize all transmit rings.
3408  *
3409  **********************************************************************/
3410 static void
3411 em_setup_transmit_structures(struct adapter *adapter)
3412 {
3413 	struct tx_ring *txr = adapter->tx_rings;
3414 
3415 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3416 		em_setup_transmit_ring(txr);
3417 
3418 	return;
3419 }
3420 
3421 /*********************************************************************
3422  *
3423  *  Enable transmit unit.
3424  *
3425  **********************************************************************/
3426 static void
3427 em_initialize_transmit_unit(struct adapter *adapter)
3428 {
3429 	struct tx_ring	*txr = adapter->tx_rings;
3430 	struct e1000_hw	*hw = &adapter->hw;
3431 	u32	tctl, tarc, tipg = 0;
3432 
3433 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3434 
3435 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3436 		u64 bus_addr = txr->txdma.dma_paddr;
3437 		/* Base and Len of TX Ring */
3438 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3439 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3440 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3441 	    	    (u32)(bus_addr >> 32));
3442 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3443 	    	    (u32)bus_addr);
3444 		/* Init the HEAD/TAIL indices */
3445 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3446 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3447 
3448 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3449 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3450 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3451 
3452 		txr->queue_status = EM_QUEUE_IDLE;
3453 	}
3454 
3455 	/* Set the default values for the Tx Inter Packet Gap timer */
3456 	switch (adapter->hw.mac.type) {
3457 	case e1000_80003es2lan:
3458 		tipg = DEFAULT_82543_TIPG_IPGR1;
3459 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3460 		    E1000_TIPG_IPGR2_SHIFT;
3461 		break;
3462 	default:
3463 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3464 		    (adapter->hw.phy.media_type ==
3465 		    e1000_media_type_internal_serdes))
3466 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3467 		else
3468 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3469 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3470 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3471 	}
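	/*
	 * The three fields above share a single TIPG register: IPGT sits
	 * in the low bits, with IPGR1 and IPGR2 packed above it via the
	 * *_SHIFT constants used in the code above.
	 */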
3472 
3473 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3474 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3475 
3476 	if (adapter->hw.mac.type >= e1000_82540)
3477 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3478 		    adapter->tx_abs_int_delay.value);
3479 
3480 	if ((adapter->hw.mac.type == e1000_82571) ||
3481 	    (adapter->hw.mac.type == e1000_82572)) {
3482 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3483 		tarc |= SPEED_MODE_BIT;
3484 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3485 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3486 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3487 		tarc |= 1;
3488 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3489 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3490 		tarc |= 1;
3491 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3492 	}
3493 
3494 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3495 	if (adapter->tx_int_delay.value > 0)
3496 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3497 
3498 	/* Program the Transmit Control Register */
3499 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3500 	tctl &= ~E1000_TCTL_CT;
3501 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3502 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3503 
3504 	if (adapter->hw.mac.type >= e1000_82571)
3505 		tctl |= E1000_TCTL_MULR;
3506 
3507 	/* This write will effectively turn on the transmit unit. */
3508 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3509 
3510 }
3511 
3512 
3513 /*********************************************************************
3514  *
3515  *  Free all transmit rings.
3516  *
3517  **********************************************************************/
3518 static void
3519 em_free_transmit_structures(struct adapter *adapter)
3520 {
3521 	struct tx_ring *txr = adapter->tx_rings;
3522 
3523 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3524 		EM_TX_LOCK(txr);
3525 		em_free_transmit_buffers(txr);
3526 		em_dma_free(adapter, &txr->txdma);
3527 		EM_TX_UNLOCK(txr);
3528 		EM_TX_LOCK_DESTROY(txr);
3529 	}
3530 
3531 	free(adapter->tx_rings, M_DEVBUF);
3532 }
3533 
3534 /*********************************************************************
3535  *
3536  *  Free transmit ring related data structures.
3537  *
3538  **********************************************************************/
3539 static void
3540 em_free_transmit_buffers(struct tx_ring *txr)
3541 {
3542 	struct adapter		*adapter = txr->adapter;
3543 	struct em_buffer	*txbuf;
3544 
3545 	INIT_DEBUGOUT("free_transmit_ring: begin");
3546 
3547 	if (txr->tx_buffers == NULL)
3548 		return;
3549 
3550 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3551 		txbuf = &txr->tx_buffers[i];
3552 		if (txbuf->m_head != NULL) {
3553 			bus_dmamap_sync(txr->txtag, txbuf->map,
3554 			    BUS_DMASYNC_POSTWRITE);
3555 			bus_dmamap_unload(txr->txtag,
3556 			    txbuf->map);
3557 			m_freem(txbuf->m_head);
3558 			txbuf->m_head = NULL;
3559 			if (txbuf->map != NULL) {
3560 				bus_dmamap_destroy(txr->txtag,
3561 				    txbuf->map);
3562 				txbuf->map = NULL;
3563 			}
3564 		} else if (txbuf->map != NULL) {
3565 			bus_dmamap_unload(txr->txtag,
3566 			    txbuf->map);
3567 			bus_dmamap_destroy(txr->txtag,
3568 			    txbuf->map);
3569 			txbuf->map = NULL;
3570 		}
3571 	}
3572 #if __FreeBSD_version >= 800000
3573 	if (txr->br != NULL)
3574 		buf_ring_free(txr->br, M_DEVBUF);
3575 #endif
3576 	if (txr->tx_buffers != NULL) {
3577 		free(txr->tx_buffers, M_DEVBUF);
3578 		txr->tx_buffers = NULL;
3579 	}
3580 	if (txr->txtag != NULL) {
3581 		bus_dma_tag_destroy(txr->txtag);
3582 		txr->txtag = NULL;
3583 	}
3584 	return;
3585 }
3586 
3587 
3588 /*********************************************************************
3589  *  The offload context is protocol specific (TCP/UDP) and thus
3590  *  only needs to be set when the protocol changes. A context
3591  *  change can be a performance detriment, and the feature
3592  *  might be better left disabled. The reason lies in the way
3593  *  the controller pipelines requests from the Tx data DMA.
3594  *  Up to four requests can be pipelined, and they may
3595  *  belong to the same packet or to multiple packets. However,
3596  *  all requests for one packet are issued before any request
3597  *  for a subsequent packet, and if a request for the next packet
3598  *  requires a context change, that request will be stalled
3599  *  until the previous request completes. This means setting up
3600  *  a new context effectively disables pipelined Tx data DMA,
3601  *  which in turn greatly slows down performance when sending
3602  *  small frames.
3603  **********************************************************************/
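/*
 * Condensed sketch of the reuse test performed below (illustrative
 * only; field names follow this driver).  When the cached context
 * matches, no context descriptor is consumed:
 *
 *	if (txr->last_hw_offload == offload &&
 *	    txr->last_hw_tucss == tucss &&
 *	    txr->last_hw_tucso == tucso)
 *		return;
 */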
3604 static void
3605 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3606     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3607 {
3608 	struct adapter			*adapter = txr->adapter;
3609 	struct e1000_context_desc	*TXD = NULL;
3610 	struct em_buffer		*tx_buffer;
3611 	int				cur, hdr_len;
3612 	u32				cmd = 0;
3613 	u16				offload = 0;
3614 	u8				ipcso, ipcss, tucso, tucss;
3615 
3616 	ipcss = ipcso = tucss = tucso = 0;
3617 	hdr_len = ip_off + (ip->ip_hl << 2);
3618 	cur = txr->next_avail_desc;
3619 
3620 	/* Setup of IP header checksum. */
3621 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3622 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3623 		offload |= CSUM_IP;
3624 		ipcss = ip_off;
3625 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3626 		/*
3627 		 * Start offset for header checksum calculation.
3628 		 * End offset for header checksum calculation.
3629 		 * Offset of place to put the checksum.
3630 		 */
3631 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3632 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3633 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3634 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3635 		cmd |= E1000_TXD_CMD_IP;
3636 	}
3637 
3638 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3639  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3640  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3641  		offload |= CSUM_TCP;
3642  		tucss = hdr_len;
3643  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3644  		/*
3645  		 * Setting up a new checksum offload context for every frame
3646  		 * takes a lot of processing time for the hardware. This
3647  		 * also reduces performance a lot for small frames, so avoid
3648  		 * it if the driver can reuse a previously configured
3649  		 * checksum offload context.
3650  		 */
3651  		if (txr->last_hw_offload == offload) {
3652  			if (offload & CSUM_IP) {
3653  				if (txr->last_hw_ipcss == ipcss &&
3654  				    txr->last_hw_ipcso == ipcso &&
3655  				    txr->last_hw_tucss == tucss &&
3656  				    txr->last_hw_tucso == tucso)
3657  					return;
3658  			} else {
3659  				if (txr->last_hw_tucss == tucss &&
3660  				    txr->last_hw_tucso == tucso)
3661  					return;
3662  			}
3663   		}
3664  		txr->last_hw_offload = offload;
3665  		txr->last_hw_tucss = tucss;
3666  		txr->last_hw_tucso = tucso;
3667  		/*
3668  		 * Start offset for payload checksum calculation.
3669  		 * End offset for payload checksum calculation.
3670  		 * Offset of place to put the checksum.
3671  		 */
3672 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3673  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3674  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3675  		TXD->upper_setup.tcp_fields.tucso = tucso;
3676  		cmd |= E1000_TXD_CMD_TCP;
3677  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3678  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3679  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;	/* mirror the TCP path so the context cache keys on protocol */
3680  		tucss = hdr_len;
3681  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3682  		/*
3683  		 * Setting up a new checksum offload context for every frame
3684  		 * takes a lot of processing time for the hardware. This
3685  		 * also reduces performance a lot for small frames, so avoid
3686  		 * it if the driver can reuse a previously configured
3687  		 * checksum offload context.
3688  		 */
3689  		if (txr->last_hw_offload == offload) {
3690  			if (offload & CSUM_IP) {
3691  				if (txr->last_hw_ipcss == ipcss &&
3692  				    txr->last_hw_ipcso == ipcso &&
3693  				    txr->last_hw_tucss == tucss &&
3694  				    txr->last_hw_tucso == tucso)
3695  					return;
3696  			} else {
3697  				if (txr->last_hw_tucss == tucss &&
3698  				    txr->last_hw_tucso == tucso)
3699  					return;
3700  			}
3701  		}
3702  		txr->last_hw_offload = offload;
3703  		txr->last_hw_tucss = tucss;
3704  		txr->last_hw_tucso = tucso;
3705  		/*
3706  		 * Start offset for header checksum calculation.
3707  		 * End offset for header checksum calculation.
3708  		 * Offset of place to put the checksum.
3709  		 */
3710 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3711  		TXD->upper_setup.tcp_fields.tucss = tucss;
3712  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3713  		TXD->upper_setup.tcp_fields.tucso = tucso;
3714   	}
3715 
3716  	if (offload & CSUM_IP) {
3717  		txr->last_hw_ipcss = ipcss;
3718  		txr->last_hw_ipcso = ipcso;
3719   	}
3720 
3721 	TXD->tcp_seg_setup.data = htole32(0);
3722 	TXD->cmd_and_length =
3723 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3724 	tx_buffer = &txr->tx_buffers[cur];
3725 	tx_buffer->m_head = NULL;
3726 	tx_buffer->next_eop = -1;
3727 
3728 	if (++cur == adapter->num_tx_desc)
3729 		cur = 0;
3730 
3731 	txr->tx_avail--;
3732 	txr->next_avail_desc = cur;
3733 }
3734 
3735 
3736 /**********************************************************************
3737  *
3738  *  Setup work for hardware segmentation offload (TSO)
3739  *
3740  **********************************************************************/
3741 static void
3742 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3743     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3744 {
3745 	struct adapter			*adapter = txr->adapter;
3746 	struct e1000_context_desc	*TXD;
3747 	struct em_buffer		*tx_buffer;
3748 	int cur, hdr_len;
3749 
3750 	/*
3751 	 * In theory we can use the same TSO context if and only if
3752 	 * the frame is the same type (IP/TCP) and has the same MSS.
3753 	 * However, checking whether a frame has the same IP/TCP
3754 	 * structure is a hard thing, so just ignore that and always
3755 	 * establish a new TSO context.
3756 	 */
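	/*
	 * Worked example (hypothetical frame): ip_off = 14 (Ethernet),
	 * ip_hl = 5 (20-byte IP header) and th_off = 8 (32-byte TCP
	 * header with options) give hdr_len = 14 + 20 + 32 = 66 bytes
	 * of headers preceding the TSO payload.
	 */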
3757 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3758 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3759 		      E1000_TXD_DTYP_D |	/* Data descr type */
3760 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3761 
3762 	/* IP and/or TCP header checksum calculation and insertion. */
3763 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3764 
3765 	cur = txr->next_avail_desc;
3766 	tx_buffer = &txr->tx_buffers[cur];
3767 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3768 
3769 	/*
3770 	 * Start offset for header checksum calculation.
3771 	 * End offset for header checksum calculation.
3772 	 * Offset of place to put the checksum.
3773 	 */
3774 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3775 	TXD->lower_setup.ip_fields.ipcse =
3776 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3777 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3778 	/*
3779 	 * Start offset for payload checksum calculation.
3780 	 * End offset for payload checksum calculation.
3781 	 * Offset of place to put the checksum.
3782 	 */
3783 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3784 	TXD->upper_setup.tcp_fields.tucse = 0;
3785 	TXD->upper_setup.tcp_fields.tucso =
3786 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3787 	/*
3788 	 * Payload size per packet w/o any headers.
3789 	 * Length of all headers up to payload.
3790 	 */
3791 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3792 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3793 
3794 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3795 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3796 				E1000_TXD_CMD_TSE |	/* TSE context */
3797 				E1000_TXD_CMD_IP |	/* Do IP csum */
3798 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3799 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3800 
3801 	tx_buffer->m_head = NULL;
3802 	tx_buffer->next_eop = -1;
3803 
3804 	if (++cur == adapter->num_tx_desc)
3805 		cur = 0;
3806 
3807 	txr->tx_avail--;
3808 	txr->next_avail_desc = cur;
3809 	txr->tx_tso = TRUE;
3810 }
3811 
3812 
3813 /**********************************************************************
3814  *
3815  *  Examine each tx_buffer in the used queue. If the hardware is done
3816  *  processing the packet then free associated resources. The
3817  *  tx_buffer is put back on the free queue.
3818  *
3819  **********************************************************************/
3820 static void
3821 em_txeof(struct tx_ring *txr)
3822 {
3823 	struct adapter	*adapter = txr->adapter;
3824         int first, last, done, processed;
3825         struct em_buffer *tx_buffer;
3826         struct e1000_tx_desc   *tx_desc, *eop_desc;
3827 	struct ifnet   *ifp = adapter->ifp;
3828 
3829 	EM_TX_LOCK_ASSERT(txr);
3830 #ifdef DEV_NETMAP
3831 	if (ifp->if_capenable & IFCAP_NETMAP) {
3832 		struct netmap_adapter *na = NA(ifp);
3833 
3834 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3835 		EM_TX_UNLOCK(txr);
3836 		EM_CORE_LOCK(adapter);
3837 		selwakeuppri(&na->tx_si, PI_NET);
3838 		EM_CORE_UNLOCK(adapter);
3839 		EM_TX_LOCK(txr);
3840 		return;
3841 	}
3842 #endif /* DEV_NETMAP */
3843 
3844 	/* No work, make sure watchdog is off */
3845         if (txr->tx_avail == adapter->num_tx_desc) {
3846 		txr->queue_status = EM_QUEUE_IDLE;
3847                 return;
3848 	}
3849 
3850 	processed = 0;
3851         first = txr->next_to_clean;
3852         tx_desc = &txr->tx_base[first];
3853         tx_buffer = &txr->tx_buffers[first];
3854 	last = tx_buffer->next_eop;
3855         eop_desc = &txr->tx_base[last];
3856 
3857 	/*
3858 	 * Get the index of the first descriptor
3859 	 * AFTER the EOP of the first packet, so
3860 	 * that we can do a simple comparison in
3861 	 * the inner while loop.
3862 	 */
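	/*
	 * Worked example (assumed ring size): with num_tx_desc = 1024 and
	 * the first packet's EOP at descriptor 1023, 'last' wraps to 0 and
	 * the inner loop cleans from 'first' up to, but not including,
	 * index 0.
	 */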
3863 	if (++last == adapter->num_tx_desc)
3864  		last = 0;
3865 	done = last;
3866 
3867         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3868             BUS_DMASYNC_POSTREAD);
3869 
3870         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3871 		/* We clean the range of the packet */
3872 		while (first != done) {
3873                 	tx_desc->upper.data = 0;
3874                 	tx_desc->lower.data = 0;
3875                 	tx_desc->buffer_addr = 0;
3876                 	++txr->tx_avail;
3877 			++processed;
3878 
3879 			if (tx_buffer->m_head) {
3880 				bus_dmamap_sync(txr->txtag,
3881 				    tx_buffer->map,
3882 				    BUS_DMASYNC_POSTWRITE);
3883 				bus_dmamap_unload(txr->txtag,
3884 				    tx_buffer->map);
3885                         	m_freem(tx_buffer->m_head);
3886                         	tx_buffer->m_head = NULL;
3887                 	}
3888 			tx_buffer->next_eop = -1;
3889 			txr->watchdog_time = ticks;
3890 
3891 	                if (++first == adapter->num_tx_desc)
3892 				first = 0;
3893 
3894 	                tx_buffer = &txr->tx_buffers[first];
3895 			tx_desc = &txr->tx_base[first];
3896 		}
3897 		++ifp->if_opackets;
3898 		/* See if we can continue to the next packet */
3899 		last = tx_buffer->next_eop;
3900 		if (last != -1) {
3901         		eop_desc = &txr->tx_base[last];
3902 			/* Get new done point */
3903 			if (++last == adapter->num_tx_desc) last = 0;
3904 			done = last;
3905 		} else
3906 			break;
3907         }
3908         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3909             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3910 
3911         txr->next_to_clean = first;
3912 
3913 	/*
3914 	** Watchdog calculation: we know there's
3915 	** work outstanding or the first return
3916 	** would have been taken, so nothing processed
3917 	** for too long indicates a hang. The local
3918 	** timer will examine this and reset if needed.
3919 	*/
3920 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3921 		txr->queue_status = EM_QUEUE_HUNG;
3922 
3923         /*
3924          * If we have the minimum number of free descriptors, clear
3925          * IFF_DRV_OACTIVE to tell the stack that it is OK to send packets.
3926 	 * Notice that all writes of OACTIVE happen under the
3927 	 * TX lock which, with a single queue, guarantees
3928 	 * sanity.
3929          */
3930         if (txr->tx_avail >= EM_MAX_SCATTER)
3931 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3932 
3933 	/* Disable watchdog if all clean */
3934 	if (txr->tx_avail == adapter->num_tx_desc) {
3935 		txr->queue_status = EM_QUEUE_IDLE;
3936 	}
3937 }
3938 
3939 
3940 /*********************************************************************
3941  *
3942  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3943  *
3944  **********************************************************************/
3945 static void
3946 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3947 {
3948 	struct adapter		*adapter = rxr->adapter;
3949 	struct mbuf		*m;
3950 	bus_dma_segment_t	segs[1];
3951 	struct em_buffer	*rxbuf;
3952 	int			i, j, error, nsegs;
3953 	bool			cleaned = FALSE;
3954 
3955 	i = j = rxr->next_to_refresh;
3956 	/*
3957 	** Get one descriptor beyond
3958 	** our work mark to control
3959 	** the loop.
3960 	*/
3961 	if (++j == adapter->num_rx_desc)
3962 		j = 0;
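	/*
	 * Example (assumed ring size): with num_rx_desc = 256 and
	 * next_to_refresh = 255, j wraps to 0, so the control index
	 * always leads i by one descriptor, modulo the ring size.
	 */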
3963 
3964 	while (j != limit) {
3965 		rxbuf = &rxr->rx_buffers[i];
3966 		if (rxbuf->m_head == NULL) {
3967 			m = m_getjcl(M_NOWAIT, MT_DATA,
3968 			    M_PKTHDR, adapter->rx_mbuf_sz);
3969 			/*
3970 			** If we have a temporary resource shortage
3971 			** that causes a failure, just abort the refresh
3972 			** for now; we will return to this point when
3973 			** reinvoked from em_rxeof.
3974 			*/
3975 			if (m == NULL)
3976 				goto update;
3977 		} else
3978 			m = rxbuf->m_head;
3979 
3980 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3981 		m->m_flags |= M_PKTHDR;
3982 		m->m_data = m->m_ext.ext_buf;
3983 
3984 		/* Use bus_dma machinery to setup the memory mapping  */
3985 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3986 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3987 		if (error != 0) {
3988 			printf("Refresh mbufs: hdr dmamap load"
3989 			    " failure - %d\n", error);
3990 			m_free(m);
3991 			rxbuf->m_head = NULL;
3992 			goto update;
3993 		}
3994 		rxbuf->m_head = m;
3995 		bus_dmamap_sync(rxr->rxtag,
3996 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3997 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3998 		cleaned = TRUE;
3999 
4000 		i = j; /* Next is precalculated for us */
4001 		rxr->next_to_refresh = i;
4002 		/* Calculate next controlling index */
4003 		if (++j == adapter->num_rx_desc)
4004 			j = 0;
4005 	}
4006 update:
4007 	/*
4008 	** Update the tail pointer only if, and
4009 	** only as far as, we have refreshed.
4010 	*/
4011 	if (cleaned)
4012 		E1000_WRITE_REG(&adapter->hw,
4013 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4014 
4015 	return;
4016 }
4017 
4018 
4019 /*********************************************************************
4020  *
4021  *  Allocate memory for rx_buffer structures. Since we use one
4022  *  rx_buffer per received packet, the maximum number of rx_buffers
4023  *  that we'll need is equal to the number of receive descriptors
4024  *  that we've allocated.
4025  *
4026  **********************************************************************/
4027 static int
4028 em_allocate_receive_buffers(struct rx_ring *rxr)
4029 {
4030 	struct adapter		*adapter = rxr->adapter;
4031 	device_t		dev = adapter->dev;
4032 	struct em_buffer	*rxbuf;
4033 	int			error;
4034 
4035 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4036 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4037 	if (rxr->rx_buffers == NULL) {
4038 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4039 		return (ENOMEM);
4040 	}
4041 
4042 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4043 				1, 0,			/* alignment, bounds */
4044 				BUS_SPACE_MAXADDR,	/* lowaddr */
4045 				BUS_SPACE_MAXADDR,	/* highaddr */
4046 				NULL, NULL,		/* filter, filterarg */
4047 				MJUM9BYTES,		/* maxsize */
4048 				1,			/* nsegments */
4049 				MJUM9BYTES,		/* maxsegsize */
4050 				0,			/* flags */
4051 				NULL,			/* lockfunc */
4052 				NULL,			/* lockarg */
4053 				&rxr->rxtag);
4054 	if (error) {
4055 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4056 		    __func__, error);
4057 		goto fail;
4058 	}
4059 
4060 	rxbuf = rxr->rx_buffers;
4061 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4063 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4064 		    &rxbuf->map);
4065 		if (error) {
4066 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4067 			    __func__, error);
4068 			goto fail;
4069 		}
4070 	}
4071 
4072 	return (0);
4073 
4074 fail:
4075 	em_free_receive_structures(adapter);
4076 	return (error);
4077 }
4078 
4079 
4080 /*********************************************************************
4081  *
4082  *  Initialize a receive ring and its buffers.
4083  *
4084  **********************************************************************/
4085 static int
4086 em_setup_receive_ring(struct rx_ring *rxr)
4087 {
4088 	struct	adapter 	*adapter = rxr->adapter;
4089 	struct em_buffer	*rxbuf;
4090 	bus_dma_segment_t	seg[1];
4091 	int			rsize, nsegs, error = 0;
4092 #ifdef DEV_NETMAP
4093 	struct netmap_adapter *na = NA(adapter->ifp);
4094 	struct netmap_slot *slot;
4095 #endif
4096 
4098 	/* Clear the ring contents */
4099 	EM_RX_LOCK(rxr);
4100 	rsize = roundup2(adapter->num_rx_desc *
4101 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4102 	bzero((void *)rxr->rx_base, rsize);
4103 #ifdef DEV_NETMAP
4104 	slot = netmap_reset(na, NR_RX, rxr->me, 0); /* this ring's index, matching the TX path */
4105 #endif
4106 
4107 	/*
4108 	** Free current RX buffer structs and their mbufs
4109 	*/
4110 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4111 		rxbuf = &rxr->rx_buffers[i];
4112 		if (rxbuf->m_head != NULL) {
4113 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4114 			    BUS_DMASYNC_POSTREAD);
4115 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4116 			m_freem(rxbuf->m_head);
4117 			rxbuf->m_head = NULL; /* mark as freed */
4118 		}
4119 	}
4120 
4121 	/* Now replenish the mbufs */
4122         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4123 		rxbuf = &rxr->rx_buffers[j];
4124 #ifdef DEV_NETMAP
4125 		if (slot) {
4126 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4127 			uint64_t paddr;
4128 			void *addr;
4129 
4130 			addr = PNMB(slot + si, &paddr);
4131 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4132 			/* Update descriptor */
4133 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4134 			continue;
4135 		}
4136 #endif /* DEV_NETMAP */
4137 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4138 		    M_PKTHDR, adapter->rx_mbuf_sz);
4139 		if (rxbuf->m_head == NULL) {
4140 			error = ENOBUFS;
4141 			goto fail;
4142 		}
4143 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4144 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4145 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4146 
4147 		/* Get the memory mapping */
4148 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4149 		    rxbuf->map, rxbuf->m_head, seg,
4150 		    &nsegs, BUS_DMA_NOWAIT);
4151 		if (error != 0) {
4152 			m_freem(rxbuf->m_head);
4153 			rxbuf->m_head = NULL;
4154 			goto fail;
4155 		}
4156 		bus_dmamap_sync(rxr->rxtag,
4157 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4158 
4159 		/* Update descriptor */
4160 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4161 	}
4162 	rxr->next_to_check = 0;
4163 	rxr->next_to_refresh = 0;
4164 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4165 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4166 
4167 fail:
4168 	EM_RX_UNLOCK(rxr);
4169 	return (error);
4170 }
4171 
4172 /*********************************************************************
4173  *
4174  *  Initialize all receive rings.
4175  *
4176  **********************************************************************/
4177 static int
4178 em_setup_receive_structures(struct adapter *adapter)
4179 {
4180 	struct rx_ring *rxr = adapter->rx_rings;
4181 	int q;
4182 
4183 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4184 		if (em_setup_receive_ring(rxr))
4185 			goto fail;
4186 
4187 	return (0);
4188 fail:
4189 	/*
4190 	 * Free RX buffers allocated so far; we will only handle
4191 	 * the rings that completed, since the failing case will have
4192 	 * cleaned up after itself. 'q' failed, so it's the terminus.
4193 	 */
4194 	for (int i = 0; i < q; ++i) {
4195 		rxr = &adapter->rx_rings[i];
4196 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4197 			struct em_buffer *rxbuf;
4198 			rxbuf = &rxr->rx_buffers[n];
4199 			if (rxbuf->m_head != NULL) {
4200 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4201 			  	  BUS_DMASYNC_POSTREAD);
4202 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4203 				m_freem(rxbuf->m_head);
4204 				rxbuf->m_head = NULL;
4205 			}
4206 		}
4207 		rxr->next_to_check = 0;
4208 		rxr->next_to_refresh = 0;
4209 	}
4210 
4211 	return (ENOBUFS);
4212 }
4213 
4214 /*********************************************************************
4215  *
4216  *  Free all receive rings.
4217  *
4218  **********************************************************************/
4219 static void
4220 em_free_receive_structures(struct adapter *adapter)
4221 {
4222 	struct rx_ring *rxr = adapter->rx_rings;
4223 
4224 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4225 		em_free_receive_buffers(rxr);
4226 		/* Free the ring memory as well */
4227 		em_dma_free(adapter, &rxr->rxdma);
4228 		EM_RX_LOCK_DESTROY(rxr);
4229 	}
4230 
4231 	free(adapter->rx_rings, M_DEVBUF);
4232 }
4233 
4234 
4235 /*********************************************************************
4236  *
4237  *  Free receive ring data structures
4238  *
4239  **********************************************************************/
4240 static void
4241 em_free_receive_buffers(struct rx_ring *rxr)
4242 {
4243 	struct adapter		*adapter = rxr->adapter;
4244 	struct em_buffer	*rxbuf = NULL;
4245 
4246 	INIT_DEBUGOUT("free_receive_buffers: begin");
4247 
4248 	if (rxr->rx_buffers != NULL) {
4249 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4250 			rxbuf = &rxr->rx_buffers[i];
4251 			if (rxbuf->map != NULL) {
4252 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4253 				    BUS_DMASYNC_POSTREAD);
4254 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4255 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4256 			}
4257 			if (rxbuf->m_head != NULL) {
4258 				m_freem(rxbuf->m_head);
4259 				rxbuf->m_head = NULL;
4260 			}
4261 		}
4262 		free(rxr->rx_buffers, M_DEVBUF);
4263 		rxr->rx_buffers = NULL;
4264 		rxr->next_to_check = 0;
4265 		rxr->next_to_refresh = 0;
4266 	}
4267 
4268 	if (rxr->rxtag != NULL) {
4269 		bus_dma_tag_destroy(rxr->rxtag);
4270 		rxr->rxtag = NULL;
4271 	}
4272 
4273 	return;
4274 }
4275 
4276 
4277 /*********************************************************************
4278  *
4279  *  Enable receive unit.
4280  *
4281  **********************************************************************/
4282 #define MAX_INTS_PER_SEC	8000
4283 #define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
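/*
 * Worked example: with MAX_INTS_PER_SEC = 8000 the macro evaluates to
 * 1000000000 / (8000 * 256) = 488.  The ITR register counts in 256 ns
 * units, so 488 * 256 ns is roughly a 125 us minimum interrupt gap,
 * i.e. about 8000 interrupts per second.
 */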
4284 
4285 static void
4286 em_initialize_receive_unit(struct adapter *adapter)
4287 {
4288 	struct rx_ring	*rxr = adapter->rx_rings;
4289 	struct ifnet	*ifp = adapter->ifp;
4290 	struct e1000_hw	*hw = &adapter->hw;
4291 	u64	bus_addr;
4292 	u32	rctl, rxcsum;
4293 
4294 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4295 
4296 	/*
4297 	 * Make sure receives are disabled while setting
4298 	 * up the descriptor ring
4299 	 */
4300 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4301 	/* Do not disable if ever enabled on this hardware */
4302 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4303 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4304 
4305 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4306 	    adapter->rx_abs_int_delay.value);
4307 	/*
4308 	 * Set the interrupt throttling rate. Value is calculated
4309 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4310 	 */
4311 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4312 
4313 	/*
4314 	** When using MSIX interrupts we need to throttle
4315 	** using the EITR register (82574 only)
4316 	*/
4317 	if (hw->mac.type == e1000_82574) {
4318 		for (int i = 0; i < 4; i++)
4319 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4320 			    DEFAULT_ITR);
4321 		/* Disable accelerated acknowledge */
4322 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4323 	}
4324 
4325 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4326 	if (ifp->if_capenable & IFCAP_RXCSUM)
4327 		rxcsum |= E1000_RXCSUM_TUOFL;
4328 	else
4329 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4330 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4331 
4332 	/*
4333 	** XXX TEMPORARY WORKAROUND: on some systems with 82573,
4334 	** such as the Lenovo X60, long latencies are observed.
4335 	** This change eliminates the problem, but since having
4336 	** positive values in RDTR is a known source of problems on
4337 	** other platforms, another solution is being sought.
4338 	*/
4339 	if (hw->mac.type == e1000_82573)
4340 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4341 
4342 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4343 		/* Setup the Base and Length of the Rx Descriptor Ring */
4344 		bus_addr = rxr->rxdma.dma_paddr;
4345 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4346 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4347 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4348 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4349 		/* Setup the Head and Tail Descriptor Pointers */
4350 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4351 #ifdef DEV_NETMAP
4352 		/*
4353 		 * An init() while a netmap client is active must
4354 		 * preserve the rx buffers passed to userspace.
4355 		 * In this driver it means we adjust RDT to
4356 		 * something different from na->num_rx_desc - 1.
4357 		 */
4358 		if (ifp->if_capenable & IFCAP_NETMAP) {
4359 			struct netmap_adapter *na = NA(adapter->ifp);
4360 			struct netmap_kring *kring = &na->rx_rings[i];
4361 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4362 
4363 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4364 		} else
4365 #endif /* DEV_NETMAP */
4366 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4367 	}
4368 
4369 	/* Set PTHRESH for improved jumbo performance */
4370 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4371 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4372 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4373 	    (ifp->if_mtu > ETHERMTU)) {
4374 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4375 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4376 	}
4377 
4378 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4379 		if (ifp->if_mtu > ETHERMTU)
4380 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4381 		else
4382 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4383 	}
4384 
4385 	/* Setup the Receive Control Register */
4386 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4387 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4388 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4389 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4390 
4391         /* Strip the CRC */
4392         rctl |= E1000_RCTL_SECRC;
4393 
4394         /* Make sure VLAN Filters are off */
4395         rctl &= ~E1000_RCTL_VFE;
4396 	rctl &= ~E1000_RCTL_SBP;
4397 
4398 	if (adapter->rx_mbuf_sz == MCLBYTES)
4399 		rctl |= E1000_RCTL_SZ_2048;
4400 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4401 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4402 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4403 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4404 
4405 	if (ifp->if_mtu > ETHERMTU)
4406 		rctl |= E1000_RCTL_LPE;
4407 	else
4408 		rctl &= ~E1000_RCTL_LPE;
4409 
4410 	/* Write out the settings */
4411 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4412 
4413 	return;
4414 }
4415 
4416 
4417 /*********************************************************************
4418  *
4419  *  This routine executes in interrupt context. It replenishes
4420  *  the mbufs in the descriptor ring and passes data that has
4421  *  been DMA'd into host memory up to the network stack.
4422  *
4423  *  We loop at most count times if count is > 0, or until done if
4424  *  count < 0.
4425  *
4426  *  For polling we also return the number of cleaned packets.
4427  *********************************************************************/
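/*
 * Hypothetical usage (parameter semantics only, not actual call sites):
 *
 *	int rx_done;
 *	em_rxeof(rxr, 100, &rx_done);	poll: clean at most 100 packets
 *	em_rxeof(rxr, -1, NULL);	clean until the ring is caught up
 */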
4428 static bool
4429 em_rxeof(struct rx_ring *rxr, int count, int *done)
4430 {
4431 	struct adapter		*adapter = rxr->adapter;
4432 	struct ifnet		*ifp = adapter->ifp;
4433 	struct mbuf		*mp, *sendmp;
4434 	u8			status = 0;
4435 	u16 			len;
4436 	int			i, processed, rxdone = 0;
4437 	bool			eop;
4438 	struct e1000_rx_desc	*cur;
4439 
4440 	EM_RX_LOCK(rxr);
4441 
4442 #ifdef DEV_NETMAP
4443 	if (ifp->if_capenable & IFCAP_NETMAP) {
4444 		struct netmap_adapter *na = NA(ifp);
4445 
4446 		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4447 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4448 		EM_RX_UNLOCK(rxr);
4449 		EM_CORE_LOCK(adapter);
4450 		selwakeuppri(&na->rx_si, PI_NET);
4451 		EM_CORE_UNLOCK(adapter);
4452 		return (0);
4453 	}
4454 #endif /* DEV_NETMAP */
4455 
4456 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4457 
4458 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4459 			break;
4460 
4461 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4462 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4463 
4464 		cur = &rxr->rx_base[i];
4465 		status = cur->status;
4466 		mp = sendmp = NULL;
4467 
4468 		if ((status & E1000_RXD_STAT_DD) == 0)
4469 			break;
4470 
4471 		len = le16toh(cur->length);
4472 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4473 
4474 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4475 		    (rxr->discard == TRUE)) {
4476 			adapter->dropped_pkts++;
4477 			++rxr->rx_discarded;
4478 			if (!eop) /* Catch subsequent segs */
4479 				rxr->discard = TRUE;
4480 			else
4481 				rxr->discard = FALSE;
4482 			em_rx_discard(rxr, i);
4483 			goto next_desc;
4484 		}
4485 
4486 		/* Assign correct length to the current fragment */
4487 		mp = rxr->rx_buffers[i].m_head;
4488 		mp->m_len = len;
4489 
4490 		/* Trigger for refresh */
4491 		rxr->rx_buffers[i].m_head = NULL;
4492 
4493 		/* First segment? */
4494 		if (rxr->fmp == NULL) {
4495 			mp->m_pkthdr.len = len;
4496 			rxr->fmp = rxr->lmp = mp;
4497 		} else {
4498 			/* Chain mbuf's together */
4499 			mp->m_flags &= ~M_PKTHDR;
4500 			rxr->lmp->m_next = mp;
4501 			rxr->lmp = mp;
4502 			rxr->fmp->m_pkthdr.len += len;
4503 		}
4504 
4505 		if (eop) {
4506 			--count;
4507 			sendmp = rxr->fmp;
4508 			sendmp->m_pkthdr.rcvif = ifp;
4509 			ifp->if_ipackets++;
4510 			em_receive_checksum(cur, sendmp);
4511 #ifndef __NO_STRICT_ALIGNMENT
4512 			if (adapter->hw.mac.max_frame_size >
4513 			    (MCLBYTES - ETHER_ALIGN) &&
4514 			    em_fixup_rx(rxr) != 0)
4515 				goto skip;
4516 #endif
4517 			if (status & E1000_RXD_STAT_VP) {
4518 				sendmp->m_pkthdr.ether_vtag =
4519 				    le16toh(cur->special);
4520 				sendmp->m_flags |= M_VLANTAG;
4521 			}
4522 #ifndef __NO_STRICT_ALIGNMENT
4523 skip:
4524 #endif
4525 			rxr->fmp = rxr->lmp = NULL;
4526 		}
4527 next_desc:
4528 		/* Zero out the receive descriptor's status. */
4529 		cur->status = 0;
4530 		++rxdone;	/* cumulative for POLL */
4531 		++processed;
4532 
4533 		/* Advance our pointers to the next descriptor. */
4534 		if (++i == adapter->num_rx_desc)
4535 			i = 0;
4536 
4537 		/* Send to the stack */
4538 		if (sendmp != NULL) {
4539 			rxr->next_to_check = i;
4540 			EM_RX_UNLOCK(rxr);
4541 			(*ifp->if_input)(ifp, sendmp);
4542 			EM_RX_LOCK(rxr);
4543 			i = rxr->next_to_check;
4544 		}
4545 
4546 		/* Only refresh mbufs every 8 descriptors */
4547 		if (processed == 8) {
4548 			em_refresh_mbufs(rxr, i);
4549 			processed = 0;
4550 		}
4551 	}
4552 
4553 	/* Catch any remaining refresh work */
4554 	if (e1000_rx_unrefreshed(rxr))
4555 		em_refresh_mbufs(rxr, i);
4556 
4557 	rxr->next_to_check = i;
4558 	if (done != NULL)
4559 		*done = rxdone;
4560 	EM_RX_UNLOCK(rxr);
4561 
4562 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4563 }
4564 
4565 static __inline void
4566 em_rx_discard(struct rx_ring *rxr, int i)
4567 {
4568 	struct em_buffer	*rbuf;
4569 
4570 	rbuf = &rxr->rx_buffers[i];
4571 	/* Free any previous pieces */
4572 	if (rxr->fmp != NULL) {
4573 		rxr->fmp->m_flags |= M_PKTHDR;
4574 		m_freem(rxr->fmp);
4575 		rxr->fmp = NULL;
4576 		rxr->lmp = NULL;
4577 	}
4578 	/*
4579 	** Free the buffer and allow em_refresh_mbufs()
4580 	** to clean up and recharge it.
4581 	*/
4582 	if (rbuf->m_head) {
4583 		m_free(rbuf->m_head);
4584 		rbuf->m_head = NULL;
4585 	}
4586 	return;
4587 }
4588 
4589 #ifndef __NO_STRICT_ALIGNMENT
4590 /*
4591  * When jumbo frames are enabled we should realign the entire payload on
4592  * architectures with strict alignment. This is a serious design mistake
4593  * in the 8254x, as it nullifies the benefit of DMA. The 8254x only allows
4594  * the RX buffer size to be 2048/4096/8192/16384; what we really want is
4595  * 2048 - ETHER_ALIGN, to align the payload. On architectures without
4596  * strict alignment restrictions the 8254x still performs unaligned memory
4597  * accesses, which reduces performance too. To avoid copying an entire
4598  * frame to realign it, we allocate a new mbuf and copy only the Ethernet
4599  * header into it. The new mbuf is prepended to the existing mbuf chain.
4600  *
4601  * Be aware that the best performance of the 8254x is achieved only when
4602  * jumbo frames are not used at all on architectures with strict alignment.
4603  */
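/*
 * Illustrative arithmetic (assuming a 4-byte-aligned buffer start):
 * the 14-byte Ethernet header leaves the IP header at offset 14,
 * 2 bytes short of a 4-byte boundary.  Once the fixup below advances
 * m_data by ETHER_HDR_LEN, the frame starts at offset 14 and the IP
 * header lands at offset 28, which is 4-byte aligned again.
 */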
4604 static int
4605 em_fixup_rx(struct rx_ring *rxr)
4606 {
4607 	struct adapter *adapter = rxr->adapter;
4608 	struct mbuf *m, *n;
4609 	int error;
4610 
4611 	error = 0;
4612 	m = rxr->fmp;
4613 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4614 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4615 		m->m_data += ETHER_HDR_LEN;
4616 	} else {
4617 		MGETHDR(n, M_NOWAIT, MT_DATA);
4618 		if (n != NULL) {
4619 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4620 			m->m_data += ETHER_HDR_LEN;
4621 			m->m_len -= ETHER_HDR_LEN;
4622 			n->m_len = ETHER_HDR_LEN;
4623 			M_MOVE_PKTHDR(n, m);
4624 			n->m_next = m;
4625 			rxr->fmp = n;
4626 		} else {
4627 			adapter->dropped_pkts++;
4628 			m_freem(rxr->fmp);
4629 			rxr->fmp = NULL;
4630 			error = ENOMEM;
4631 		}
4632 	}
4633 
4634 	return (error);
4635 }
4636 #endif
4637 
4638 /*********************************************************************
4639  *
4640  *  Verify that the hardware indicated that the checksum is valid.
4641  *  Inform the stack about the status of checksum so that stack
4642  *  doesn't spend time verifying the checksum.
4643  *
4644  *********************************************************************/
4645 static void
4646 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4647 {
4648 	mp->m_pkthdr.csum_flags = 0;
4649 
4650 	/* Ignore Checksum bit is set */
4651 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4652 		return;
4653 
4654 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4655 		return;
4656 
4657 	/* IP Checksum Good? */
4658 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4659 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4660 
4661 	/* TCP or UDP checksum */
4662 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4663 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4664 		mp->m_pkthdr.csum_data = htons(0xffff);
4665 	}
4666 }
4667 
4668 /*
4669  * This routine is run via a vlan
4670  * config EVENT.
4671  */
4672 static void
4673 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4674 {
4675 	struct adapter	*adapter = ifp->if_softc;
4676 	u32		index, bit;
4677 
4678 	if (ifp->if_softc !=  arg)   /* Not our event */
4679 		return;
4680 
4681 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4682                 return;
4683 
4684 	EM_CORE_LOCK(adapter);
4685 	index = (vtag >> 5) & 0x7F;
4686 	bit = vtag & 0x1F;
4687 	adapter->shadow_vfta[index] |= (1 << bit);
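	/*
	 * Worked example: vtag 1000 gives index (1000 >> 5) & 0x7F = 31
	 * and bit 1000 & 0x1F = 8, so bit 8 of shadow_vfta[31] is set.
	 */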
4688 	++adapter->num_vlans;
4689 	/* Re-init to load the changes */
4690 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4691 		em_init_locked(adapter);
4692 	EM_CORE_UNLOCK(adapter);
4693 }
4694 
4695 /*
4696  * This routine is run via a vlan
4697  * unconfig EVENT.
4698  */
4699 static void
4700 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4701 {
4702 	struct adapter	*adapter = ifp->if_softc;
4703 	u32		index, bit;
4704 
4705 	if (ifp->if_softc !=  arg)
4706 		return;
4707 
4708 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4709                 return;
4710 
4711 	EM_CORE_LOCK(adapter);
4712 	index = (vtag >> 5) & 0x7F;
4713 	bit = vtag & 0x1F;
4714 	adapter->shadow_vfta[index] &= ~(1 << bit);
4715 	--adapter->num_vlans;
4716 	/* Re-init to load the changes */
4717 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4718 		em_init_locked(adapter);
4719 	EM_CORE_UNLOCK(adapter);
4720 }
4721 
4722 static void
4723 em_setup_vlan_hw_support(struct adapter *adapter)
4724 {
4725 	struct e1000_hw *hw = &adapter->hw;
4726 	u32             reg;
4727 
4728 	/*
4729 	** We get here thru init_locked, meaning
4730 	** a soft reset, which has already cleared
4731 	** the VFTA and other state, so if no
4732 	** VLANs have been registered, do nothing.
4733 	*/
4734 	if (adapter->num_vlans == 0)
4735                 return;
4736 
4737 	/*
4738 	** A soft reset zeroes out the VFTA, so
4739 	** we need to repopulate it now.
4740 	*/
4741 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4742                 if (adapter->shadow_vfta[i] != 0)
4743 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4744                             i, adapter->shadow_vfta[i]);
4745 
4746 	reg = E1000_READ_REG(hw, E1000_CTRL);
4747 	reg |= E1000_CTRL_VME;
4748 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4749 
4750 	/* Enable the Filter Table */
4751 	reg = E1000_READ_REG(hw, E1000_RCTL);
4752 	reg &= ~E1000_RCTL_CFIEN;
4753 	reg |= E1000_RCTL_VFE;
4754 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4755 }
4756 
4757 static void
4758 em_enable_intr(struct adapter *adapter)
4759 {
4760 	struct e1000_hw *hw = &adapter->hw;
4761 	u32 ims_mask = IMS_ENABLE_MASK;
4762 
4763 	if (hw->mac.type == e1000_82574) {
4764 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4765 		ims_mask |= EM_MSIX_MASK;
4766 	}
4767 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4768 }
4769 
4770 static void
4771 em_disable_intr(struct adapter *adapter)
4772 {
4773 	struct e1000_hw *hw = &adapter->hw;
4774 
4775 	if (hw->mac.type == e1000_82574)
4776 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4777 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4778 }
4779 
4780 /*
4781  * Bit of a misnomer: what this really means is
4782  * to enable OS management of the system, i.e.
4783  * to disable special hardware management features.
4784  */
4785 static void
4786 em_init_manageability(struct adapter *adapter)
4787 {
4788 	/* A shared code workaround */
4789 #define E1000_82542_MANC2H E1000_MANC2H
4790 	if (adapter->has_manage) {
4791 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4792 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4793 
4794 		/* disable hardware interception of ARP */
4795 		manc &= ~(E1000_MANC_ARP_EN);
4796 
4797                 /* enable receiving management packets to the host */
4798 		manc |= E1000_MANC_EN_MNG2HOST;
4799 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4800 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4801 		manc2h |= E1000_MNG2HOST_PORT_623;
4802 		manc2h |= E1000_MNG2HOST_PORT_664;
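		/*
		 * Ports 623 and 664 are the standard ASF/RMCP management
		 * ports; setting these bits forwards such packets to the
		 * host.
		 */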
4803 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4804 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4805 	}
4806 }
4807 
4808 /*
4809  * Give control back to hardware management
4810  * controller if there is one.
4811  */
4812 static void
4813 em_release_manageability(struct adapter *adapter)
4814 {
4815 	if (adapter->has_manage) {
4816 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4817 
4818 		/* re-enable hardware interception of ARP */
4819 		manc |= E1000_MANC_ARP_EN;
4820 		manc &= ~E1000_MANC_EN_MNG2HOST;
4821 
4822 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4823 	}
4824 }
4825 
4826 /*
4827  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4828  * For ASF and Pass Through versions of f/w this means
4829  * that the driver is loaded. For AMT version type f/w
4830  * this means that the network i/f is open.
4831  */
4832 static void
4833 em_get_hw_control(struct adapter *adapter)
4834 {
4835 	u32 ctrl_ext, swsm;
4836 
4837 	if (adapter->hw.mac.type == e1000_82573) {
4838 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4839 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4840 		    swsm | E1000_SWSM_DRV_LOAD);
4841 		return;
4842 	}
4843 	/* else */
4844 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4845 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4846 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4847 	return;
4848 }
4849 
4850 /*
4851  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4852  * For ASF and Pass Through versions of f/w this means that
4853  * the driver is no longer loaded. For AMT versions of the
4854  * f/w this means that the network i/f is closed.
4855  */
4856 static void
4857 em_release_hw_control(struct adapter *adapter)
4858 {
4859 	u32 ctrl_ext, swsm;
4860 
4861 	if (!adapter->has_manage)
4862 		return;
4863 
4864 	if (adapter->hw.mac.type == e1000_82573) {
4865 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4866 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4867 		    swsm & ~E1000_SWSM_DRV_LOAD);
4868 		return;
4869 	}
4870 	/* else */
4871 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4872 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4873 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4874 	return;
4875 }
4876 
4877 static int
4878 em_is_valid_ether_addr(u8 *addr)
4879 {
4880 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4881 
4882 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4883 		return (FALSE);
4884 	}
4885 
4886 	return (TRUE);
4887 }
4888 
4889 /*
4890 ** Parse the interface capabilities with regard
4891 ** to both system management and wake-on-lan for
4892 ** later use.
4893 */
4894 static void
4895 em_get_wakeup(device_t dev)
4896 {
4897 	struct adapter	*adapter = device_get_softc(dev);
4898 	u16		eeprom_data = 0, device_id, apme_mask;
4899 
4900 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4901 	apme_mask = EM_EEPROM_APME;
4902 
4903 	switch (adapter->hw.mac.type) {
4904 	case e1000_82573:
4905 	case e1000_82583:
4906 		adapter->has_amt = TRUE;
4907 		/* Falls thru */
4908 	case e1000_82571:
4909 	case e1000_82572:
4910 	case e1000_80003es2lan:
4911 		if (adapter->hw.bus.func == 1) {
4912 			e1000_read_nvm(&adapter->hw,
4913 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4914 			break;
4915 		} else
4916 			e1000_read_nvm(&adapter->hw,
4917 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4918 		break;
4919 	case e1000_ich8lan:
4920 	case e1000_ich9lan:
4921 	case e1000_ich10lan:
4922 	case e1000_pchlan:
4923 	case e1000_pch2lan:
4924 		apme_mask = E1000_WUC_APME;
4925 		adapter->has_amt = TRUE;
4926 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4927 		break;
4928 	default:
4929 		e1000_read_nvm(&adapter->hw,
4930 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4931 		break;
4932 	}
4933 	if (eeprom_data & apme_mask)
4934 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4935 	/*
4936          * We have the eeprom settings; now apply the special cases
4937          * where the eeprom may be wrong or the board won't support
4938          * wake on lan on a particular port.
4939 	 */
4940 	device_id = pci_get_device(dev);
4941         switch (device_id) {
4942 	case E1000_DEV_ID_82571EB_FIBER:
4943 		/* Wake events only supported on port A for dual fiber
4944 		 * regardless of eeprom setting */
4945 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4946 		    E1000_STATUS_FUNC_1)
4947 			adapter->wol = 0;
4948 		break;
4949 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4950 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4951 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4952                 /* if quad port adapter, disable WoL on all but port A */
4953 		if (global_quad_port_a != 0)
4954 			adapter->wol = 0;
4955 		/* Reset for multiple quad port adapters */
4956 		if (++global_quad_port_a == 4)
4957 			global_quad_port_a = 0;
4958                 break;
4959 	}
4960 	return;
4961 }
4962 
4963 
4964 /*
4965  * Enable PCI Wake On Lan capability
4966  */
4967 static void
4968 em_enable_wakeup(device_t dev)
4969 {
4970 	struct adapter	*adapter = device_get_softc(dev);
4971 	struct ifnet	*ifp = adapter->ifp;
4972 	u32		pmc, ctrl, ctrl_ext, rctl;
4973 	u16     	status;
4974 
4975 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4976 		return;
4977 
4978 	/* Advertise the wakeup capability */
4979 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4980 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4981 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4982 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4983 
4984 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4985 	    (adapter->hw.mac.type == e1000_pchlan) ||
4986 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4987 	    (adapter->hw.mac.type == e1000_ich10lan))
4988 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4989 
4990 	/* Keep the laser running on Fiber adapters */
4991 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4992 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4993 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4994 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4995 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4996 	}
4997 
4998 	/*
4999 	** Determine type of Wakeup: note that wol
5000 	** is set with all bits on by default.
5001 	*/
5002 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5003 		adapter->wol &= ~E1000_WUFC_MAG;
5004 
5005 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5006 		adapter->wol &= ~E1000_WUFC_MC;
5007 	else {
5008 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5009 		rctl |= E1000_RCTL_MPE;
5010 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5011 	}
5012 
5013 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5014 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5015 		if (em_enable_phy_wakeup(adapter))
5016 			return;
5017 	} else {
5018 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5019 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5020 	}
5021 
5022 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5023 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5024 
5025         /* Request PME */
5026         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5027 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5028 	if (ifp->if_capenable & IFCAP_WOL)
5029 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5030         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5031 
5032 	return;
5033 }
5034 
5035 /*
5036 ** WOL in the newer chipset interfaces (pchlan)
5037 ** requires things to be copied into the PHY.
5038 */
5039 static int
5040 em_enable_phy_wakeup(struct adapter *adapter)
5041 {
5042 	struct e1000_hw *hw = &adapter->hw;
5043 	u32 mreg, ret = 0;
5044 	u16 preg;
5045 
5046 	/* copy MAC RARs to PHY RARs */
5047 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5048 
5049 	/* copy MAC MTA to PHY MTA */
5050 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5051 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5052 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5053 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5054 		    (u16)((mreg >> 16) & 0xFFFF));
5055 	}
5056 
5057 	/* configure PHY Rx Control register */
5058 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5059 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5060 	if (mreg & E1000_RCTL_UPE)
5061 		preg |= BM_RCTL_UPE;
5062 	if (mreg & E1000_RCTL_MPE)
5063 		preg |= BM_RCTL_MPE;
5064 	preg &= ~(BM_RCTL_MO_MASK);
5065 	if (mreg & E1000_RCTL_MO_3)
5066 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5067 				<< BM_RCTL_MO_SHIFT);
5068 	if (mreg & E1000_RCTL_BAM)
5069 		preg |= BM_RCTL_BAM;
5070 	if (mreg & E1000_RCTL_PMCF)
5071 		preg |= BM_RCTL_PMCF;
5072 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5073 	if (mreg & E1000_CTRL_RFCE)
5074 		preg |= BM_RCTL_RFCE;
5075 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5076 
5077 	/* enable PHY wakeup in MAC register */
5078 	E1000_WRITE_REG(hw, E1000_WUC,
5079 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5080 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5081 
5082 	/* configure and enable PHY wakeup in PHY registers */
5083 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5084 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5085 
5086 	/* activate PHY wakeup */
5087 	ret = hw->phy.ops.acquire(hw);
5088 	if (ret) {
5089 		printf("Could not acquire PHY\n");
5090 		return ret;
5091 	}
5092 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5093 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5094 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5095 	if (ret) {
5096 		printf("Could not read PHY page 769\n");
5097 		goto out;
5098 	}
5099 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5100 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5101 	if (ret)
5102 		printf("Could not set PHY Host Wakeup bit\n");
5103 out:
5104 	hw->phy.ops.release(hw);
5105 
5106 	return ret;
5107 }
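
/*
** Illustrative sketch (not compiled): each 32-bit MAC MTA entry is
** mirrored as two 16-bit PHY writes in the loop above. For an MTA
** word of 0xDEADBEEF the writes would be:
*/
#if 0
	mreg = 0xDEADBEEF;
	e1000_write_phy_reg(hw, BM_MTA(i), 0xBEEF);	/* low word  */
	e1000_write_phy_reg(hw, BM_MTA(i) + 1, 0xDEAD);	/* high word */
#endif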
5108 
5109 static void
5110 em_led_func(void *arg, int onoff)
5111 {
5112 	struct adapter	*adapter = arg;
5113 
5114 	EM_CORE_LOCK(adapter);
5115 	if (onoff) {
5116 		e1000_setup_led(&adapter->hw);
5117 		e1000_led_on(&adapter->hw);
5118 	} else {
5119 		e1000_led_off(&adapter->hw);
5120 		e1000_cleanup_led(&adapter->hw);
5121 	}
5122 	EM_CORE_UNLOCK(adapter);
5123 }
5124 
5125 /*
5126 ** Disable the PCIe L0s and L1 (ASPM) link states
5127 */
5128 static void
5129 em_disable_aspm(struct adapter *adapter)
5130 {
5131 	int		base, reg;
5132 	u16		link_cap, link_ctrl;
5133 	device_t	dev = adapter->dev;
5134 
5135 	switch (adapter->hw.mac.type) {
5136 	case e1000_82573:
5137 	case e1000_82574:
5138 	case e1000_82583:
5139 		break;
5140 	default:
5141 		return;
5142 	}
5143 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5144 		return;
5145 	reg = base + PCIER_LINK_CAP;
5146 	link_cap = pci_read_config(dev, reg, 2);
5147 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5148 		return;
5149 	reg = base + PCIER_LINK_CTL;
5150 	link_ctrl = pci_read_config(dev, reg, 2);
5151 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5152 	pci_write_config(dev, reg, link_ctrl, 2);
5153 	return;
5154 }
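
/*
** Illustrative note (not driver code): the write above clears the ASPM
** Control field (the L0s/L1 enables) of the PCIe Link Control register.
** Assuming a pciconf(8) that accepts device names, the capability can
** be inspected from userland with:
**
**	pciconf -lc em0
*/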
5155 
5156 /**********************************************************************
5157  *
5158  *  Update the board statistics counters.
5159  *
5160  **********************************************************************/
5161 static void
5162 em_update_stats_counters(struct adapter *adapter)
5163 {
5164 	struct ifnet   *ifp;
5165 
5166 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5167 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5168 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5169 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5170 	}
5171 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5172 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5173 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5174 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5175 
5176 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5177 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5178 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5179 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5180 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5181 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5182 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5183 	/*
5184 	** For watchdog management we need to know if we have been
5185 	** paused during the last interval, so capture that here.
5186 	*/
5187 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5188 	adapter->stats.xoffrxc += adapter->pause_frames;
5189 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5190 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5191 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5192 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5193 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5194 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5195 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5196 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5197 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5198 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5199 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5200 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5201 
5202 	/*
5203 	** For the 64-bit byte counters the low dword must be read first;
5204 	** both registers clear on the read of the high dword. (See the
5205 	** illustrative helper after this function.)
5206 	*/
5204 
5205 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5206 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5207 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5208 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5209 
5210 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5211 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5212 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5213 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5214 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5215 
5216 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
5217 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5218 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
5219 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5218 
5219 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5220 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5221 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5222 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5223 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5224 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5225 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5226 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5227 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5228 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5229 
5230 	/* Interrupt Counts */
5231 
5232 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5233 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5234 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5235 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5236 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5237 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5238 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5239 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5240 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5241 
5242 	if (adapter->hw.mac.type >= e1000_82543) {
5243 		adapter->stats.algnerrc +=
5244 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5245 		adapter->stats.rxerrc +=
5246 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5247 		adapter->stats.tncrs +=
5248 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5249 		adapter->stats.cexterr +=
5250 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5251 		adapter->stats.tsctc +=
5252 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5253 		adapter->stats.tsctfc +=
5254 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5255 	}
5256 	ifp = adapter->ifp;
5257 
5258 	ifp->if_collisions = adapter->stats.colc;
5259 
5260 	/* Rx Errors */
5261 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5262 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5263 	    adapter->stats.ruc + adapter->stats.roc +
5264 	    adapter->stats.mpc + adapter->stats.cexterr;
5265 
5266 	/* Tx Errors */
5267 	ifp->if_oerrors = adapter->stats.ecol +
5268 	    adapter->stats.latecol + adapter->watchdog_events;
5269 }
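
/*
** Illustrative sketch (not compiled), referenced from the 64-bit
** counter comment in em_update_stats_counters() above: reading the
** low dword latches the pair and the high-dword read clears both,
** so a generic helper (hypothetical name) must read low before high.
*/
#if 0
static u64
em_read_64bit_counter(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64 val;

	val = E1000_READ_REG(hw, lo_reg);		/* must come first */
	val |= (u64)E1000_READ_REG(hw, hi_reg) << 32;	/* clears the pair */
	return (val);
}
#endif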
5270 
5271 /* Export a single 32-bit register via a read-only sysctl. */
5272 static int
5273 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5274 {
5275 	struct adapter *adapter;
5276 	u_int val;
5277 
5278 	adapter = oidp->oid_arg1;
5279 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5280 	return (sysctl_handle_int(oidp, &val, 0, req));
5281 }
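
/*
** Illustrative note (not driver code): registers exported through this
** handler are read live from hardware on every access, e.g. for unit 0
** (using the "device_control" OID registered below):
**
**	sysctl dev.em.0.device_control
*/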
5282 
5283 /*
5284  * Add sysctl variables, one per statistic, to the system.
5285  */
5286 static void
5287 em_add_hw_stats(struct adapter *adapter)
5288 {
5289 	device_t dev = adapter->dev;
5290 
5291 	struct tx_ring *txr = adapter->tx_rings;
5292 	struct rx_ring *rxr = adapter->rx_rings;
5293 
5294 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5295 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5296 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5297 	struct e1000_hw_stats *stats = &adapter->stats;
5298 
5299 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5300 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5301 
5302 #define QUEUE_NAME_LEN 32
5303 	char namebuf[QUEUE_NAME_LEN];
5304 
5305 	/* Driver Statistics */
5306 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5307 			CTLFLAG_RD, &adapter->link_irq,
5308 			"Link MSIX IRQ Handled");
5309 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5310 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5311 			 "Std mbuf failed");
5312 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5313 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5314 			 "Std mbuf cluster failed");
5315 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5316 			CTLFLAG_RD, &adapter->dropped_pkts,
5317 			"Driver dropped packets");
5318 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5319 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5320 			"Driver tx dma failure in xmit");
5321 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5322 			CTLFLAG_RD, &adapter->rx_overruns,
5323 			"RX overruns");
5324 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5325 			CTLFLAG_RD, &adapter->watchdog_events,
5326 			"Watchdog timeouts");
5327 
5328 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5329 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5330 			em_sysctl_reg_handler, "IU",
5331 			"Device Control Register");
5332 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5333 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5334 			em_sysctl_reg_handler, "IU",
5335 			"Receiver Control Register");
5336 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5337 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5338 			"Flow Control High Watermark");
5339 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5340 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5341 			"Flow Control Low Watermark");
5342 
5343 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5344 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5345 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5346 					    CTLFLAG_RD, NULL, "Queue Name");
5347 		queue_list = SYSCTL_CHILDREN(queue_node);
5348 
5349 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5350 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5351 				E1000_TDH(txr->me),
5352 				em_sysctl_reg_handler, "IU",
5353 				"Transmit Descriptor Head");
5354 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5355 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5356 				E1000_TDT(txr->me),
5357 				em_sysctl_reg_handler, "IU",
5358 				"Transmit Descriptor Tail");
5359 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5360 				CTLFLAG_RD, &txr->tx_irq,
5361 				"Queue MSI-X Transmit Interrupts");
5362 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5363 				CTLFLAG_RD, &txr->no_desc_avail,
5364 				"Queue No Descriptor Available");
5365 
5366 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5367 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5368 				E1000_RDH(rxr->me),
5369 				em_sysctl_reg_handler, "IU",
5370 				"Receive Descriptor Head");
5371 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5372 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5373 				E1000_RDT(rxr->me),
5374 				em_sysctl_reg_handler, "IU",
5375 				"Receive Descriptor Tail");
5376 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5377 				CTLFLAG_RD, &rxr->rx_irq,
5378 				"Queue MSI-X Receive Interrupts");
5379 	}
5380 
5381 	/* MAC stats get their own sub node */
5382 
5383 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5384 				    CTLFLAG_RD, NULL, "Statistics");
5385 	stat_list = SYSCTL_CHILDREN(stat_node);
5386 
5387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5388 			CTLFLAG_RD, &stats->ecol,
5389 			"Excessive collisions");
5390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5391 			CTLFLAG_RD, &stats->scc,
5392 			"Single collisions");
5393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5394 			CTLFLAG_RD, &stats->mcc,
5395 			"Multiple collisions");
5396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5397 			CTLFLAG_RD, &stats->latecol,
5398 			"Late collisions");
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5400 			CTLFLAG_RD, &stats->colc,
5401 			"Collision Count");
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5403 			CTLFLAG_RD, &adapter->stats.symerrs,
5404 			"Symbol Errors");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5406 			CTLFLAG_RD, &adapter->stats.sec,
5407 			"Sequence Errors");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5409 			CTLFLAG_RD, &adapter->stats.dc,
5410 			"Defer Count");
5411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5412 			CTLFLAG_RD, &adapter->stats.mpc,
5413 			"Missed Packets");
5414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5415 			CTLFLAG_RD, &adapter->stats.rnbc,
5416 			"Receive No Buffers");
5417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5418 			CTLFLAG_RD, &adapter->stats.ruc,
5419 			"Receive Undersize");
5420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5421 			CTLFLAG_RD, &adapter->stats.rfc,
5422 			"Fragmented Packets Received");
5423 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5424 			CTLFLAG_RD, &adapter->stats.roc,
5425 			"Oversized Packets Received");
5426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5427 			CTLFLAG_RD, &adapter->stats.rjc,
5428 			"Received Jabber");
5429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5430 			CTLFLAG_RD, &adapter->stats.rxerrc,
5431 			"Receive Errors");
5432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5433 			CTLFLAG_RD, &adapter->stats.crcerrs,
5434 			"CRC errors");
5435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5436 			CTLFLAG_RD, &adapter->stats.algnerrc,
5437 			"Alignment Errors");
5438 	/* On 82575 these are collision counts */
5439 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5440 			CTLFLAG_RD, &adapter->stats.cexterr,
5441 			"Collision/Carrier extension errors");
5442 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5443 			CTLFLAG_RD, &adapter->stats.xonrxc,
5444 			"XON Received");
5445 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5446 			CTLFLAG_RD, &adapter->stats.xontxc,
5447 			"XON Transmitted");
5448 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5449 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5450 			"XOFF Received");
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5452 			CTLFLAG_RD, &adapter->stats.xofftxc,
5453 			"XOFF Transmitted");
5454 
5455 	/* Packet Reception Stats */
5456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5457 			CTLFLAG_RD, &adapter->stats.tpr,
5458 			"Total Packets Received");
5459 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5460 			CTLFLAG_RD, &adapter->stats.gprc,
5461 			"Good Packets Received");
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5463 			CTLFLAG_RD, &adapter->stats.bprc,
5464 			"Broadcast Packets Received");
5465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5466 			CTLFLAG_RD, &adapter->stats.mprc,
5467 			"Multicast Packets Received");
5468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5469 			CTLFLAG_RD, &adapter->stats.prc64,
5470 			"64 byte frames received");
5471 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5472 			CTLFLAG_RD, &adapter->stats.prc127,
5473 			"65-127 byte frames received");
5474 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5475 			CTLFLAG_RD, &adapter->stats.prc255,
5476 			"128-255 byte frames received");
5477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5478 			CTLFLAG_RD, &adapter->stats.prc511,
5479 			"256-511 byte frames received");
5480 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5481 			CTLFLAG_RD, &adapter->stats.prc1023,
5482 			"512-1023 byte frames received");
5483 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5484 			CTLFLAG_RD, &adapter->stats.prc1522,
5485 			"1024-1522 byte frames received");
5486 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5487 			CTLFLAG_RD, &adapter->stats.gorc,
5488 			"Good Octets Received");
5489 
5490 	/* Packet Transmission Stats */
5491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5492 			CTLFLAG_RD, &adapter->stats.gotc,
5493 			"Good Octets Transmitted");
5494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5495 			CTLFLAG_RD, &adapter->stats.tpt,
5496 			"Total Packets Transmitted");
5497 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5498 			CTLFLAG_RD, &adapter->stats.gptc,
5499 			"Good Packets Transmitted");
5500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5501 			CTLFLAG_RD, &adapter->stats.bptc,
5502 			"Broadcast Packets Transmitted");
5503 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5504 			CTLFLAG_RD, &adapter->stats.mptc,
5505 			"Multicast Packets Transmitted");
5506 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5507 			CTLFLAG_RD, &adapter->stats.ptc64,
5508 			"64 byte frames transmitted");
5509 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5510 			CTLFLAG_RD, &adapter->stats.ptc127,
5511 			"65-127 byte frames transmitted");
5512 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5513 			CTLFLAG_RD, &adapter->stats.ptc255,
5514 			"128-255 byte frames transmitted");
5515 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5516 			CTLFLAG_RD, &adapter->stats.ptc511,
5517 			"256-511 byte frames transmitted");
5518 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5519 			CTLFLAG_RD, &adapter->stats.ptc1023,
5520 			"512-1023 byte frames transmitted");
5521 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5522 			CTLFLAG_RD, &adapter->stats.ptc1522,
5523 			"1024-1522 byte frames transmitted");
5524 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5525 			CTLFLAG_RD, &adapter->stats.tsctc,
5526 			"TSO Contexts Transmitted");
5527 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5528 			CTLFLAG_RD, &adapter->stats.tsctfc,
5529 			"TSO Contexts Failed");
5530 
5531 
5532 	/* Interrupt Stats */
5533 
5534 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5535 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5536 	int_list = SYSCTL_CHILDREN(int_node);
5537 
5538 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5539 			CTLFLAG_RD, &adapter->stats.iac,
5540 			"Interrupt Assertion Count");
5541 
5542 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5543 			CTLFLAG_RD, &adapter->stats.icrxptc,
5544 			"Interrupt Cause Rx Pkt Timer Expire Count");
5545 
5546 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5547 			CTLFLAG_RD, &adapter->stats.icrxatc,
5548 			"Interrupt Cause Rx Abs Timer Expire Count");
5549 
5550 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5551 			CTLFLAG_RD, &adapter->stats.ictxptc,
5552 			"Interrupt Cause Tx Pkt Timer Expire Count");
5553 
5554 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5555 			CTLFLAG_RD, &adapter->stats.ictxatc,
5556 			"Interrupt Cause Tx Abs Timer Expire Count");
5557 
5558 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5559 			CTLFLAG_RD, &adapter->stats.ictxqec,
5560 			"Interrupt Cause Tx Queue Empty Count");
5561 
5562 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5563 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5564 			"Interrupt Cause Tx Queue Min Thresh Count");
5565 
5566 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5567 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5568 			"Interrupt Cause Rx Desc Min Thresh Count");
5569 
5570 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5571 			CTLFLAG_RD, &adapter->stats.icrxoc,
5572 			"Interrupt Cause Receiver Overrun Count");
5573 }
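
/*
** Illustrative note (not driver code): the resulting sysctl tree for
** unit 0 looks like:
**
**	dev.em.0.dropped
**	dev.em.0.queue0.txd_head
**	dev.em.0.mac_stats.crc_errs
**	dev.em.0.interrupts.asserts
**
** "sysctl dev.em.0.mac_stats" dumps all of the MAC counters at once.
*/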
5574 
5575 /**********************************************************************
5576  *
5577  *  This routine provides a way to dump out the adapter eeprom,
5578  *  often a useful debug/service tool. This only dumps the first
5579  *  often a useful debug/service tool. Only the first 32 words
5580  *  are dumped; everything of interest lies within that range.
5581  **********************************************************************/
5582 static int
5583 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5584 {
5585 	struct adapter *adapter = (struct adapter *)arg1;
5586 	int error;
5587 	int result;
5588 
5589 	result = -1;
5590 	error = sysctl_handle_int(oidp, &result, 0, req);
5591 
5592 	if (error || !req->newptr)
5593 		return (error);
5594 
5595 	/*
5596 	 * This value will cause a hex dump of the
5597 	 * first 32 16-bit words of the EEPROM to
5598 	 * the screen.
5599 	 */
5600 	if (result == 1)
5601 		em_print_nvm_info(adapter);
5602 
5603 	return (error);
5604 }
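
/*
** Illustrative note (not driver code): assuming this handler is
** registered as "nvm" under the device tree, the dump is triggered
** from userland with:
**
**	sysctl dev.em.0.nvm=1
*/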
5605 
5606 static void
5607 em_print_nvm_info(struct adapter *adapter)
5608 {
5609 	u16	eeprom_data;
5610 	int	i, j, row = 0;
5611 
5612 	/* It's a bit crude, but it gets the job done */
5613 	printf("\nInterface EEPROM Dump:\n");
5614 	printf("Offset\n0x0000  ");
5615 	for (i = 0, j = 0; i < 32; i++, j++) {
5616 		if (j == 8) { /* Make the offset block */
5617 			j = 0; ++row;
5618 			printf("\n0x00%x0  ", row);
5619 		}
5620 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5621 		printf("%04x ", eeprom_data);
5622 	}
5623 	printf("\n");
5624 }
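
/*
** Illustrative sample of the layout printed above (values made up);
** offsets are byte offsets, eight 16-bit words per row:
**
**	Interface EEPROM Dump:
**	Offset
**	0x0000  8086 10d3 ffff ffff ffff ffff ffff ffff
**	0x0010  ....
**	0x0020  ....
**	0x0030  ....
*/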
5625 
5626 static int
5627 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5628 {
5629 	struct em_int_delay_info *info;
5630 	struct adapter *adapter;
5631 	u32 regval;
5632 	int error, usecs, ticks;
5633 
5634 	info = (struct em_int_delay_info *)arg1;
5635 	usecs = info->value;
5636 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5637 	if (error != 0 || req->newptr == NULL)
5638 		return (error);
5639 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5640 		return (EINVAL);
5641 	info->value = usecs;
5642 	ticks = EM_USECS_TO_TICKS(usecs);
5643 
5644 	adapter = info->adapter;
5645 
5646 	EM_CORE_LOCK(adapter);
5647 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5648 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5649 	/* Handle a few special cases. */
5650 	switch (info->offset) {
5651 	case E1000_RDTR:
5652 		break;
5653 	case E1000_TIDV:
5654 		if (ticks == 0) {
5655 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5656 			/* Don't write 0 into the TIDV register. */
5657 			regval++;
5658 		} else
5659 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5660 		break;
5661 	}
5662 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5663 	EM_CORE_UNLOCK(adapter);
5664 	return (0);
5665 }
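
/*
** Illustrative note (not driver code): the delay registers tick in
** ~1.024 us units; EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() from
** if_em.h perform the conversion. Assuming the usual definition
**
**	#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
**
** a request of 100 us becomes (100000 + 512) / 1024 = 98 ticks, which
** the handler above masks into the low 16 bits of the register.
*/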
5666 
5667 static void
5668 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5669 	const char *description, struct em_int_delay_info *info,
5670 	int offset, int value)
5671 {
5672 	info->adapter = adapter;
5673 	info->offset = offset;
5674 	info->value = value;
5675 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5676 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5677 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5678 	    info, 0, em_sysctl_int_delay, "I", description);
5679 }
5680 
5681 static void
5682 em_set_sysctl_value(struct adapter *adapter, const char *name,
5683 	const char *description, int *limit, int value)
5684 {
5685 	*limit = value;
5686 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5687 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5688 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5689 }
5690 
5691 
5692 /*
5693 ** Set flow control using sysctl:
5694 ** Flow control values:
5695 **      0 - off
5696 **      1 - rx pause
5697 **      2 - tx pause
5698 **      3 - full
5699 */
5700 static int
5701 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5702 {
5703 	struct adapter	*adapter = (struct adapter *) arg1;
5704 	int		error;
5705 	int		input = adapter->fc; /* start from the current setting */
5706 
5707 	error = sysctl_handle_int(oidp, &input, 0, req);
5708 
5709 	if ((error) || (req->newptr == NULL))
5710 		return (error);
5711 
5712 	if (input == adapter->fc) /* no change? */
5713 		return (error);
5714 
5715 	switch (input) {
5716 	case e1000_fc_rx_pause:
5717 	case e1000_fc_tx_pause:
5718 	case e1000_fc_full:
5719 	case e1000_fc_none:
5720 		adapter->hw.fc.requested_mode = input;
5721 		adapter->fc = input;
5722 		break;
5723 	default:
5724 		/* reject unknown values */
5725 		return (EINVAL);
5726 	}
5727 
5728 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5729 	e1000_force_mac_fc(&adapter->hw);
5730 	return (error);
5731 }
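
/*
** Illustrative note (not driver code): assuming this handler is
** registered as "fc" under the device tree, full flow control is
** requested with:
**
**	sysctl dev.em.0.fc=3
**
** where 0-3 map to e1000_fc_none/rx_pause/tx_pause/full as listed in
** the comment above.
*/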
5732 
5733 /*
5734 ** Manage Energy Efficient Ethernet:
5735 ** Control values:
5736 **     0 - EEE enabled, 1 - EEE disabled
5737 */
5738 static int
5739 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5740 {
5741 	struct adapter *adapter = (struct adapter *) arg1;
5742 	int		error, value;
5743 
5744 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5745 	error = sysctl_handle_int(oidp, &value, 0, req);
5746 	if (error || req->newptr == NULL)
5747 		return (error);
5748 	EM_CORE_LOCK(adapter);
5749 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5750 	em_init_locked(adapter);
5751 	EM_CORE_UNLOCK(adapter);
5752 	return (0);
5753 }
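
/*
** Illustrative note (not driver code): the stored value is the
** *disable* flag, so writing 1 turns EEE off and 0 turns it on;
** assuming the OID is registered as "eee_control":
**
**	sysctl dev.em.0.eee_control=1	# disable EEE
**
** Note that the write reinitializes the interface via em_init_locked().
*/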
5754 
5755 static int
5756 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5757 {
5758 	struct adapter *adapter;
5759 	int error;
5760 	int result;
5761 
5762 	result = -1;
5763 	error = sysctl_handle_int(oidp, &result, 0, req);
5764 
5765 	if (error || !req->newptr)
5766 		return (error);
5767 
5768 	if (result == 1) {
5769 		adapter = (struct adapter *)arg1;
5770 		em_print_debug_info(adapter);
5771 	}
5772 
5773 	return (error);
5774 }
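
/*
** Illustrative note (not driver code): assuming this handler is
** registered as "debug" under the device tree, the dump below is
** triggered with:
**
**	sysctl dev.em.0.debug=1
*/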
5775 
5776 /*
5777 ** This routine is meant to be fluid; add whatever is
5778 ** needed to debug a problem.  -jfv
5779 */
5780 static void
5781 em_print_debug_info(struct adapter *adapter)
5782 {
5783 	device_t dev = adapter->dev;
5784 	struct tx_ring *txr = adapter->tx_rings;
5785 	struct rx_ring *rxr = adapter->rx_rings;
5786 
5787 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5788 		printf("Interface is RUNNING ");
5789 	else
5790 		printf("Interface is NOT RUNNING ");
5791 
5792 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5793 		printf("and INACTIVE\n");
5794 	else
5795 		printf("and ACTIVE\n");
5796 
5797 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5798 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5799 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5800 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5801 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5802 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5803 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5804 	device_printf(dev, "TX descriptors avail = %d\n",
5805 	    txr->tx_avail);
5806 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5807 	    txr->no_desc_avail);
5808 	device_printf(dev, "RX discarded packets = %ld\n",
5809 	    rxr->rx_discarded);
5810 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5811 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5812 }
5813