xref: /freebsd/sys/dev/e1000/if_em.c (revision 7aa65846327fe5bc7e5961c2f7fd0c61f2ec0b01)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2011, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 
69 #include <net/if_types.h>
70 #include <net/if_vlan_var.h>
71 
72 #include <netinet/in_systm.h>
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip6.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79 
80 #include <machine/in_cksum.h>
81 #include <dev/led/led.h>
82 #include <dev/pci/pcivar.h>
83 #include <dev/pci/pcireg.h>
84 
85 #include "e1000_api.h"
86 #include "e1000_82571.h"
87 #include "if_em.h"
88 
89 /*********************************************************************
90  *  Set this to one to display debug statistics
91  *********************************************************************/
92 int	em_display_debug_stats = 0;
93 
94 /*********************************************************************
95  *  Driver version:
96  *********************************************************************/
97 char em_driver_version[] = "7.3.2";
98 
99 /*********************************************************************
100  *  PCI Device ID Table
101  *
102  *  Used by probe to select devices to load on
103  *  Last field stores an index into e1000_strings
104  *  Last entry must be all 0s
105  *
106  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107  *********************************************************************/
108 
109 static em_vendor_info_t em_vendor_info_array[] =
110 {
111 	/* Intel(R) PRO/1000 Network Connection */
112 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116 						PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118 						PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131 
132 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137 						PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139 						PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	/* required last entry */
176 	{ 0, 0, 0, 0, 0}
177 };
178 
179 /*********************************************************************
180  *  Table of branding strings for all supported NICs.
181  *********************************************************************/
182 
183 static char *em_strings[] = {
184 	"Intel(R) PRO/1000 Network Connection"
185 };
186 
187 /*********************************************************************
188  *  Function prototypes
189  *********************************************************************/
190 static int	em_probe(device_t);
191 static int	em_attach(device_t);
192 static int	em_detach(device_t);
193 static int	em_shutdown(device_t);
194 static int	em_suspend(device_t);
195 static int	em_resume(device_t);
196 #ifdef EM_MULTIQUEUE
197 static int	em_mq_start(struct ifnet *, struct mbuf *);
198 static int	em_mq_start_locked(struct ifnet *,
199 		    struct tx_ring *, struct mbuf *);
200 static void	em_qflush(struct ifnet *);
201 #else
202 static void	em_start(struct ifnet *);
203 static void	em_start_locked(struct ifnet *, struct tx_ring *);
204 #endif
205 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
206 static void	em_init(void *);
207 static void	em_init_locked(struct adapter *);
208 static void	em_stop(void *);
209 static void	em_media_status(struct ifnet *, struct ifmediareq *);
210 static int	em_media_change(struct ifnet *);
211 static void	em_identify_hardware(struct adapter *);
212 static int	em_allocate_pci_resources(struct adapter *);
213 static int	em_allocate_legacy(struct adapter *);
214 static int	em_allocate_msix(struct adapter *);
215 static int	em_allocate_queues(struct adapter *);
216 static int	em_setup_msix(struct adapter *);
217 static void	em_free_pci_resources(struct adapter *);
218 static void	em_local_timer(void *);
219 static void	em_reset(struct adapter *);
220 static int	em_setup_interface(device_t, struct adapter *);
221 
222 static void	em_setup_transmit_structures(struct adapter *);
223 static void	em_initialize_transmit_unit(struct adapter *);
224 static int	em_allocate_transmit_buffers(struct tx_ring *);
225 static void	em_free_transmit_structures(struct adapter *);
226 static void	em_free_transmit_buffers(struct tx_ring *);
227 
228 static int	em_setup_receive_structures(struct adapter *);
229 static int	em_allocate_receive_buffers(struct rx_ring *);
230 static void	em_initialize_receive_unit(struct adapter *);
231 static void	em_free_receive_structures(struct adapter *);
232 static void	em_free_receive_buffers(struct rx_ring *);
233 
234 static void	em_enable_intr(struct adapter *);
235 static void	em_disable_intr(struct adapter *);
236 static void	em_update_stats_counters(struct adapter *);
237 static void	em_add_hw_stats(struct adapter *adapter);
238 static void	em_txeof(struct tx_ring *);
239 static bool	em_rxeof(struct rx_ring *, int, int *);
240 #ifndef __NO_STRICT_ALIGNMENT
241 static int	em_fixup_rx(struct rx_ring *);
242 #endif
243 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
244 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
245 		    struct ip *, u32 *, u32 *);
246 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
247 		    struct tcphdr *, u32 *, u32 *);
248 static void	em_set_promisc(struct adapter *);
249 static void	em_disable_promisc(struct adapter *);
250 static void	em_set_multi(struct adapter *);
251 static void	em_update_link_status(struct adapter *);
252 static void	em_refresh_mbufs(struct rx_ring *, int);
253 static void	em_register_vlan(void *, struct ifnet *, u16);
254 static void	em_unregister_vlan(void *, struct ifnet *, u16);
255 static void	em_setup_vlan_hw_support(struct adapter *);
256 static int	em_xmit(struct tx_ring *, struct mbuf **);
257 static int	em_dma_malloc(struct adapter *, bus_size_t,
258 		    struct em_dma_alloc *, int);
259 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
260 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
261 static void	em_print_nvm_info(struct adapter *);
262 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
263 static void	em_print_debug_info(struct adapter *);
264 static int 	em_is_valid_ether_addr(u8 *);
265 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
266 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
267 		    const char *, struct em_int_delay_info *, int, int);
268 /* Management and WOL Support */
269 static void	em_init_manageability(struct adapter *);
270 static void	em_release_manageability(struct adapter *);
271 static void     em_get_hw_control(struct adapter *);
272 static void     em_release_hw_control(struct adapter *);
273 static void	em_get_wakeup(device_t);
274 static void     em_enable_wakeup(device_t);
275 static int	em_enable_phy_wakeup(struct adapter *);
276 static void	em_led_func(void *, int);
277 static void	em_disable_aspm(struct adapter *);
278 
279 static int	em_irq_fast(void *);
280 
281 /* MSIX handlers */
282 static void	em_msix_tx(void *);
283 static void	em_msix_rx(void *);
284 static void	em_msix_link(void *);
285 static void	em_handle_tx(void *context, int pending);
286 static void	em_handle_rx(void *context, int pending);
287 static void	em_handle_link(void *context, int pending);
288 
289 static void	em_set_sysctl_value(struct adapter *, const char *,
290 		    const char *, int *, int);
291 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
292 
293 static __inline void em_rx_discard(struct rx_ring *, int);
294 
295 #ifdef DEVICE_POLLING
296 static poll_handler_t em_poll;
297 #endif /* POLLING */
298 
299 /*********************************************************************
300  *  FreeBSD Device Interface Entry Points
301  *********************************************************************/
302 
303 static device_method_t em_methods[] = {
304 	/* Device interface */
305 	DEVMETHOD(device_probe, em_probe),
306 	DEVMETHOD(device_attach, em_attach),
307 	DEVMETHOD(device_detach, em_detach),
308 	DEVMETHOD(device_shutdown, em_shutdown),
309 	DEVMETHOD(device_suspend, em_suspend),
310 	DEVMETHOD(device_resume, em_resume),
311 	{0, 0}
312 };
313 
314 static driver_t em_driver = {
315 	"em", em_methods, sizeof(struct adapter),
316 };
317 
318 devclass_t em_devclass;
319 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
320 MODULE_DEPEND(em, pci, 1, 1, 1);
321 MODULE_DEPEND(em, ether, 1, 1, 1);
322 
323 /*********************************************************************
324  *  Tunable default values.
325  *********************************************************************/
326 
327 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
328 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
329 #define M_TSO_LEN			66
330 
331 /* Allow common code without TSO */
332 #ifndef CSUM_TSO
333 #define CSUM_TSO	0
334 #endif
335 
336 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
337 
338 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
339 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
340 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
341 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
342 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
343     0, "Default transmit interrupt delay in usecs");
344 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
345     0, "Default receive interrupt delay in usecs");
346 
347 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
348 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
349 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
350 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
351 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
352     &em_tx_abs_int_delay_dflt, 0,
353     "Default transmit interrupt delay limit in usecs");
354 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
355     &em_rx_abs_int_delay_dflt, 0,
356     "Default receive interrupt delay limit in usecs");
357 
358 static int em_rxd = EM_DEFAULT_RXD;
359 static int em_txd = EM_DEFAULT_TXD;
360 TUNABLE_INT("hw.em.rxd", &em_rxd);
361 TUNABLE_INT("hw.em.txd", &em_txd);
362 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
363     "Number of receive descriptors per queue");
364 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
365     "Number of transmit descriptors per queue");
366 
367 static int em_smart_pwr_down = FALSE;
368 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
369 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
370     0, "Set to true to leave smart power down enabled on newer adapters");
371 
372 /* Controls whether promiscuous also shows bad packets */
373 static int em_debug_sbp = FALSE;
374 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
375 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
376     "Show bad packets in promiscuous mode");
377 
378 static int em_enable_msix = TRUE;
379 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
380 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
381     "Enable MSI-X interrupts");
382 
383 /* How many packets rxeof tries to clean at a time */
384 static int em_rx_process_limit = 100;
385 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
386 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387     &em_rx_process_limit, 0,
388     "Maximum number of received packets to process "
389     "at a time, -1 means unlimited");
390 
391 /* Energy efficient ethernet - default to OFF */
392 static int eee_setting = 0;
393 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
394 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
395     "Enable Energy Efficient Ethernet");
396 
397 /* Global used in WOL setup with multiport cards */
398 static int global_quad_port_a = 0;
399 
400 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
401 #include <dev/netmap/if_em_netmap.h>
402 #endif /* DEV_NETMAP */
403 
404 /*********************************************************************
405  *  Device identification routine
406  *
407  *  em_probe determines if the driver should be loaded on
408  *  adapter based on PCI vendor/device id of the adapter.
409  *
410  *  return BUS_PROBE_DEFAULT on success, positive on failure
411  *********************************************************************/
412 
413 static int
414 em_probe(device_t dev)
415 {
416 	char		adapter_name[60];
417 	u16		pci_vendor_id = 0;
418 	u16		pci_device_id = 0;
419 	u16		pci_subvendor_id = 0;
420 	u16		pci_subdevice_id = 0;
421 	em_vendor_info_t *ent;
422 
423 	INIT_DEBUGOUT("em_probe: begin");
424 
425 	pci_vendor_id = pci_get_vendor(dev);
426 	if (pci_vendor_id != EM_VENDOR_ID)
427 		return (ENXIO);
428 
429 	pci_device_id = pci_get_device(dev);
430 	pci_subvendor_id = pci_get_subvendor(dev);
431 	pci_subdevice_id = pci_get_subdevice(dev);
432 
433 	ent = em_vendor_info_array;
434 	while (ent->vendor_id != 0) {
435 		if ((pci_vendor_id == ent->vendor_id) &&
436 		    (pci_device_id == ent->device_id) &&
437 
438 		    ((pci_subvendor_id == ent->subvendor_id) ||
439 		    (ent->subvendor_id == PCI_ANY_ID)) &&
440 
441 		    ((pci_subdevice_id == ent->subdevice_id) ||
442 		    (ent->subdevice_id == PCI_ANY_ID))) {
443 			sprintf(adapter_name, "%s %s",
444 				em_strings[ent->index],
445 				em_driver_version);
446 			device_set_desc_copy(dev, adapter_name);
447 			return (BUS_PROBE_DEFAULT);
448 		}
449 		ent++;
450 	}
451 
452 	return (ENXIO);
453 }
454 
455 /*********************************************************************
456  *  Device initialization routine
457  *
458  *  The attach entry point is called when the driver is being loaded.
459  *  This routine identifies the type of hardware, allocates all resources
460  *  and initializes the hardware.
461  *
462  *  return 0 on success, positive on failure
463  *********************************************************************/
464 
465 static int
466 em_attach(device_t dev)
467 {
468 	struct adapter	*adapter;
469 	struct e1000_hw	*hw;
470 	int		error = 0;
471 
472 	INIT_DEBUGOUT("em_attach: begin");
473 
474 	if (resource_disabled("em", device_get_unit(dev))) {
475 		device_printf(dev, "Disabled by device hint\n");
476 		return (ENXIO);
477 	}
478 
479 	adapter = device_get_softc(dev);
480 	adapter->dev = adapter->osdep.dev = dev;
481 	hw = &adapter->hw;
482 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
483 
484 	/* SYSCTL stuff */
485 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
486 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
487 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
488 	    em_sysctl_nvm_info, "I", "NVM Information");
489 
490 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
491 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
492 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
493 	    em_sysctl_debug_info, "I", "Debug Information");
494 
495 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498 	    em_set_flowcntl, "I", "Flow Control");
499 
500 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
501 
502 	/* Determine hardware and mac info */
503 	em_identify_hardware(adapter);
504 
505 	/* Setup PCI resources */
506 	if (em_allocate_pci_resources(adapter)) {
507 		device_printf(dev, "Allocation of PCI resources failed\n");
508 		error = ENXIO;
509 		goto err_pci;
510 	}
511 
512 	/*
513 	** For ICH8 and family we need to
514 	** map the flash memory, and this
515 	** must happen after the MAC is
516 	** identified
517 	*/
518 	if ((hw->mac.type == e1000_ich8lan) ||
519 	    (hw->mac.type == e1000_ich9lan) ||
520 	    (hw->mac.type == e1000_ich10lan) ||
521 	    (hw->mac.type == e1000_pchlan) ||
522 	    (hw->mac.type == e1000_pch2lan)) {
523 		int rid = EM_BAR_TYPE_FLASH;
524 		adapter->flash = bus_alloc_resource_any(dev,
525 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
526 		if (adapter->flash == NULL) {
527 			device_printf(dev, "Mapping of Flash failed\n");
528 			error = ENXIO;
529 			goto err_pci;
530 		}
531 		/* This is used in the shared code */
532 		hw->flash_address = (u8 *)adapter->flash;
533 		adapter->osdep.flash_bus_space_tag =
534 		    rman_get_bustag(adapter->flash);
535 		adapter->osdep.flash_bus_space_handle =
536 		    rman_get_bushandle(adapter->flash);
537 	}
538 
539 	/* Do Shared Code initialization */
540 	if (e1000_setup_init_funcs(hw, TRUE)) {
541 		device_printf(dev, "Setup of Shared code failed\n");
542 		error = ENXIO;
543 		goto err_pci;
544 	}
545 
546 	e1000_get_bus_info(hw);
547 
548 	/* Set up some sysctls for the tunable interrupt delays */
549 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
550 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
551 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
552 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
553 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
554 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
555 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
556 	    "receive interrupt delay limit in usecs",
557 	    &adapter->rx_abs_int_delay,
558 	    E1000_REGISTER(hw, E1000_RADV),
559 	    em_rx_abs_int_delay_dflt);
560 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
561 	    "transmit interrupt delay limit in usecs",
562 	    &adapter->tx_abs_int_delay,
563 	    E1000_REGISTER(hw, E1000_TADV),
564 	    em_tx_abs_int_delay_dflt);
565 
566 	/* Sysctl for limiting the amount of work done in the taskqueue */
567 	em_set_sysctl_value(adapter, "rx_processing_limit",
568 	    "max number of rx packets to process", &adapter->rx_process_limit,
569 	    em_rx_process_limit);
570 
571 	/*
572 	 * Validate number of transmit and receive descriptors. It
573 	 * must not exceed hardware maximum, and must be multiple
574 	 * of E1000_DBA_ALIGN.
575 	 */
576 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
577 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
578 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
579 		    EM_DEFAULT_TXD, em_txd);
580 		adapter->num_tx_desc = EM_DEFAULT_TXD;
581 	} else
582 		adapter->num_tx_desc = em_txd;
583 
584 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
585 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
586 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
587 		    EM_DEFAULT_RXD, em_rxd);
588 		adapter->num_rx_desc = EM_DEFAULT_RXD;
589 	} else
590 		adapter->num_rx_desc = em_rxd;
591 
592 	hw->mac.autoneg = DO_AUTO_NEG;
593 	hw->phy.autoneg_wait_to_complete = FALSE;
594 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
595 
596 	/* Copper options */
597 	if (hw->phy.media_type == e1000_media_type_copper) {
598 		hw->phy.mdix = AUTO_ALL_MODES;
599 		hw->phy.disable_polarity_correction = FALSE;
600 		hw->phy.ms_type = EM_MASTER_SLAVE;
601 	}
602 
603 	/*
604 	 * Set the frame limits assuming
605 	 * standard ethernet sized frames.
606 	 */
607 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
608 	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
609 
610 	/*
611 	 * This controls when hardware reports transmit completion
612 	 * status.
613 	 */
614 	hw->mac.report_tx_early = 1;
615 
616 	/*
617 	** Get queue/ring memory
618 	*/
619 	if (em_allocate_queues(adapter)) {
620 		error = ENOMEM;
621 		goto err_pci;
622 	}
623 
624 	/* Allocate multicast array memory. */
625 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
626 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
627 	if (adapter->mta == NULL) {
628 		device_printf(dev, "Can not allocate multicast setup array\n");
629 		error = ENOMEM;
630 		goto err_late;
631 	}
632 
633 	/* Check SOL/IDER usage */
634 	if (e1000_check_reset_block(hw))
635 		device_printf(dev, "PHY reset is blocked"
636 		    " due to SOL/IDER session.\n");
637 
638 	/* Sysctl for setting Energy Efficient Ethernet */
639 	em_set_sysctl_value(adapter, "eee_control",
640 	    "enable Energy Efficient Ethernet",
641 	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);
642 
643 	/*
644 	** Start from a known state, this is
645 	** important in reading the nvm and
646 	** mac from that.
647 	*/
648 	e1000_reset_hw(hw);
649 
650 
651 	/* Make sure we have a good EEPROM before we read from it */
652 	if (e1000_validate_nvm_checksum(hw) < 0) {
653 		/*
654 		** Some PCI-E parts fail the first check due to
655 		** the link being in sleep state, call it again,
656 		** if it fails a second time its a real issue.
657 		*/
658 		if (e1000_validate_nvm_checksum(hw) < 0) {
659 			device_printf(dev,
660 			    "The EEPROM Checksum Is Not Valid\n");
661 			error = EIO;
662 			goto err_late;
663 		}
664 	}
665 
666 	/* Copy the permanent MAC address out of the EEPROM */
667 	if (e1000_read_mac_addr(hw) < 0) {
668 		device_printf(dev, "EEPROM read error while reading MAC"
669 		    " address\n");
670 		error = EIO;
671 		goto err_late;
672 	}
673 
674 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
675 		device_printf(dev, "Invalid MAC address\n");
676 		error = EIO;
677 		goto err_late;
678 	}
679 
680 	/*
681 	**  Do interrupt configuration
682 	*/
683 	if (adapter->msix > 1) /* Do MSIX */
684 		error = em_allocate_msix(adapter);
685 	else  /* MSI or Legacy */
686 		error = em_allocate_legacy(adapter);
687 	if (error)
688 		goto err_late;
689 
690 	/*
691 	 * Get Wake-on-Lan and Management info for later use
692 	 */
693 	em_get_wakeup(dev);
694 
695 	/* Setup OS specific network interface */
696 	if (em_setup_interface(dev, adapter) != 0)
697 		goto err_late;
698 
699 	em_reset(adapter);
700 
701 	/* Initialize statistics */
702 	em_update_stats_counters(adapter);
703 
704 	hw->mac.get_link_status = 1;
705 	em_update_link_status(adapter);
706 
707 	/* Register for VLAN events */
708 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
709 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
710 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
711 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
712 
713 	em_add_hw_stats(adapter);
714 
715 	/* Non-AMT based hardware can now take control from firmware */
716 	if (adapter->has_manage && !adapter->has_amt)
717 		em_get_hw_control(adapter);
718 
719 	/* Tell the stack that the interface is not active */
720 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
721 	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
722 
723 	adapter->led_dev = led_create(em_led_func, adapter,
724 	    device_get_nameunit(dev));
725 #ifdef DEV_NETMAP
726 	em_netmap_attach(adapter);
727 #endif /* DEV_NETMAP */
728 
729 	INIT_DEBUGOUT("em_attach: end");
730 
731 	return (0);
732 
733 err_late:
734 	em_free_transmit_structures(adapter);
735 	em_free_receive_structures(adapter);
736 	em_release_hw_control(adapter);
737 	if (adapter->ifp != NULL)
738 		if_free(adapter->ifp);
739 err_pci:
740 	em_free_pci_resources(adapter);
741 	free(adapter->mta, M_DEVBUF);
742 	EM_CORE_LOCK_DESTROY(adapter);
743 
744 	return (error);
745 }
746 
747 /*********************************************************************
748  *  Device removal routine
749  *
750  *  The detach entry point is called when the driver is being removed.
751  *  This routine stops the adapter and deallocates all the resources
752  *  that were allocated for driver operation.
753  *
754  *  return 0 on success, positive on failure
755  *********************************************************************/
756 
757 static int
758 em_detach(device_t dev)
759 {
760 	struct adapter	*adapter = device_get_softc(dev);
761 	struct ifnet	*ifp = adapter->ifp;
762 
763 	INIT_DEBUGOUT("em_detach: begin");
764 
765 	/* Make sure VLANS are not using driver */
766 	if (adapter->ifp->if_vlantrunk != NULL) {
767 		device_printf(dev,"Vlan in use, detach first\n");
768 		return (EBUSY);
769 	}
770 
771 #ifdef DEVICE_POLLING
772 	if (ifp->if_capenable & IFCAP_POLLING)
773 		ether_poll_deregister(ifp);
774 #endif
775 
776 	if (adapter->led_dev != NULL)
777 		led_destroy(adapter->led_dev);
778 
779 	EM_CORE_LOCK(adapter);
780 	adapter->in_detach = 1;
781 	em_stop(adapter);
782 	EM_CORE_UNLOCK(adapter);
783 	EM_CORE_LOCK_DESTROY(adapter);
784 
785 	e1000_phy_hw_reset(&adapter->hw);
786 
787 	em_release_manageability(adapter);
788 	em_release_hw_control(adapter);
789 
790 	/* Unregister VLAN events */
791 	if (adapter->vlan_attach != NULL)
792 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793 	if (adapter->vlan_detach != NULL)
794 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795 
796 	ether_ifdetach(adapter->ifp);
797 	callout_drain(&adapter->timer);
798 
799 #ifdef DEV_NETMAP
800 	netmap_detach(ifp);
801 #endif /* DEV_NETMAP */
802 
803 	em_free_pci_resources(adapter);
804 	bus_generic_detach(dev);
805 	if_free(ifp);
806 
807 	em_free_transmit_structures(adapter);
808 	em_free_receive_structures(adapter);
809 
810 	em_release_hw_control(adapter);
811 	free(adapter->mta, M_DEVBUF);
812 
813 	return (0);
814 }
815 
816 /*********************************************************************
817  *
818  *  Shutdown entry point
819  *
820  **********************************************************************/
821 
822 static int
823 em_shutdown(device_t dev)
824 {
825 	return em_suspend(dev);
826 }
827 
828 /*
829  * Suspend/resume device methods.
830  */
831 static int
832 em_suspend(device_t dev)
833 {
834 	struct adapter *adapter = device_get_softc(dev);
835 
836 	EM_CORE_LOCK(adapter);
837 
838         em_release_manageability(adapter);
839 	em_release_hw_control(adapter);
840 	em_enable_wakeup(dev);
841 
842 	EM_CORE_UNLOCK(adapter);
843 
844 	return bus_generic_suspend(dev);
845 }
846 
847 static int
848 em_resume(device_t dev)
849 {
850 	struct adapter *adapter = device_get_softc(dev);
851 	struct tx_ring	*txr = adapter->tx_rings;
852 	struct ifnet *ifp = adapter->ifp;
853 
854 	EM_CORE_LOCK(adapter);
855 	if (adapter->hw.mac.type == e1000_pch2lan)
856 		e1000_resume_workarounds_pchlan(&adapter->hw);
857 	em_init_locked(adapter);
858 	em_init_manageability(adapter);
859 
860 	if ((ifp->if_flags & IFF_UP) &&
861 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
862 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
863 			EM_TX_LOCK(txr);
864 #ifdef EM_MULTIQUEUE
865 			if (!drbr_empty(ifp, txr->br))
866 				em_mq_start_locked(ifp, txr, NULL);
867 #else
868 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
869 				em_start_locked(ifp, txr);
870 #endif
871 			EM_TX_UNLOCK(txr);
872 		}
873 	}
874 	EM_CORE_UNLOCK(adapter);
875 
876 	return bus_generic_resume(dev);
877 }
878 
879 
880 #ifdef EM_MULTIQUEUE
881 /*********************************************************************
882  *  Multiqueue Transmit routines
883  *
884  *  em_mq_start is called by the stack to initiate a transmit.
885  *  however, if busy the driver can queue the request rather
886  *  than do an immediate send. It is this that is an advantage
887  *  in this driver, rather than also having multiple tx queues.
888  **********************************************************************/
889 static int
890 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
891 {
892 	struct adapter  *adapter = txr->adapter;
893         struct mbuf     *next;
894         int             err = 0, enq = 0;
895 
896 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
897 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
898 		if (m != NULL)
899 			err = drbr_enqueue(ifp, txr->br, m);
900 		return (err);
901 	}
902 
903 	enq = 0;
904 	if (m == NULL) {
905 		next = drbr_dequeue(ifp, txr->br);
906 	} else if (drbr_needs_enqueue(ifp, txr->br)) {
907 		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
908 			return (err);
909 		next = drbr_dequeue(ifp, txr->br);
910 	} else
911 		next = m;
912 
913 	/* Process the queue */
914 	while (next != NULL) {
915 		if ((err = em_xmit(txr, &next)) != 0) {
916                         if (next != NULL)
917                                 err = drbr_enqueue(ifp, txr->br, next);
918                         break;
919 		}
920 		enq++;
921 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
922 		ETHER_BPF_MTAP(ifp, next);
923 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
924                         break;
925 		next = drbr_dequeue(ifp, txr->br);
926 	}
927 
928 	if (enq > 0) {
929                 /* Set the watchdog */
930                 txr->queue_status = EM_QUEUE_WORKING;
931 		txr->watchdog_time = ticks;
932 	}
933 
934 	if (txr->tx_avail < EM_MAX_SCATTER)
935 		em_txeof(txr);
936 	if (txr->tx_avail < EM_MAX_SCATTER)
937 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
938 	return (err);
939 }
940 
941 /*
942 ** Multiqueue capable stack interface
943 */
944 static int
945 em_mq_start(struct ifnet *ifp, struct mbuf *m)
946 {
947 	struct adapter	*adapter = ifp->if_softc;
948 	struct tx_ring	*txr = adapter->tx_rings;
949 	int 		error;
950 
951 	if (EM_TX_TRYLOCK(txr)) {
952 		error = em_mq_start_locked(ifp, txr, m);
953 		EM_TX_UNLOCK(txr);
954 	} else
955 		error = drbr_enqueue(ifp, txr->br, m);
956 
957 	return (error);
958 }
959 
960 /*
961 ** Flush all ring buffers
962 */
963 static void
964 em_qflush(struct ifnet *ifp)
965 {
966 	struct adapter  *adapter = ifp->if_softc;
967 	struct tx_ring  *txr = adapter->tx_rings;
968 	struct mbuf     *m;
969 
970 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
971 		EM_TX_LOCK(txr);
972 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
973 			m_freem(m);
974 		EM_TX_UNLOCK(txr);
975 	}
976 	if_qflush(ifp);
977 }
978 #else  /* !EM_MULTIQUEUE */
979 
980 static void
981 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
982 {
983 	struct adapter	*adapter = ifp->if_softc;
984 	struct mbuf	*m_head;
985 
986 	EM_TX_LOCK_ASSERT(txr);
987 
988 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
989 	    IFF_DRV_RUNNING)
990 		return;
991 
992 	if (!adapter->link_active)
993 		return;
994 
995 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
996         	/* Call cleanup if number of TX descriptors low */
997 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
998 			em_txeof(txr);
999 		if (txr->tx_avail < EM_MAX_SCATTER) {
1000 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1001 			break;
1002 		}
1003                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1004 		if (m_head == NULL)
1005 			break;
1006 		/*
1007 		 *  Encapsulation can modify our pointer, and or make it
1008 		 *  NULL on failure.  In that event, we can't requeue.
1009 		 */
1010 		if (em_xmit(txr, &m_head)) {
1011 			if (m_head == NULL)
1012 				break;
1013 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1014 			break;
1015 		}
1016 
1017 		/* Send a copy of the frame to the BPF listener */
1018 		ETHER_BPF_MTAP(ifp, m_head);
1019 
1020 		/* Set timeout in case hardware has problems transmitting. */
1021 		txr->watchdog_time = ticks;
1022                 txr->queue_status = EM_QUEUE_WORKING;
1023 	}
1024 
1025 	return;
1026 }
1027 
1028 static void
1029 em_start(struct ifnet *ifp)
1030 {
1031 	struct adapter	*adapter = ifp->if_softc;
1032 	struct tx_ring	*txr = adapter->tx_rings;
1033 
1034 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1035 		EM_TX_LOCK(txr);
1036 		em_start_locked(ifp, txr);
1037 		EM_TX_UNLOCK(txr);
1038 	}
1039 	return;
1040 }
1041 #endif /* EM_MULTIQUEUE */
1042 
1043 /*********************************************************************
1044  *  Ioctl entry point
1045  *
1046  *  em_ioctl is called when the user wants to configure the
1047  *  interface.
1048  *
1049  *  return 0 on success, positive on failure
1050  **********************************************************************/
1051 
1052 static int
1053 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1054 {
1055 	struct adapter	*adapter = ifp->if_softc;
1056 	struct ifreq	*ifr = (struct ifreq *)data;
1057 #if defined(INET) || defined(INET6)
1058 	struct ifaddr	*ifa = (struct ifaddr *)data;
1059 #endif
1060 	bool		avoid_reset = FALSE;
1061 	int		error = 0;
1062 
1063 	if (adapter->in_detach)
1064 		return (error);
1065 
1066 	switch (command) {
1067 	case SIOCSIFADDR:
1068 #ifdef INET
1069 		if (ifa->ifa_addr->sa_family == AF_INET)
1070 			avoid_reset = TRUE;
1071 #endif
1072 #ifdef INET6
1073 		if (ifa->ifa_addr->sa_family == AF_INET6)
1074 			avoid_reset = TRUE;
1075 #endif
1076 		/*
1077 		** Calling init results in link renegotiation,
1078 		** so we avoid doing it when possible.
1079 		*/
1080 		if (avoid_reset) {
1081 			ifp->if_flags |= IFF_UP;
1082 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1083 				em_init(adapter);
1084 #ifdef INET
1085 			if (!(ifp->if_flags & IFF_NOARP))
1086 				arp_ifinit(ifp, ifa);
1087 #endif
1088 		} else
1089 			error = ether_ioctl(ifp, command, data);
1090 		break;
1091 	case SIOCSIFMTU:
1092 	    {
1093 		int max_frame_size;
1094 
1095 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1096 
1097 		EM_CORE_LOCK(adapter);
1098 		switch (adapter->hw.mac.type) {
1099 		case e1000_82571:
1100 		case e1000_82572:
1101 		case e1000_ich9lan:
1102 		case e1000_ich10lan:
1103 		case e1000_pch2lan:
1104 		case e1000_82574:
1105 		case e1000_82583:
1106 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1107 			max_frame_size = 9234;
1108 			break;
1109 		case e1000_pchlan:
1110 			max_frame_size = 4096;
1111 			break;
1112 			/* Adapters that do not support jumbo frames */
1113 		case e1000_ich8lan:
1114 			max_frame_size = ETHER_MAX_LEN;
1115 			break;
1116 		default:
1117 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1118 		}
1119 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1120 		    ETHER_CRC_LEN) {
1121 			EM_CORE_UNLOCK(adapter);
1122 			error = EINVAL;
1123 			break;
1124 		}
1125 
1126 		ifp->if_mtu = ifr->ifr_mtu;
1127 		adapter->max_frame_size =
1128 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1129 		em_init_locked(adapter);
1130 		EM_CORE_UNLOCK(adapter);
1131 		break;
1132 	    }
1133 	case SIOCSIFFLAGS:
1134 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1135 		    SIOCSIFFLAGS (Set Interface Flags)");
1136 		EM_CORE_LOCK(adapter);
1137 		if (ifp->if_flags & IFF_UP) {
1138 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1139 				if ((ifp->if_flags ^ adapter->if_flags) &
1140 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1141 					em_disable_promisc(adapter);
1142 					em_set_promisc(adapter);
1143 				}
1144 			} else
1145 				em_init_locked(adapter);
1146 		} else
1147 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1148 				em_stop(adapter);
1149 		adapter->if_flags = ifp->if_flags;
1150 		EM_CORE_UNLOCK(adapter);
1151 		break;
1152 	case SIOCADDMULTI:
1153 	case SIOCDELMULTI:
1154 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1155 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1156 			EM_CORE_LOCK(adapter);
1157 			em_disable_intr(adapter);
1158 			em_set_multi(adapter);
1159 #ifdef DEVICE_POLLING
1160 			if (!(ifp->if_capenable & IFCAP_POLLING))
1161 #endif
1162 				em_enable_intr(adapter);
1163 			EM_CORE_UNLOCK(adapter);
1164 		}
1165 		break;
1166 	case SIOCSIFMEDIA:
1167 		/* Check SOL/IDER usage */
1168 		EM_CORE_LOCK(adapter);
1169 		if (e1000_check_reset_block(&adapter->hw)) {
1170 			EM_CORE_UNLOCK(adapter);
1171 			device_printf(adapter->dev, "Media change is"
1172 			    " blocked due to SOL/IDER session.\n");
1173 			break;
1174 		}
1175 		EM_CORE_UNLOCK(adapter);
1176 		/* falls thru */
1177 	case SIOCGIFMEDIA:
1178 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1179 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1180 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1181 		break;
1182 	case SIOCSIFCAP:
1183 	    {
1184 		int mask, reinit;
1185 
1186 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1187 		reinit = 0;
1188 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1189 #ifdef DEVICE_POLLING
1190 		if (mask & IFCAP_POLLING) {
1191 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1192 				error = ether_poll_register(em_poll, ifp);
1193 				if (error)
1194 					return (error);
1195 				EM_CORE_LOCK(adapter);
1196 				em_disable_intr(adapter);
1197 				ifp->if_capenable |= IFCAP_POLLING;
1198 				EM_CORE_UNLOCK(adapter);
1199 			} else {
1200 				error = ether_poll_deregister(ifp);
1201 				/* Enable interrupt even in error case */
1202 				EM_CORE_LOCK(adapter);
1203 				em_enable_intr(adapter);
1204 				ifp->if_capenable &= ~IFCAP_POLLING;
1205 				EM_CORE_UNLOCK(adapter);
1206 			}
1207 		}
1208 #endif
1209 		if (mask & IFCAP_HWCSUM) {
1210 			ifp->if_capenable ^= IFCAP_HWCSUM;
1211 			reinit = 1;
1212 		}
1213 		if (mask & IFCAP_TSO4) {
1214 			ifp->if_capenable ^= IFCAP_TSO4;
1215 			reinit = 1;
1216 		}
1217 		if (mask & IFCAP_VLAN_HWTAGGING) {
1218 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1219 			reinit = 1;
1220 		}
1221 		if (mask & IFCAP_VLAN_HWFILTER) {
1222 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1223 			reinit = 1;
1224 		}
1225 		if (mask & IFCAP_VLAN_HWTSO) {
1226 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1227 			reinit = 1;
1228 		}
1229 		if ((mask & IFCAP_WOL) &&
1230 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1231 			if (mask & IFCAP_WOL_MCAST)
1232 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1233 			if (mask & IFCAP_WOL_MAGIC)
1234 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1235 		}
1236 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1237 			em_init(adapter);
1238 		VLAN_CAPABILITIES(ifp);
1239 		break;
1240 	    }
1241 
1242 	default:
1243 		error = ether_ioctl(ifp, command, data);
1244 		break;
1245 	}
1246 
1247 	return (error);
1248 }
1249 
1250 
1251 /*********************************************************************
1252  *  Init entry point
1253  *
1254  *  This routine is used in two ways. It is used by the stack as
1255  *  init entry point in network interface structure. It is also used
1256  *  by the driver as a hw/sw initialization routine to get to a
1257  *  consistent state.
1258  *
1259  *  return 0 on success, positive on failure
1260  **********************************************************************/
1261 
1262 static void
1263 em_init_locked(struct adapter *adapter)
1264 {
1265 	struct ifnet	*ifp = adapter->ifp;
1266 	device_t	dev = adapter->dev;
1267 
1268 	INIT_DEBUGOUT("em_init: begin");
1269 
1270 	EM_CORE_LOCK_ASSERT(adapter);
1271 
1272 	em_disable_intr(adapter);
1273 	callout_stop(&adapter->timer);
1274 
1275 	/* Get the latest mac address, User can use a LAA */
1276         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1277               ETHER_ADDR_LEN);
1278 
1279 	/* Put the address into the Receive Address Array */
1280 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1281 
1282 	/*
1283 	 * With the 82571 adapter, RAR[0] may be overwritten
1284 	 * when the other port is reset, we make a duplicate
1285 	 * in RAR[14] for that eventuality, this assures
1286 	 * the interface continues to function.
1287 	 */
1288 	if (adapter->hw.mac.type == e1000_82571) {
1289 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1290 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1291 		    E1000_RAR_ENTRIES - 1);
1292 	}
1293 
1294 	/* Initialize the hardware */
1295 	em_reset(adapter);
1296 	em_update_link_status(adapter);
1297 
1298 	/* Setup VLAN support, basic and offload if available */
1299 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1300 
1301 	/* Set hardware offload abilities */
1302 	ifp->if_hwassist = 0;
1303 	if (ifp->if_capenable & IFCAP_TXCSUM)
1304 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1305 	if (ifp->if_capenable & IFCAP_TSO4)
1306 		ifp->if_hwassist |= CSUM_TSO;
1307 
1308 	/* Configure for OS presence */
1309 	em_init_manageability(adapter);
1310 
1311 	/* Prepare transmit descriptors and buffers */
1312 	em_setup_transmit_structures(adapter);
1313 	em_initialize_transmit_unit(adapter);
1314 
1315 	/* Setup Multicast table */
1316 	em_set_multi(adapter);
1317 
1318 	/*
1319 	** Figure out the desired mbuf
1320 	** pool for doing jumbos
1321 	*/
1322 	if (adapter->max_frame_size <= 2048)
1323 		adapter->rx_mbuf_sz = MCLBYTES;
1324 	else if (adapter->max_frame_size <= 4096)
1325 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1326 	else
1327 		adapter->rx_mbuf_sz = MJUM9BYTES;
1328 
1329 	/* Prepare receive descriptors and buffers */
1330 	if (em_setup_receive_structures(adapter)) {
1331 		device_printf(dev, "Could not setup receive structures\n");
1332 		em_stop(adapter);
1333 		return;
1334 	}
1335 	em_initialize_receive_unit(adapter);
1336 
1337 	/* Use real VLAN Filter support? */
1338 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1339 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1340 			/* Use real VLAN Filter support */
1341 			em_setup_vlan_hw_support(adapter);
1342 		else {
1343 			u32 ctrl;
1344 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1345 			ctrl |= E1000_CTRL_VME;
1346 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1347 		}
1348 	}
1349 
1350 	/* Don't lose promiscuous settings */
1351 	em_set_promisc(adapter);
1352 
1353 	/* Set the interface as ACTIVE */
1354 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1355 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1356 
1357 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1358 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1359 
1360 	/* MSI/X configuration for 82574 */
1361 	if (adapter->hw.mac.type == e1000_82574) {
1362 		int tmp;
1363 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1364 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1365 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1366 		/* Set the IVAR - interrupt vector routing. */
1367 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1368 	}
1369 
1370 #ifdef DEVICE_POLLING
1371 	/*
1372 	 * Only enable interrupts if we are not polling, make sure
1373 	 * they are off otherwise.
1374 	 */
1375 	if (ifp->if_capenable & IFCAP_POLLING)
1376 		em_disable_intr(adapter);
1377 	else
1378 #endif /* DEVICE_POLLING */
1379 		em_enable_intr(adapter);
1380 
1381 	/* AMT based hardware can now take control from firmware */
1382 	if (adapter->has_manage && adapter->has_amt)
1383 		em_get_hw_control(adapter);
1384 }
1385 
1386 static void
1387 em_init(void *arg)
1388 {
1389 	struct adapter *adapter = arg;
1390 
1391 	EM_CORE_LOCK(adapter);
1392 	em_init_locked(adapter);
1393 	EM_CORE_UNLOCK(adapter);
1394 }
1395 
1396 
1397 #ifdef DEVICE_POLLING
1398 /*********************************************************************
1399  *
1400  *  Legacy polling routine: note this only works with single queue
1401  *
1402  *********************************************************************/
1403 static int
1404 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1405 {
1406 	struct adapter *adapter = ifp->if_softc;
1407 	struct tx_ring	*txr = adapter->tx_rings;
1408 	struct rx_ring	*rxr = adapter->rx_rings;
1409 	u32		reg_icr;
1410 	int		rx_done;
1411 
1412 	EM_CORE_LOCK(adapter);
1413 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1414 		EM_CORE_UNLOCK(adapter);
1415 		return (0);
1416 	}
1417 
1418 	if (cmd == POLL_AND_CHECK_STATUS) {
1419 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1420 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1421 			callout_stop(&adapter->timer);
1422 			adapter->hw.mac.get_link_status = 1;
1423 			em_update_link_status(adapter);
1424 			callout_reset(&adapter->timer, hz,
1425 			    em_local_timer, adapter);
1426 		}
1427 	}
1428 	EM_CORE_UNLOCK(adapter);
1429 
1430 	em_rxeof(rxr, count, &rx_done);
1431 
1432 	EM_TX_LOCK(txr);
1433 	em_txeof(txr);
1434 #ifdef EM_MULTIQUEUE
1435 	if (!drbr_empty(ifp, txr->br))
1436 		em_mq_start_locked(ifp, txr, NULL);
1437 #else
1438 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1439 		em_start_locked(ifp, txr);
1440 #endif
1441 	EM_TX_UNLOCK(txr);
1442 
1443 	return (rx_done);
1444 }
1445 #endif /* DEVICE_POLLING */
1446 
1447 
1448 /*********************************************************************
1449  *
1450  *  Fast Legacy/MSI Combined Interrupt Service routine
1451  *
1452  *********************************************************************/
1453 static int
1454 em_irq_fast(void *arg)
1455 {
1456 	struct adapter	*adapter = arg;
1457 	struct ifnet	*ifp;
1458 	u32		reg_icr;
1459 
1460 	ifp = adapter->ifp;
1461 
1462 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1463 
1464 	/* Hot eject?  */
1465 	if (reg_icr == 0xffffffff)
1466 		return FILTER_STRAY;
1467 
1468 	/* Definitely not our interrupt.  */
1469 	if (reg_icr == 0x0)
1470 		return FILTER_STRAY;
1471 
1472 	/*
1473 	 * Starting with the 82571 chip, bit 31 should be used to
1474 	 * determine whether the interrupt belongs to us.
1475 	 */
1476 	if (adapter->hw.mac.type >= e1000_82571 &&
1477 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1478 		return FILTER_STRAY;
1479 
1480 	em_disable_intr(adapter);
1481 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1482 
1483 	/* Link status change */
1484 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1485 		adapter->hw.mac.get_link_status = 1;
1486 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1487 	}
1488 
1489 	if (reg_icr & E1000_ICR_RXO)
1490 		adapter->rx_overruns++;
1491 	return FILTER_HANDLED;
1492 }
1493 
1494 /* Combined RX/TX handler, used by Legacy and MSI */
1495 static void
1496 em_handle_que(void *context, int pending)
1497 {
1498 	struct adapter	*adapter = context;
1499 	struct ifnet	*ifp = adapter->ifp;
1500 	struct tx_ring	*txr = adapter->tx_rings;
1501 	struct rx_ring	*rxr = adapter->rx_rings;
1502 
1503 
1504 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1505 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1506 		EM_TX_LOCK(txr);
1507 		em_txeof(txr);
1508 #ifdef EM_MULTIQUEUE
1509 		if (!drbr_empty(ifp, txr->br))
1510 			em_mq_start_locked(ifp, txr, NULL);
1511 #else
1512 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1513 			em_start_locked(ifp, txr);
1514 #endif
1515 		EM_TX_UNLOCK(txr);
1516 		if (more) {
1517 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1518 			return;
1519 		}
1520 	}
1521 
1522 	em_enable_intr(adapter);
1523 	return;
1524 }
1525 
1526 
1527 /*********************************************************************
1528  *
1529  *  MSIX Interrupt Service Routines
1530  *
1531  **********************************************************************/
1532 static void
1533 em_msix_tx(void *arg)
1534 {
1535 	struct tx_ring *txr = arg;
1536 	struct adapter *adapter = txr->adapter;
1537 	struct ifnet	*ifp = adapter->ifp;
1538 
1539 	++txr->tx_irq;
1540 	EM_TX_LOCK(txr);
1541 	em_txeof(txr);
1542 #ifdef EM_MULTIQUEUE
1543 	if (!drbr_empty(ifp, txr->br))
1544 		em_mq_start_locked(ifp, txr, NULL);
1545 #else
1546 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547 		em_start_locked(ifp, txr);
1548 #endif
1549 	/* Reenable this interrupt */
1550 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1551 	EM_TX_UNLOCK(txr);
1552 	return;
1553 }
1554 
1555 /*********************************************************************
1556  *
1557  *  MSIX RX Interrupt Service routine
1558  *
1559  **********************************************************************/
1560 
1561 static void
1562 em_msix_rx(void *arg)
1563 {
1564 	struct rx_ring	*rxr = arg;
1565 	struct adapter	*adapter = rxr->adapter;
1566 	bool		more;
1567 
1568 	++rxr->rx_irq;
1569 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1570 	if (more)
1571 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1572 	else
1573 		/* Reenable this interrupt */
1574 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1575 	return;
1576 }
1577 
1578 /*********************************************************************
1579  *
1580  *  MSIX Link Fast Interrupt Service routine
1581  *
1582  **********************************************************************/
1583 static void
1584 em_msix_link(void *arg)
1585 {
1586 	struct adapter	*adapter = arg;
1587 	u32		reg_icr;
1588 
1589 	++adapter->link_irq;
1590 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1591 
1592 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1593 		adapter->hw.mac.get_link_status = 1;
1594 		em_handle_link(adapter, 0);
1595 	} else
1596 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1597 		    EM_MSIX_LINK | E1000_IMS_LSC);
1598 	return;
1599 }
1600 
1601 static void
1602 em_handle_rx(void *context, int pending)
1603 {
1604 	struct rx_ring	*rxr = context;
1605 	struct adapter	*adapter = rxr->adapter;
1606         bool            more;
1607 
1608 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1609 	if (more)
1610 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1611 	else
1612 		/* Reenable this interrupt */
1613 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1614 }
1615 
1616 static void
1617 em_handle_tx(void *context, int pending)
1618 {
1619 	struct tx_ring	*txr = context;
1620 	struct adapter	*adapter = txr->adapter;
1621 	struct ifnet	*ifp = adapter->ifp;
1622 
1623 	EM_TX_LOCK(txr);
1624 	em_txeof(txr);
1625 #ifdef EM_MULTIQUEUE
1626 	if (!drbr_empty(ifp, txr->br))
1627 		em_mq_start_locked(ifp, txr, NULL);
1628 #else
1629 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1630 		em_start_locked(ifp, txr);
1631 #endif
1632 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1633 	EM_TX_UNLOCK(txr);
1634 }
1635 
1636 static void
1637 em_handle_link(void *context, int pending)
1638 {
1639 	struct adapter	*adapter = context;
1640 	struct tx_ring	*txr = adapter->tx_rings;
1641 	struct ifnet *ifp = adapter->ifp;
1642 
1643 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1644 		return;
1645 
1646 	EM_CORE_LOCK(adapter);
1647 	callout_stop(&adapter->timer);
1648 	em_update_link_status(adapter);
1649 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1650 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1651 	    EM_MSIX_LINK | E1000_IMS_LSC);
1652 	if (adapter->link_active) {
1653 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1654 			EM_TX_LOCK(txr);
1655 #ifdef EM_MULTIQUEUE
1656 			if (!drbr_empty(ifp, txr->br))
1657 				em_mq_start_locked(ifp, txr, NULL);
1658 #else
1659 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1660 				em_start_locked(ifp, txr);
1661 #endif
1662 			EM_TX_UNLOCK(txr);
1663 		}
1664 	}
1665 	EM_CORE_UNLOCK(adapter);
1666 }
1667 
1668 
1669 /*********************************************************************
1670  *
1671  *  Media Ioctl callback
1672  *
1673  *  This routine is called whenever the user queries the status of
1674  *  the interface using ifconfig.
1675  *
1676  **********************************************************************/
1677 static void
1678 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679 {
1680 	struct adapter *adapter = ifp->if_softc;
1681 	u_char fiber_type = IFM_1000_SX;
1682 
1683 	INIT_DEBUGOUT("em_media_status: begin");
1684 
1685 	EM_CORE_LOCK(adapter);
1686 	em_update_link_status(adapter);
1687 
1688 	ifmr->ifm_status = IFM_AVALID;
1689 	ifmr->ifm_active = IFM_ETHER;
1690 
1691 	if (!adapter->link_active) {
1692 		EM_CORE_UNLOCK(adapter);
1693 		return;
1694 	}
1695 
1696 	ifmr->ifm_status |= IFM_ACTIVE;
1697 
1698 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1699 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1700 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1701 	} else {
1702 		switch (adapter->link_speed) {
1703 		case 10:
1704 			ifmr->ifm_active |= IFM_10_T;
1705 			break;
1706 		case 100:
1707 			ifmr->ifm_active |= IFM_100_TX;
1708 			break;
1709 		case 1000:
1710 			ifmr->ifm_active |= IFM_1000_T;
1711 			break;
1712 		}
1713 		if (adapter->link_duplex == FULL_DUPLEX)
1714 			ifmr->ifm_active |= IFM_FDX;
1715 		else
1716 			ifmr->ifm_active |= IFM_HDX;
1717 	}
1718 	EM_CORE_UNLOCK(adapter);
1719 }
1720 
1721 /*********************************************************************
1722  *
1723  *  Media Ioctl callback
1724  *
1725  *  This routine is called when the user changes speed/duplex using
1726  *  media/mediopt option with ifconfig.
1727  *
1728  **********************************************************************/
1729 static int
1730 em_media_change(struct ifnet *ifp)
1731 {
1732 	struct adapter *adapter = ifp->if_softc;
1733 	struct ifmedia  *ifm = &adapter->media;
1734 
1735 	INIT_DEBUGOUT("em_media_change: begin");
1736 
1737 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1738 		return (EINVAL);
1739 
1740 	EM_CORE_LOCK(adapter);
1741 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1742 	case IFM_AUTO:
1743 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1744 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1745 		break;
1746 	case IFM_1000_LX:
1747 	case IFM_1000_SX:
1748 	case IFM_1000_T:
1749 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1751 		break;
1752 	case IFM_100_TX:
1753 		adapter->hw.mac.autoneg = FALSE;
1754 		adapter->hw.phy.autoneg_advertised = 0;
1755 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1756 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1757 		else
1758 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1759 		break;
1760 	case IFM_10_T:
1761 		adapter->hw.mac.autoneg = FALSE;
1762 		adapter->hw.phy.autoneg_advertised = 0;
1763 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1764 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1765 		else
1766 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1767 		break;
1768 	default:
1769 		device_printf(adapter->dev, "Unsupported media type\n");
1770 	}
1771 
1772 	em_init_locked(adapter);
1773 	EM_CORE_UNLOCK(adapter);
1774 
1775 	return (0);
1776 }
1777 
1778 /*********************************************************************
1779  *
1780  *  This routine maps the mbufs to tx descriptors.
1781  *
1782  *  return 0 on success, positive on failure
1783  **********************************************************************/
1784 
1785 static int
1786 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1787 {
1788 	struct adapter		*adapter = txr->adapter;
1789 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1790 	bus_dmamap_t		map;
1791 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1792 	struct e1000_tx_desc	*ctxd = NULL;
1793 	struct mbuf		*m_head;
1794 	struct ether_header	*eh;
1795 	struct ip		*ip = NULL;
1796 	struct tcphdr		*tp = NULL;
1797 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1798 	int			ip_off, poff;
1799 	int			nsegs, i, j, first, last = 0;
1800 	int			error, do_tso, tso_desc = 0, remap = 1;
1801 
1802 retry:
1803 	m_head = *m_headp;
1804 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1805 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1806 	ip_off = poff = 0;
1807 
1808 	/*
1809 	 * Intel recommends entire IP/TCP header length reside in a single
1810 	 * buffer. If multiple descriptors are used to describe the IP and
1811 	 * TCP header, each descriptor should describe one or more
1812 	 * complete headers; descriptors referencing only parts of headers
1813 	 * are not supported. If all layer headers are not coalesced into
1814 	 * a single buffer, each buffer should not cross a 4KB boundary,
1815 	 * or be larger than the maximum read request size.
1816 	 * Controller also requires modifing IP/TCP header to make TSO work
1817 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1818 	 * IP/TCP header into a single buffer to meet the requirement of
1819 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1820 	 * which also has similiar restrictions.
1821 	 */
1822 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1823 		if (do_tso || (m_head->m_next != NULL &&
1824 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1825 			if (M_WRITABLE(*m_headp) == 0) {
1826 				m_head = m_dup(*m_headp, M_DONTWAIT);
1827 				m_freem(*m_headp);
1828 				if (m_head == NULL) {
1829 					*m_headp = NULL;
1830 					return (ENOBUFS);
1831 				}
1832 				*m_headp = m_head;
1833 			}
1834 		}
1835 		/*
1836 		 * XXX
1837 		 * Assume IPv4, we don't have TSO/checksum offload support
1838 		 * for IPv6 yet.
1839 		 */
1840 		ip_off = sizeof(struct ether_header);
1841 		m_head = m_pullup(m_head, ip_off);
1842 		if (m_head == NULL) {
1843 			*m_headp = NULL;
1844 			return (ENOBUFS);
1845 		}
1846 		eh = mtod(m_head, struct ether_header *);
1847 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1848 			ip_off = sizeof(struct ether_vlan_header);
1849 			m_head = m_pullup(m_head, ip_off);
1850 			if (m_head == NULL) {
1851 				*m_headp = NULL;
1852 				return (ENOBUFS);
1853 			}
1854 		}
1855 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1856 		if (m_head == NULL) {
1857 			*m_headp = NULL;
1858 			return (ENOBUFS);
1859 		}
1860 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1861 		poff = ip_off + (ip->ip_hl << 2);
1862 		if (do_tso) {
1863 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1864 			if (m_head == NULL) {
1865 				*m_headp = NULL;
1866 				return (ENOBUFS);
1867 			}
1868 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1869 			/*
1870 			 * TSO workaround:
1871 			 *   pull 4 more bytes of data into it.
1872 			 */
1873 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1874 			if (m_head == NULL) {
1875 				*m_headp = NULL;
1876 				return (ENOBUFS);
1877 			}
1878 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879 			ip->ip_len = 0;
1880 			ip->ip_sum = 0;
1881 			/*
1882 			 * The pseudo TCP checksum does not include TCP payload
1883 			 * length so driver should recompute the checksum here
1884 			 * what hardware expect to see. This is adherence of
1885 			 * Microsoft's Large Send specification.
1886 			 */
1887 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1888 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1889 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1890 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1891 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1892 			if (m_head == NULL) {
1893 				*m_headp = NULL;
1894 				return (ENOBUFS);
1895 			}
1896 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1898 			if (m_head == NULL) {
1899 				*m_headp = NULL;
1900 				return (ENOBUFS);
1901 			}
1902 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1905 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1906 			if (m_head == NULL) {
1907 				*m_headp = NULL;
1908 				return (ENOBUFS);
1909 			}
1910 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1911 		}
1912 		*m_headp = m_head;
1913 	}
1914 
1915 	/*
1916 	 * Map the packet for DMA
1917 	 *
1918 	 * Capture the first descriptor index,
1919 	 * this descriptor will have the index
1920 	 * of the EOP which is the only one that
1921 	 * now gets a DONE bit writeback.
1922 	 */
1923 	first = txr->next_avail_desc;
1924 	tx_buffer = &txr->tx_buffers[first];
1925 	tx_buffer_mapped = tx_buffer;
1926 	map = tx_buffer->map;
1927 
1928 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1929 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1930 
1931 	/*
1932 	 * There are two types of errors we can (try) to handle:
1933 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1934 	 *   out of segments.  Defragment the mbuf chain and try again.
1935 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1936 	 *   at this point in time.  Defer sending and try again later.
1937 	 * All other errors, in particular EINVAL, are fatal and prevent the
1938 	 * mbuf chain from ever going through.  Drop it and report error.
1939 	 */
1940 	if (error == EFBIG && remap) {
1941 		struct mbuf *m;
1942 
1943 		m = m_defrag(*m_headp, M_DONTWAIT);
1944 		if (m == NULL) {
1945 			adapter->mbuf_alloc_failed++;
1946 			m_freem(*m_headp);
1947 			*m_headp = NULL;
1948 			return (ENOBUFS);
1949 		}
1950 		*m_headp = m;
1951 
1952 		/* Try it again, but only once */
1953 		remap = 0;
1954 		goto retry;
1955 	} else if (error == ENOMEM) {
1956 		adapter->no_tx_dma_setup++;
1957 		return (error);
1958 	} else if (error != 0) {
1959 		adapter->no_tx_dma_setup++;
1960 		m_freem(*m_headp);
1961 		*m_headp = NULL;
1962 		return (error);
1963 	}
1964 
1965 	/*
1966 	 * TSO Hardware workaround, if this packet is not
1967 	 * TSO, and is only a single descriptor long, and
1968 	 * it follows a TSO burst, then we need to add a
1969 	 * sentinel descriptor to prevent premature writeback.
1970 	 */
1971 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1972 		if (nsegs == 1)
1973 			tso_desc = TRUE;
1974 		txr->tx_tso = FALSE;
1975 	}
1976 
1977         if (nsegs > (txr->tx_avail - 2)) {
1978                 txr->no_desc_avail++;
1979 		bus_dmamap_unload(txr->txtag, map);
1980 		return (ENOBUFS);
1981         }
1982 	m_head = *m_headp;
1983 
1984 	/* Do hardware assists */
1985 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1986 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1987 		    &txd_upper, &txd_lower);
1988 		/* we need to make a final sentinel transmit desc */
1989 		tso_desc = TRUE;
1990 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1991 		em_transmit_checksum_setup(txr, m_head,
1992 		    ip_off, ip, &txd_upper, &txd_lower);
1993 
1994 	if (m_head->m_flags & M_VLANTAG) {
1995 		/* Set the vlan id. */
1996 		txd_upper |=
1997 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
1998                 /* Tell hardware to add tag */
1999                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2000         }
2001 
2002 	i = txr->next_avail_desc;
2003 
2004 	/* Set up our transmit descriptors */
2005 	for (j = 0; j < nsegs; j++) {
2006 		bus_size_t seg_len;
2007 		bus_addr_t seg_addr;
2008 
2009 		tx_buffer = &txr->tx_buffers[i];
2010 		ctxd = &txr->tx_base[i];
2011 		seg_addr = segs[j].ds_addr;
2012 		seg_len  = segs[j].ds_len;
2013 		/*
2014 		** TSO Workaround:
2015 		** If this is the last descriptor, we want to
2016 		** split it so we have a small final sentinel
2017 		*/
2018 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2019 			seg_len -= 4;
2020 			ctxd->buffer_addr = htole64(seg_addr);
2021 			ctxd->lower.data = htole32(
2022 			adapter->txd_cmd | txd_lower | seg_len);
2023 			ctxd->upper.data =
2024 			    htole32(txd_upper);
2025 			if (++i == adapter->num_tx_desc)
2026 				i = 0;
2027 			/* Now make the sentinel */
2028 			++txd_used; /* using an extra txd */
2029 			ctxd = &txr->tx_base[i];
2030 			tx_buffer = &txr->tx_buffers[i];
2031 			ctxd->buffer_addr =
2032 			    htole64(seg_addr + seg_len);
2033 			ctxd->lower.data = htole32(
2034 			adapter->txd_cmd | txd_lower | 4);
2035 			ctxd->upper.data =
2036 			    htole32(txd_upper);
2037 			last = i;
2038 			if (++i == adapter->num_tx_desc)
2039 				i = 0;
2040 		} else {
2041 			ctxd->buffer_addr = htole64(seg_addr);
2042 			ctxd->lower.data = htole32(
2043 			adapter->txd_cmd | txd_lower | seg_len);
2044 			ctxd->upper.data =
2045 			    htole32(txd_upper);
2046 			last = i;
2047 			if (++i == adapter->num_tx_desc)
2048 				i = 0;
2049 		}
2050 		tx_buffer->m_head = NULL;
2051 		tx_buffer->next_eop = -1;
2052 	}
2053 
2054 	txr->next_avail_desc = i;
2055 	txr->tx_avail -= nsegs;
2056 	if (tso_desc) /* TSO used an extra for sentinel */
2057 		txr->tx_avail -= txd_used;
2058 
2059         tx_buffer->m_head = m_head;
2060 	/*
2061 	** Here we swap the map so the last descriptor,
2062 	** which gets the completion interrupt has the
2063 	** real map, and the first descriptor gets the
2064 	** unused map from this descriptor.
2065 	*/
2066 	tx_buffer_mapped->map = tx_buffer->map;
2067 	tx_buffer->map = map;
2068         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2069 
2070         /*
2071          * Last Descriptor of Packet
2072 	 * needs End Of Packet (EOP)
2073 	 * and Report Status (RS)
2074          */
2075         ctxd->lower.data |=
2076 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2077 	/*
2078 	 * Keep track in the first buffer which
2079 	 * descriptor will be written back
2080 	 */
2081 	tx_buffer = &txr->tx_buffers[first];
2082 	tx_buffer->next_eop = last;
2083 	/* Update the watchdog time early and often */
2084 	txr->watchdog_time = ticks;
2085 
2086 	/*
2087 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2088 	 * that this frame is available to transmit.
2089 	 */
2090 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2091 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2092 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2093 
2094 	return (0);
2095 }
2096 
2097 static void
2098 em_set_promisc(struct adapter *adapter)
2099 {
2100 	struct ifnet	*ifp = adapter->ifp;
2101 	u32		reg_rctl;
2102 
2103 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2104 
2105 	if (ifp->if_flags & IFF_PROMISC) {
2106 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2107 		/* Turn this on if you want to see bad packets */
2108 		if (em_debug_sbp)
2109 			reg_rctl |= E1000_RCTL_SBP;
2110 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2111 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2112 		reg_rctl |= E1000_RCTL_MPE;
2113 		reg_rctl &= ~E1000_RCTL_UPE;
2114 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115 	}
2116 }
2117 
2118 static void
2119 em_disable_promisc(struct adapter *adapter)
2120 {
2121 	u32	reg_rctl;
2122 
2123 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124 
2125 	reg_rctl &=  (~E1000_RCTL_UPE);
2126 	reg_rctl &=  (~E1000_RCTL_MPE);
2127 	reg_rctl &=  (~E1000_RCTL_SBP);
2128 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2129 }
2130 
2131 
2132 /*********************************************************************
2133  *  Multicast Update
2134  *
2135  *  This routine is called whenever multicast address list is updated.
2136  *
2137  **********************************************************************/
2138 
2139 static void
2140 em_set_multi(struct adapter *adapter)
2141 {
2142 	struct ifnet	*ifp = adapter->ifp;
2143 	struct ifmultiaddr *ifma;
2144 	u32 reg_rctl = 0;
2145 	u8  *mta; /* Multicast array memory */
2146 	int mcnt = 0;
2147 
2148 	IOCTL_DEBUGOUT("em_set_multi: begin");
2149 
2150 	mta = adapter->mta;
2151 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2152 
2153 	if (adapter->hw.mac.type == e1000_82542 &&
2154 	    adapter->hw.revision_id == E1000_REVISION_2) {
2155 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2156 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2157 			e1000_pci_clear_mwi(&adapter->hw);
2158 		reg_rctl |= E1000_RCTL_RST;
2159 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2160 		msec_delay(5);
2161 	}
2162 
2163 #if __FreeBSD_version < 800000
2164 	IF_ADDR_LOCK(ifp);
2165 #else
2166 	if_maddr_rlock(ifp);
2167 #endif
2168 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2169 		if (ifma->ifma_addr->sa_family != AF_LINK)
2170 			continue;
2171 
2172 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2173 			break;
2174 
2175 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2176 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2177 		mcnt++;
2178 	}
2179 #if __FreeBSD_version < 800000
2180 	IF_ADDR_UNLOCK(ifp);
2181 #else
2182 	if_maddr_runlock(ifp);
2183 #endif
2184 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2185 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2186 		reg_rctl |= E1000_RCTL_MPE;
2187 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188 	} else
2189 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2190 
2191 	if (adapter->hw.mac.type == e1000_82542 &&
2192 	    adapter->hw.revision_id == E1000_REVISION_2) {
2193 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2194 		reg_rctl &= ~E1000_RCTL_RST;
2195 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2196 		msec_delay(5);
2197 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2198 			e1000_pci_set_mwi(&adapter->hw);
2199 	}
2200 }
2201 
2202 
2203 /*********************************************************************
2204  *  Timer routine
2205  *
2206  *  This routine checks for link status and updates statistics.
2207  *
2208  **********************************************************************/
2209 
2210 static void
2211 em_local_timer(void *arg)
2212 {
2213 	struct adapter	*adapter = arg;
2214 	struct ifnet	*ifp = adapter->ifp;
2215 	struct tx_ring	*txr = adapter->tx_rings;
2216 	struct rx_ring	*rxr = adapter->rx_rings;
2217 	u32		trigger;
2218 
2219 	EM_CORE_LOCK_ASSERT(adapter);
2220 
2221 	em_update_link_status(adapter);
2222 	em_update_stats_counters(adapter);
2223 
2224 	/* Reset LAA into RAR[0] on 82571 */
2225 	if ((adapter->hw.mac.type == e1000_82571) &&
2226 	    e1000_get_laa_state_82571(&adapter->hw))
2227 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2228 
2229 	/* Mask to use in the irq trigger */
2230 	if (adapter->msix_mem)
2231 		trigger = rxr->ims; /* RX for 82574 */
2232 	else
2233 		trigger = E1000_ICS_RXDMT0;
2234 
2235 	/*
2236 	** Check on the state of the TX queue(s), this
2237 	** can be done without the lock because its RO
2238 	** and the HUNG state will be static if set.
2239 	*/
2240 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2241 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2242 		    (adapter->pause_frames == 0))
2243 			goto hung;
2244 		/* Schedule a TX tasklet if needed */
2245 		if (txr->tx_avail <= EM_MAX_SCATTER)
2246 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2247 	}
2248 
2249 	adapter->pause_frames = 0;
2250 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2251 #ifndef DEVICE_POLLING
2252 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2253 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2254 #endif
2255 	return;
2256 hung:
2257 	/* Looks like we're hung */
2258 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2259 	device_printf(adapter->dev,
2260 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2261 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2262 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2263 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2264 	    "Next TX to Clean = %d\n",
2265 	    txr->me, txr->tx_avail, txr->next_to_clean);
2266 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2267 	adapter->watchdog_events++;
2268 	adapter->pause_frames = 0;
2269 	em_init_locked(adapter);
2270 }
2271 
2272 
2273 static void
2274 em_update_link_status(struct adapter *adapter)
2275 {
2276 	struct e1000_hw *hw = &adapter->hw;
2277 	struct ifnet *ifp = adapter->ifp;
2278 	device_t dev = adapter->dev;
2279 	struct tx_ring *txr = adapter->tx_rings;
2280 	u32 link_check = 0;
2281 
2282 	/* Get the cached link value or read phy for real */
2283 	switch (hw->phy.media_type) {
2284 	case e1000_media_type_copper:
2285 		if (hw->mac.get_link_status) {
2286 			/* Do the work to read phy */
2287 			e1000_check_for_link(hw);
2288 			link_check = !hw->mac.get_link_status;
2289 			if (link_check) /* ESB2 fix */
2290 				e1000_cfg_on_link_up(hw);
2291 		} else
2292 			link_check = TRUE;
2293 		break;
2294 	case e1000_media_type_fiber:
2295 		e1000_check_for_link(hw);
2296 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2297                                  E1000_STATUS_LU);
2298 		break;
2299 	case e1000_media_type_internal_serdes:
2300 		e1000_check_for_link(hw);
2301 		link_check = adapter->hw.mac.serdes_has_link;
2302 		break;
2303 	default:
2304 	case e1000_media_type_unknown:
2305 		break;
2306 	}
2307 
2308 	/* Now check for a transition */
2309 	if (link_check && (adapter->link_active == 0)) {
2310 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2311 		    &adapter->link_duplex);
2312 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2313 		if ((adapter->link_speed != SPEED_1000) &&
2314 		    ((hw->mac.type == e1000_82571) ||
2315 		    (hw->mac.type == e1000_82572))) {
2316 			int tarc0;
2317 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2318 			tarc0 &= ~SPEED_MODE_BIT;
2319 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2320 		}
2321 		if (bootverbose)
2322 			device_printf(dev, "Link is up %d Mbps %s\n",
2323 			    adapter->link_speed,
2324 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2325 			    "Full Duplex" : "Half Duplex"));
2326 		adapter->link_active = 1;
2327 		adapter->smartspeed = 0;
2328 		ifp->if_baudrate = adapter->link_speed * 1000000;
2329 		if_link_state_change(ifp, LINK_STATE_UP);
2330 	} else if (!link_check && (adapter->link_active == 1)) {
2331 		ifp->if_baudrate = adapter->link_speed = 0;
2332 		adapter->link_duplex = 0;
2333 		if (bootverbose)
2334 			device_printf(dev, "Link is Down\n");
2335 		adapter->link_active = 0;
2336 		/* Link down, disable watchdog */
2337 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2338 			txr->queue_status = EM_QUEUE_IDLE;
2339 		if_link_state_change(ifp, LINK_STATE_DOWN);
2340 	}
2341 }
2342 
2343 /*********************************************************************
2344  *
2345  *  This routine disables all traffic on the adapter by issuing a
2346  *  global reset on the MAC and deallocates TX/RX buffers.
2347  *
2348  *  This routine should always be called with BOTH the CORE
2349  *  and TX locks.
2350  **********************************************************************/
2351 
2352 static void
2353 em_stop(void *arg)
2354 {
2355 	struct adapter	*adapter = arg;
2356 	struct ifnet	*ifp = adapter->ifp;
2357 	struct tx_ring	*txr = adapter->tx_rings;
2358 
2359 	EM_CORE_LOCK_ASSERT(adapter);
2360 
2361 	INIT_DEBUGOUT("em_stop: begin");
2362 
2363 	em_disable_intr(adapter);
2364 	callout_stop(&adapter->timer);
2365 
2366 	/* Tell the stack that the interface is no longer active */
2367 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2368 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2369 
2370         /* Unarm watchdog timer. */
2371 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2372 		EM_TX_LOCK(txr);
2373 		txr->queue_status = EM_QUEUE_IDLE;
2374 		EM_TX_UNLOCK(txr);
2375 	}
2376 
2377 	e1000_reset_hw(&adapter->hw);
2378 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2379 
2380 	e1000_led_off(&adapter->hw);
2381 	e1000_cleanup_led(&adapter->hw);
2382 }
2383 
2384 
2385 /*********************************************************************
2386  *
2387  *  Determine hardware revision.
2388  *
2389  **********************************************************************/
2390 static void
2391 em_identify_hardware(struct adapter *adapter)
2392 {
2393 	device_t dev = adapter->dev;
2394 
2395 	/* Make sure our PCI config space has the necessary stuff set */
2396 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2397 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2398 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2399 		device_printf(dev, "Memory Access and/or Bus Master bits "
2400 		    "were not set!\n");
2401 		adapter->hw.bus.pci_cmd_word |=
2402 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2403 		pci_write_config(dev, PCIR_COMMAND,
2404 		    adapter->hw.bus.pci_cmd_word, 2);
2405 	}
2406 
2407 	/* Save off the information about this board */
2408 	adapter->hw.vendor_id = pci_get_vendor(dev);
2409 	adapter->hw.device_id = pci_get_device(dev);
2410 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2411 	adapter->hw.subsystem_vendor_id =
2412 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2413 	adapter->hw.subsystem_device_id =
2414 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2415 
2416 	/* Do Shared Code Init and Setup */
2417 	if (e1000_set_mac_type(&adapter->hw)) {
2418 		device_printf(dev, "Setup init failure\n");
2419 		return;
2420 	}
2421 }
2422 
2423 static int
2424 em_allocate_pci_resources(struct adapter *adapter)
2425 {
2426 	device_t	dev = adapter->dev;
2427 	int		rid;
2428 
2429 	rid = PCIR_BAR(0);
2430 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2431 	    &rid, RF_ACTIVE);
2432 	if (adapter->memory == NULL) {
2433 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2434 		return (ENXIO);
2435 	}
2436 	adapter->osdep.mem_bus_space_tag =
2437 	    rman_get_bustag(adapter->memory);
2438 	adapter->osdep.mem_bus_space_handle =
2439 	    rman_get_bushandle(adapter->memory);
2440 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2441 
2442 	/* Default to a single queue */
2443 	adapter->num_queues = 1;
2444 
2445 	/*
2446 	 * Setup MSI/X or MSI if PCI Express
2447 	 */
2448 	adapter->msix = em_setup_msix(adapter);
2449 
2450 	adapter->hw.back = &adapter->osdep;
2451 
2452 	return (0);
2453 }
2454 
2455 /*********************************************************************
2456  *
2457  *  Setup the Legacy or MSI Interrupt handler
2458  *
2459  **********************************************************************/
2460 int
2461 em_allocate_legacy(struct adapter *adapter)
2462 {
2463 	device_t dev = adapter->dev;
2464 	struct tx_ring	*txr = adapter->tx_rings;
2465 	int error, rid = 0;
2466 
2467 	/* Manually turn off all interrupts */
2468 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2469 
2470 	if (adapter->msix == 1) /* using MSI */
2471 		rid = 1;
2472 	/* We allocate a single interrupt resource */
2473 	adapter->res = bus_alloc_resource_any(dev,
2474 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2475 	if (adapter->res == NULL) {
2476 		device_printf(dev, "Unable to allocate bus resource: "
2477 		    "interrupt\n");
2478 		return (ENXIO);
2479 	}
2480 
2481 	/*
2482 	 * Allocate a fast interrupt and the associated
2483 	 * deferred processing contexts.
2484 	 */
2485 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2486 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2487 	    taskqueue_thread_enqueue, &adapter->tq);
2488 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2489 	    device_get_nameunit(adapter->dev));
2490 	/* Use a TX only tasklet for local timer */
2491 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2492 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2493 	    taskqueue_thread_enqueue, &txr->tq);
2494 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2495 	    device_get_nameunit(adapter->dev));
2496 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2497 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2498 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2499 		device_printf(dev, "Failed to register fast interrupt "
2500 			    "handler: %d\n", error);
2501 		taskqueue_free(adapter->tq);
2502 		adapter->tq = NULL;
2503 		return (error);
2504 	}
2505 
2506 	return (0);
2507 }
2508 
2509 /*********************************************************************
2510  *
2511  *  Setup the MSIX Interrupt handlers
2512  *   This is not really Multiqueue, rather
2513  *   its just seperate interrupt vectors
2514  *   for TX, RX, and Link.
2515  *
2516  **********************************************************************/
2517 int
2518 em_allocate_msix(struct adapter *adapter)
2519 {
2520 	device_t	dev = adapter->dev;
2521 	struct		tx_ring *txr = adapter->tx_rings;
2522 	struct		rx_ring *rxr = adapter->rx_rings;
2523 	int		error, rid, vector = 0;
2524 
2525 
2526 	/* Make sure all interrupts are disabled */
2527 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2528 
2529 	/* First set up ring resources */
2530 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2531 
2532 		/* RX ring */
2533 		rid = vector + 1;
2534 
2535 		rxr->res = bus_alloc_resource_any(dev,
2536 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2537 		if (rxr->res == NULL) {
2538 			device_printf(dev,
2539 			    "Unable to allocate bus resource: "
2540 			    "RX MSIX Interrupt %d\n", i);
2541 			return (ENXIO);
2542 		}
2543 		if ((error = bus_setup_intr(dev, rxr->res,
2544 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2545 		    rxr, &rxr->tag)) != 0) {
2546 			device_printf(dev, "Failed to register RX handler");
2547 			return (error);
2548 		}
2549 #if __FreeBSD_version >= 800504
2550 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2551 #endif
2552 		rxr->msix = vector++; /* NOTE increment vector for TX */
2553 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2554 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2555 		    taskqueue_thread_enqueue, &rxr->tq);
2556 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2557 		    device_get_nameunit(adapter->dev));
2558 		/*
2559 		** Set the bit to enable interrupt
2560 		** in E1000_IMS -- bits 20 and 21
2561 		** are for RX0 and RX1, note this has
2562 		** NOTHING to do with the MSIX vector
2563 		*/
2564 		rxr->ims = 1 << (20 + i);
2565 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2566 
2567 		/* TX ring */
2568 		rid = vector + 1;
2569 		txr->res = bus_alloc_resource_any(dev,
2570 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2571 		if (txr->res == NULL) {
2572 			device_printf(dev,
2573 			    "Unable to allocate bus resource: "
2574 			    "TX MSIX Interrupt %d\n", i);
2575 			return (ENXIO);
2576 		}
2577 		if ((error = bus_setup_intr(dev, txr->res,
2578 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2579 		    txr, &txr->tag)) != 0) {
2580 			device_printf(dev, "Failed to register TX handler");
2581 			return (error);
2582 		}
2583 #if __FreeBSD_version >= 800504
2584 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2585 #endif
2586 		txr->msix = vector++; /* Increment vector for next pass */
2587 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2588 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2589 		    taskqueue_thread_enqueue, &txr->tq);
2590 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2591 		    device_get_nameunit(adapter->dev));
2592 		/*
2593 		** Set the bit to enable interrupt
2594 		** in E1000_IMS -- bits 22 and 23
2595 		** are for TX0 and TX1, note this has
2596 		** NOTHING to do with the MSIX vector
2597 		*/
2598 		txr->ims = 1 << (22 + i);
2599 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2600 	}
2601 
2602 	/* Link interrupt */
2603 	++rid;
2604 	adapter->res = bus_alloc_resource_any(dev,
2605 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2606 	if (!adapter->res) {
2607 		device_printf(dev,"Unable to allocate "
2608 		    "bus resource: Link interrupt [%d]\n", rid);
2609 		return (ENXIO);
2610         }
2611 	/* Set the link handler function */
2612 	error = bus_setup_intr(dev, adapter->res,
2613 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2614 	    em_msix_link, adapter, &adapter->tag);
2615 	if (error) {
2616 		adapter->res = NULL;
2617 		device_printf(dev, "Failed to register LINK handler");
2618 		return (error);
2619 	}
2620 #if __FreeBSD_version >= 800504
2621 		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2622 #endif
2623 	adapter->linkvec = vector;
2624 	adapter->ivars |=  (8 | vector) << 16;
2625 	adapter->ivars |= 0x80000000;
2626 
2627 	return (0);
2628 }
2629 
2630 
2631 static void
2632 em_free_pci_resources(struct adapter *adapter)
2633 {
2634 	device_t	dev = adapter->dev;
2635 	struct tx_ring	*txr;
2636 	struct rx_ring	*rxr;
2637 	int		rid;
2638 
2639 
2640 	/*
2641 	** Release all the queue interrupt resources:
2642 	*/
2643 	for (int i = 0; i < adapter->num_queues; i++) {
2644 		txr = &adapter->tx_rings[i];
2645 		rxr = &adapter->rx_rings[i];
2646 		/* an early abort? */
2647 		if ((txr == NULL) || (rxr == NULL))
2648 			break;
2649 		rid = txr->msix +1;
2650 		if (txr->tag != NULL) {
2651 			bus_teardown_intr(dev, txr->res, txr->tag);
2652 			txr->tag = NULL;
2653 		}
2654 		if (txr->res != NULL)
2655 			bus_release_resource(dev, SYS_RES_IRQ,
2656 			    rid, txr->res);
2657 		rid = rxr->msix +1;
2658 		if (rxr->tag != NULL) {
2659 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2660 			rxr->tag = NULL;
2661 		}
2662 		if (rxr->res != NULL)
2663 			bus_release_resource(dev, SYS_RES_IRQ,
2664 			    rid, rxr->res);
2665 	}
2666 
2667         if (adapter->linkvec) /* we are doing MSIX */
2668                 rid = adapter->linkvec + 1;
2669         else
2670                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2671 
2672 	if (adapter->tag != NULL) {
2673 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2674 		adapter->tag = NULL;
2675 	}
2676 
2677 	if (adapter->res != NULL)
2678 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2679 
2680 
2681 	if (adapter->msix)
2682 		pci_release_msi(dev);
2683 
2684 	if (adapter->msix_mem != NULL)
2685 		bus_release_resource(dev, SYS_RES_MEMORY,
2686 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2687 
2688 	if (adapter->memory != NULL)
2689 		bus_release_resource(dev, SYS_RES_MEMORY,
2690 		    PCIR_BAR(0), adapter->memory);
2691 
2692 	if (adapter->flash != NULL)
2693 		bus_release_resource(dev, SYS_RES_MEMORY,
2694 		    EM_FLASH, adapter->flash);
2695 }
2696 
2697 /*
2698  * Setup MSI or MSI/X
2699  */
2700 static int
2701 em_setup_msix(struct adapter *adapter)
2702 {
2703 	device_t dev = adapter->dev;
2704 	int val = 0;
2705 
2706 	/*
2707 	** Setup MSI/X for Hartwell: tests have shown
2708 	** use of two queues to be unstable, and to
2709 	** provide no great gain anyway, so we simply
2710 	** seperate the interrupts and use a single queue.
2711 	*/
2712 	if ((adapter->hw.mac.type == e1000_82574) &&
2713 	    (em_enable_msix == TRUE)) {
2714 		/* Map the MSIX BAR */
2715 		int rid = PCIR_BAR(EM_MSIX_BAR);
2716 		adapter->msix_mem = bus_alloc_resource_any(dev,
2717 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2718        		if (!adapter->msix_mem) {
2719 			/* May not be enabled */
2720                		device_printf(adapter->dev,
2721 			    "Unable to map MSIX table \n");
2722 			goto msi;
2723        		}
2724 		val = pci_msix_count(dev);
2725 		/* We only need 3 vectors */
2726 		if (val > 3)
2727 			val = 3;
2728 		if ((val != 3) && (val != 5)) {
2729 			bus_release_resource(dev, SYS_RES_MEMORY,
2730 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2731 			adapter->msix_mem = NULL;
2732                		device_printf(adapter->dev,
2733 			    "MSIX: incorrect vectors, using MSI\n");
2734 			goto msi;
2735 		}
2736 
2737 		if (pci_alloc_msix(dev, &val) == 0) {
2738 			device_printf(adapter->dev,
2739 			    "Using MSIX interrupts "
2740 			    "with %d vectors\n", val);
2741 		}
2742 
2743 		return (val);
2744 	}
2745 msi:
2746        	val = pci_msi_count(dev);
2747        	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2748                	adapter->msix = 1;
2749                	device_printf(adapter->dev,"Using an MSI interrupt\n");
2750 		return (val);
2751 	}
2752 	/* Should only happen due to manual configuration */
2753 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2754 	return (0);
2755 }
2756 
2757 
2758 /*********************************************************************
2759  *
2760  *  Initialize the hardware to a configuration
2761  *  as specified by the adapter structure.
2762  *
2763  **********************************************************************/
2764 static void
2765 em_reset(struct adapter *adapter)
2766 {
2767 	device_t	dev = adapter->dev;
2768 	struct ifnet	*ifp = adapter->ifp;
2769 	struct e1000_hw	*hw = &adapter->hw;
2770 	u16		rx_buffer_size;
2771 	u32		pba;
2772 
2773 	INIT_DEBUGOUT("em_reset: begin");
2774 
2775 	/* Set up smart power down as default off on newer adapters. */
2776 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2777 	    hw->mac.type == e1000_82572)) {
2778 		u16 phy_tmp = 0;
2779 
2780 		/* Speed up time to link by disabling smart power down. */
2781 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2782 		phy_tmp &= ~IGP02E1000_PM_SPD;
2783 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2784 	}
2785 
2786 	/*
2787 	 * Packet Buffer Allocation (PBA)
2788 	 * Writing PBA sets the receive portion of the buffer
2789 	 * the remainder is used for the transmit buffer.
2790 	 */
2791 	switch (hw->mac.type) {
2792 	/* Total Packet Buffer on these is 48K */
2793 	case e1000_82571:
2794 	case e1000_82572:
2795 	case e1000_80003es2lan:
2796 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2797 		break;
2798 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2799 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2800 		break;
2801 	case e1000_82574:
2802 	case e1000_82583:
2803 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2804 		break;
2805 	case e1000_ich8lan:
2806 		pba = E1000_PBA_8K;
2807 		break;
2808 	case e1000_ich9lan:
2809 	case e1000_ich10lan:
2810 		/* Boost Receive side for jumbo frames */
2811 		if (adapter->max_frame_size > 4096)
2812 			pba = E1000_PBA_14K;
2813 		else
2814 			pba = E1000_PBA_10K;
2815 		break;
2816 	case e1000_pchlan:
2817 	case e1000_pch2lan:
2818 		pba = E1000_PBA_26K;
2819 		break;
2820 	default:
2821 		if (adapter->max_frame_size > 8192)
2822 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2823 		else
2824 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2825 	}
2826 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2827 
2828 	/*
2829 	 * These parameters control the automatic generation (Tx) and
2830 	 * response (Rx) to Ethernet PAUSE frames.
2831 	 * - High water mark should allow for at least two frames to be
2832 	 *   received after sending an XOFF.
2833 	 * - Low water mark works best when it is very near the high water mark.
2834 	 *   This allows the receiver to restart by sending XON when it has
2835 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2836 	 *   restart after one full frame is pulled from the buffer. There
2837 	 *   could be several smaller frames in the buffer and if so they will
2838 	 *   not trigger the XON until their total number reduces the buffer
2839 	 *   by 1500.
2840 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2841 	 */
2842 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2843 	hw->fc.high_water = rx_buffer_size -
2844 	    roundup2(adapter->max_frame_size, 1024);
2845 	hw->fc.low_water = hw->fc.high_water - 1500;
2846 
2847 	if (adapter->fc) /* locally set flow control value? */
2848 		hw->fc.requested_mode = adapter->fc;
2849 	else
2850 		hw->fc.requested_mode = e1000_fc_full;
2851 
2852 	if (hw->mac.type == e1000_80003es2lan)
2853 		hw->fc.pause_time = 0xFFFF;
2854 	else
2855 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2856 
2857 	hw->fc.send_xon = TRUE;
2858 
2859 	/* Device specific overrides/settings */
2860 	switch (hw->mac.type) {
2861 	case e1000_pchlan:
2862 		/* Workaround: no TX flow ctrl for PCH */
2863                 hw->fc.requested_mode = e1000_fc_rx_pause;
2864 		hw->fc.pause_time = 0xFFFF; /* override */
2865 		if (ifp->if_mtu > ETHERMTU) {
2866 			hw->fc.high_water = 0x3500;
2867 			hw->fc.low_water = 0x1500;
2868 		} else {
2869 			hw->fc.high_water = 0x5000;
2870 			hw->fc.low_water = 0x3000;
2871 		}
2872 		hw->fc.refresh_time = 0x1000;
2873 		break;
2874 	case e1000_pch2lan:
2875 		hw->fc.high_water = 0x5C20;
2876 		hw->fc.low_water = 0x5048;
2877 		hw->fc.pause_time = 0x0650;
2878 		hw->fc.refresh_time = 0x0400;
2879 		/* Jumbos need adjusted PBA */
2880 		if (ifp->if_mtu > ETHERMTU)
2881 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2882 		else
2883 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2884 		break;
2885         case e1000_ich9lan:
2886         case e1000_ich10lan:
2887 		if (ifp->if_mtu > ETHERMTU) {
2888 			hw->fc.high_water = 0x2800;
2889 			hw->fc.low_water = hw->fc.high_water - 8;
2890 			break;
2891 		}
2892 		/* else fall thru */
2893 	default:
2894 		if (hw->mac.type == e1000_80003es2lan)
2895 			hw->fc.pause_time = 0xFFFF;
2896 		break;
2897 	}
2898 
2899 	/* Issue a global reset */
2900 	e1000_reset_hw(hw);
2901 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2902 	em_disable_aspm(adapter);
2903 	/* and a re-init */
2904 	if (e1000_init_hw(hw) < 0) {
2905 		device_printf(dev, "Hardware Initialization Failed\n");
2906 		return;
2907 	}
2908 
2909 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2910 	e1000_get_phy_info(hw);
2911 	e1000_check_for_link(hw);
2912 	return;
2913 }
2914 
2915 /*********************************************************************
2916  *
2917  *  Setup networking device structure and register an interface.
2918  *
2919  **********************************************************************/
2920 static int
2921 em_setup_interface(device_t dev, struct adapter *adapter)
2922 {
2923 	struct ifnet   *ifp;
2924 
2925 	INIT_DEBUGOUT("em_setup_interface: begin");
2926 
2927 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2928 	if (ifp == NULL) {
2929 		device_printf(dev, "can not allocate ifnet structure\n");
2930 		return (-1);
2931 	}
2932 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2933 	ifp->if_init =  em_init;
2934 	ifp->if_softc = adapter;
2935 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2936 	ifp->if_ioctl = em_ioctl;
2937 #ifdef EM_MULTIQUEUE
2938 	/* Multiqueue stack interface */
2939 	ifp->if_transmit = em_mq_start;
2940 	ifp->if_qflush = em_qflush;
2941 #else
2942 	ifp->if_start = em_start;
2943 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2944 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2945 	IFQ_SET_READY(&ifp->if_snd);
2946 #endif
2947 
2948 	ether_ifattach(ifp, adapter->hw.mac.addr);
2949 
2950 	ifp->if_capabilities = ifp->if_capenable = 0;
2951 
2952 
2953 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2954 	ifp->if_capabilities |= IFCAP_TSO4;
2955 	/*
2956 	 * Tell the upper layer(s) we
2957 	 * support full VLAN capability
2958 	 */
2959 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2960 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2961 			     |  IFCAP_VLAN_HWTSO
2962 			     |  IFCAP_VLAN_MTU;
2963 	ifp->if_capenable = ifp->if_capabilities;
2964 
2965 	/*
2966 	** Don't turn this on by default, if vlans are
2967 	** created on another pseudo device (eg. lagg)
2968 	** then vlan events are not passed thru, breaking
2969 	** operation, but with HW FILTER off it works. If
2970 	** using vlans directly on the em driver you can
2971 	** enable this and get full hardware tag filtering.
2972 	*/
2973 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2974 
2975 #ifdef DEVICE_POLLING
2976 	ifp->if_capabilities |= IFCAP_POLLING;
2977 #endif
2978 
2979 	/* Enable only WOL MAGIC by default */
2980 	if (adapter->wol) {
2981 		ifp->if_capabilities |= IFCAP_WOL;
2982 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2983 	}
2984 
2985 	/*
2986 	 * Specify the media types supported by this adapter and register
2987 	 * callbacks to update media and link information
2988 	 */
2989 	ifmedia_init(&adapter->media, IFM_IMASK,
2990 	    em_media_change, em_media_status);
2991 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2992 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2993 		u_char fiber_type = IFM_1000_SX;	/* default type */
2994 
2995 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2996 			    0, NULL);
2997 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2998 	} else {
2999 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3000 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3001 			    0, NULL);
3002 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3003 			    0, NULL);
3004 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3005 			    0, NULL);
3006 		if (adapter->hw.phy.type != e1000_phy_ife) {
3007 			ifmedia_add(&adapter->media,
3008 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3009 			ifmedia_add(&adapter->media,
3010 				IFM_ETHER | IFM_1000_T, 0, NULL);
3011 		}
3012 	}
3013 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3014 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3015 	return (0);
3016 }
3017 
3018 
3019 /*
3020  * Manage DMA'able memory.
3021  */
3022 static void
3023 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3024 {
3025 	if (error)
3026 		return;
3027 	*(bus_addr_t *) arg = segs[0].ds_addr;
3028 }
3029 
3030 static int
3031 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3032         struct em_dma_alloc *dma, int mapflags)
3033 {
3034 	int error;
3035 
3036 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3037 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3038 				BUS_SPACE_MAXADDR,	/* lowaddr */
3039 				BUS_SPACE_MAXADDR,	/* highaddr */
3040 				NULL, NULL,		/* filter, filterarg */
3041 				size,			/* maxsize */
3042 				1,			/* nsegments */
3043 				size,			/* maxsegsize */
3044 				0,			/* flags */
3045 				NULL,			/* lockfunc */
3046 				NULL,			/* lockarg */
3047 				&dma->dma_tag);
3048 	if (error) {
3049 		device_printf(adapter->dev,
3050 		    "%s: bus_dma_tag_create failed: %d\n",
3051 		    __func__, error);
3052 		goto fail_0;
3053 	}
3054 
3055 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3056 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3057 	if (error) {
3058 		device_printf(adapter->dev,
3059 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3060 		    __func__, (uintmax_t)size, error);
3061 		goto fail_2;
3062 	}
3063 
3064 	dma->dma_paddr = 0;
3065 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3066 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3067 	if (error || dma->dma_paddr == 0) {
3068 		device_printf(adapter->dev,
3069 		    "%s: bus_dmamap_load failed: %d\n",
3070 		    __func__, error);
3071 		goto fail_3;
3072 	}
3073 
3074 	return (0);
3075 
3076 fail_3:
3077 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3078 fail_2:
3079 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3080 	bus_dma_tag_destroy(dma->dma_tag);
3081 fail_0:
3082 	dma->dma_map = NULL;
3083 	dma->dma_tag = NULL;
3084 
3085 	return (error);
3086 }
3087 
3088 static void
3089 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3090 {
3091 	if (dma->dma_tag == NULL)
3092 		return;
3093 	if (dma->dma_map != NULL) {
3094 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3095 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3096 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3097 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3098 		dma->dma_map = NULL;
3099 	}
3100 	bus_dma_tag_destroy(dma->dma_tag);
3101 	dma->dma_tag = NULL;
3102 }
3103 
3104 
3105 /*********************************************************************
3106  *
3107  *  Allocate memory for the transmit and receive rings, and then
3108  *  the descriptors associated with each, called only once at attach.
3109  *
3110  **********************************************************************/
3111 static int
3112 em_allocate_queues(struct adapter *adapter)
3113 {
3114 	device_t		dev = adapter->dev;
3115 	struct tx_ring		*txr = NULL;
3116 	struct rx_ring		*rxr = NULL;
3117 	int rsize, tsize, error = E1000_SUCCESS;
3118 	int txconf = 0, rxconf = 0;
3119 
3120 
3121 	/* Allocate the TX ring struct memory */
3122 	if (!(adapter->tx_rings =
3123 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3124 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3125 		device_printf(dev, "Unable to allocate TX ring memory\n");
3126 		error = ENOMEM;
3127 		goto fail;
3128 	}
3129 
3130 	/* Now allocate the RX */
3131 	if (!(adapter->rx_rings =
3132 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3133 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134 		device_printf(dev, "Unable to allocate RX ring memory\n");
3135 		error = ENOMEM;
3136 		goto rx_fail;
3137 	}
3138 
3139 	tsize = roundup2(adapter->num_tx_desc *
3140 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3141 	/*
3142 	 * Now set up the TX queues, txconf is needed to handle the
3143 	 * possibility that things fail midcourse and we need to
3144 	 * undo memory gracefully
3145 	 */
3146 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3147 		/* Set up some basics */
3148 		txr = &adapter->tx_rings[i];
3149 		txr->adapter = adapter;
3150 		txr->me = i;
3151 
3152 		/* Initialize the TX lock */
3153 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3154 		    device_get_nameunit(dev), txr->me);
3155 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3156 
3157 		if (em_dma_malloc(adapter, tsize,
3158 			&txr->txdma, BUS_DMA_NOWAIT)) {
3159 			device_printf(dev,
3160 			    "Unable to allocate TX Descriptor memory\n");
3161 			error = ENOMEM;
3162 			goto err_tx_desc;
3163 		}
3164 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3165 		bzero((void *)txr->tx_base, tsize);
3166 
3167         	if (em_allocate_transmit_buffers(txr)) {
3168 			device_printf(dev,
3169 			    "Critical Failure setting up transmit buffers\n");
3170 			error = ENOMEM;
3171 			goto err_tx_desc;
3172         	}
3173 #if __FreeBSD_version >= 800000
3174 		/* Allocate a buf ring */
3175 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3176 		    M_WAITOK, &txr->tx_mtx);
3177 #endif
3178 	}
3179 
3180 	/*
3181 	 * Next the RX queues...
3182 	 */
3183 	rsize = roundup2(adapter->num_rx_desc *
3184 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3185 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3186 		rxr = &adapter->rx_rings[i];
3187 		rxr->adapter = adapter;
3188 		rxr->me = i;
3189 
3190 		/* Initialize the RX lock */
3191 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3192 		    device_get_nameunit(dev), txr->me);
3193 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3194 
3195 		if (em_dma_malloc(adapter, rsize,
3196 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3197 			device_printf(dev,
3198 			    "Unable to allocate RxDescriptor memory\n");
3199 			error = ENOMEM;
3200 			goto err_rx_desc;
3201 		}
3202 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3203 		bzero((void *)rxr->rx_base, rsize);
3204 
3205         	/* Allocate receive buffers for the ring*/
3206 		if (em_allocate_receive_buffers(rxr)) {
3207 			device_printf(dev,
3208 			    "Critical Failure setting up receive buffers\n");
3209 			error = ENOMEM;
3210 			goto err_rx_desc;
3211 		}
3212 	}
3213 
3214 	return (0);
3215 
3216 err_rx_desc:
3217 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3218 		em_dma_free(adapter, &rxr->rxdma);
3219 err_tx_desc:
3220 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3221 		em_dma_free(adapter, &txr->txdma);
3222 	free(adapter->rx_rings, M_DEVBUF);
3223 rx_fail:
3224 #if __FreeBSD_version >= 800000
3225 	buf_ring_free(txr->br, M_DEVBUF);
3226 #endif
3227 	free(adapter->tx_rings, M_DEVBUF);
3228 fail:
3229 	return (error);
3230 }
3231 
3232 
3233 /*********************************************************************
3234  *
3235  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3236  *  the information needed to transmit a packet on the wire. This is
3237  *  called only once at attach, setup is done every reset.
3238  *
3239  **********************************************************************/
3240 static int
3241 em_allocate_transmit_buffers(struct tx_ring *txr)
3242 {
3243 	struct adapter *adapter = txr->adapter;
3244 	device_t dev = adapter->dev;
3245 	struct em_buffer *txbuf;
3246 	int error, i;
3247 
3248 	/*
3249 	 * Setup DMA descriptor areas.
3250 	 */
3251 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3252 			       1, 0,			/* alignment, bounds */
3253 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3254 			       BUS_SPACE_MAXADDR,	/* highaddr */
3255 			       NULL, NULL,		/* filter, filterarg */
3256 			       EM_TSO_SIZE,		/* maxsize */
3257 			       EM_MAX_SCATTER,		/* nsegments */
3258 			       PAGE_SIZE,		/* maxsegsize */
3259 			       0,			/* flags */
3260 			       NULL,			/* lockfunc */
3261 			       NULL,			/* lockfuncarg */
3262 			       &txr->txtag))) {
3263 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3264 		goto fail;
3265 	}
3266 
3267 	if (!(txr->tx_buffers =
3268 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3269 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3270 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3271 		error = ENOMEM;
3272 		goto fail;
3273 	}
3274 
3275         /* Create the descriptor buffer dma maps */
3276 	txbuf = txr->tx_buffers;
3277 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3278 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3279 		if (error != 0) {
3280 			device_printf(dev, "Unable to create TX DMA map\n");
3281 			goto fail;
3282 		}
3283 	}
3284 
3285 	return 0;
3286 fail:
3287 	/* We free all, it handles case where we are in the middle */
3288 	em_free_transmit_structures(adapter);
3289 	return (error);
3290 }
3291 
3292 /*********************************************************************
3293  *
3294  *  Initialize a transmit ring.
3295  *
3296  **********************************************************************/
3297 static void
3298 em_setup_transmit_ring(struct tx_ring *txr)
3299 {
3300 	struct adapter *adapter = txr->adapter;
3301 	struct em_buffer *txbuf;
3302 	int i;
3303 #ifdef DEV_NETMAP
3304 	struct netmap_adapter *na = NA(adapter->ifp);
3305 	struct netmap_slot *slot;
3306 #endif /* DEV_NETMAP */
3307 
3308 	/* Clear the old descriptor contents */
3309 	EM_TX_LOCK(txr);
3310 #ifdef DEV_NETMAP
3311 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3312 #endif /* DEV_NETMAP */
3313 
3314 	bzero((void *)txr->tx_base,
3315 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3316 	/* Reset indices */
3317 	txr->next_avail_desc = 0;
3318 	txr->next_to_clean = 0;
3319 
3320 	/* Free any existing tx buffers. */
3321         txbuf = txr->tx_buffers;
3322 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3323 		if (txbuf->m_head != NULL) {
3324 			bus_dmamap_sync(txr->txtag, txbuf->map,
3325 			    BUS_DMASYNC_POSTWRITE);
3326 			bus_dmamap_unload(txr->txtag, txbuf->map);
3327 			m_freem(txbuf->m_head);
3328 			txbuf->m_head = NULL;
3329 		}
3330 #ifdef DEV_NETMAP
3331 		if (slot) {
3332 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3333 			uint64_t paddr;
3334 			void *addr;
3335 
3336 			addr = PNMB(slot + si, &paddr);
3337 			txr->tx_base[i].buffer_addr = htole64(paddr);
3338 			/* reload the map for netmap mode */
3339 			netmap_load_map(txr->txtag, txbuf->map, addr);
3340 		}
3341 #endif /* DEV_NETMAP */
3342 
3343 		/* clear the watch index */
3344 		txbuf->next_eop = -1;
3345         }
3346 
3347 	/* Set number of descriptors available */
3348 	txr->tx_avail = adapter->num_tx_desc;
3349 	txr->queue_status = EM_QUEUE_IDLE;
3350 
3351 	/* Clear checksum offload context. */
3352 	txr->last_hw_offload = 0;
3353 	txr->last_hw_ipcss = 0;
3354 	txr->last_hw_ipcso = 0;
3355 	txr->last_hw_tucss = 0;
3356 	txr->last_hw_tucso = 0;
3357 
3358 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3359 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3360 	EM_TX_UNLOCK(txr);
3361 }
3362 
3363 /*********************************************************************
3364  *
3365  *  Initialize all transmit rings.
3366  *
3367  **********************************************************************/
3368 static void
3369 em_setup_transmit_structures(struct adapter *adapter)
3370 {
3371 	struct tx_ring *txr = adapter->tx_rings;
3372 
3373 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3374 		em_setup_transmit_ring(txr);
3375 
3376 	return;
3377 }
3378 
3379 /*********************************************************************
3380  *
3381  *  Enable transmit unit.
3382  *
3383  **********************************************************************/
3384 static void
3385 em_initialize_transmit_unit(struct adapter *adapter)
3386 {
3387 	struct tx_ring	*txr = adapter->tx_rings;
3388 	struct e1000_hw	*hw = &adapter->hw;
3389 	u32	tctl, tarc, tipg = 0;
3390 
3391 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3392 
3393 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3394 		u64 bus_addr = txr->txdma.dma_paddr;
3395 		/* Base and Len of TX Ring */
3396 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3397 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3398 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3399 	    	    (u32)(bus_addr >> 32));
3400 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3401 	    	    (u32)bus_addr);
3402 		/* Init the HEAD/TAIL indices */
3403 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3404 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3405 
3406 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3407 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3408 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3409 
3410 		txr->queue_status = EM_QUEUE_IDLE;
3411 	}
3412 
3413 	/* Set the default values for the Tx Inter Packet Gap timer */
3414 	switch (adapter->hw.mac.type) {
3415 	case e1000_80003es2lan:
3416 		tipg = DEFAULT_82543_TIPG_IPGR1;
3417 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3418 		    E1000_TIPG_IPGR2_SHIFT;
3419 		break;
3420 	default:
3421 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3422 		    (adapter->hw.phy.media_type ==
3423 		    e1000_media_type_internal_serdes))
3424 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3425 		else
3426 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3427 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3428 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3429 	}
3430 
3431 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3432 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3433 
3434 	if(adapter->hw.mac.type >= e1000_82540)
3435 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3436 		    adapter->tx_abs_int_delay.value);
3437 
3438 	if ((adapter->hw.mac.type == e1000_82571) ||
3439 	    (adapter->hw.mac.type == e1000_82572)) {
3440 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3441 		tarc |= SPEED_MODE_BIT;
3442 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3443 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3444 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3445 		tarc |= 1;
3446 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3447 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3448 		tarc |= 1;
3449 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3450 	}
3451 
3452 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3453 	if (adapter->tx_int_delay.value > 0)
3454 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3455 
3456 	/* Program the Transmit Control Register */
3457 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3458 	tctl &= ~E1000_TCTL_CT;
3459 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3460 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3461 
3462 	if (adapter->hw.mac.type >= e1000_82571)
3463 		tctl |= E1000_TCTL_MULR;
3464 
3465 	/* This write will effectively turn on the transmit unit. */
3466 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3467 
3468 }
3469 
3470 
3471 /*********************************************************************
3472  *
3473  *  Free all transmit rings.
3474  *
3475  **********************************************************************/
3476 static void
3477 em_free_transmit_structures(struct adapter *adapter)
3478 {
3479 	struct tx_ring *txr = adapter->tx_rings;
3480 
3481 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3482 		EM_TX_LOCK(txr);
3483 		em_free_transmit_buffers(txr);
3484 		em_dma_free(adapter, &txr->txdma);
3485 		EM_TX_UNLOCK(txr);
3486 		EM_TX_LOCK_DESTROY(txr);
3487 	}
3488 
3489 	free(adapter->tx_rings, M_DEVBUF);
3490 }
3491 
3492 /*********************************************************************
3493  *
3494  *  Free transmit ring related data structures.
3495  *
3496  **********************************************************************/
3497 static void
3498 em_free_transmit_buffers(struct tx_ring *txr)
3499 {
3500 	struct adapter		*adapter = txr->adapter;
3501 	struct em_buffer	*txbuf;
3502 
3503 	INIT_DEBUGOUT("free_transmit_ring: begin");
3504 
3505 	if (txr->tx_buffers == NULL)
3506 		return;
3507 
3508 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3509 		txbuf = &txr->tx_buffers[i];
3510 		if (txbuf->m_head != NULL) {
3511 			bus_dmamap_sync(txr->txtag, txbuf->map,
3512 			    BUS_DMASYNC_POSTWRITE);
3513 			bus_dmamap_unload(txr->txtag,
3514 			    txbuf->map);
3515 			m_freem(txbuf->m_head);
3516 			txbuf->m_head = NULL;
3517 			if (txbuf->map != NULL) {
3518 				bus_dmamap_destroy(txr->txtag,
3519 				    txbuf->map);
3520 				txbuf->map = NULL;
3521 			}
3522 		} else if (txbuf->map != NULL) {
3523 			bus_dmamap_unload(txr->txtag,
3524 			    txbuf->map);
3525 			bus_dmamap_destroy(txr->txtag,
3526 			    txbuf->map);
3527 			txbuf->map = NULL;
3528 		}
3529 	}
3530 #if __FreeBSD_version >= 800000
3531 	if (txr->br != NULL)
3532 		buf_ring_free(txr->br, M_DEVBUF);
3533 #endif
3534 	if (txr->tx_buffers != NULL) {
3535 		free(txr->tx_buffers, M_DEVBUF);
3536 		txr->tx_buffers = NULL;
3537 	}
3538 	if (txr->txtag != NULL) {
3539 		bus_dma_tag_destroy(txr->txtag);
3540 		txr->txtag = NULL;
3541 	}
3542 	return;
3543 }
3544 
3545 
3546 /*********************************************************************
3547  *  The offload context is protocol specific (TCP/UDP) and thus
3548  *  only needs to be set when the protocol changes. The occasion
3549  *  of a context change can be a performance detriment, and
3550  *  might be better just disabled. The reason arises in the way
3551  *  in which the controller supports pipelined requests from the
3552  *  Tx data DMA. Up to four requests can be pipelined, and they may
3553  *  belong to the same packet or to multiple packets. However all
3554  *  requests for one packet are issued before a request is issued
3555  *  for a subsequent packet and if a request for the next packet
3556  *  requires a context change, that request will be stalled
3557  *  until the previous request completes. This means setting up
3558  *  a new context effectively disables pipelined Tx data DMA which
3559  *  in turn greatly slow down performance to send small sized
3560  *  frames.
3561  **********************************************************************/
3562 static void
3563 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3564     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3565 {
3566 	struct adapter			*adapter = txr->adapter;
3567 	struct e1000_context_desc	*TXD = NULL;
3568 	struct em_buffer		*tx_buffer;
3569 	int				cur, hdr_len;
3570 	u32				cmd = 0;
3571 	u16				offload = 0;
3572 	u8				ipcso, ipcss, tucso, tucss;
3573 
3574 	ipcss = ipcso = tucss = tucso = 0;
3575 	hdr_len = ip_off + (ip->ip_hl << 2);
3576 	cur = txr->next_avail_desc;
3577 
3578 	/* Setup of IP header checksum. */
3579 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3580 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3581 		offload |= CSUM_IP;
3582 		ipcss = ip_off;
3583 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3584 		/*
3585 		 * Start offset for header checksum calculation.
3586 		 * End offset for header checksum calculation.
3587 		 * Offset of place to put the checksum.
3588 		 */
3589 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3590 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3591 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3592 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3593 		cmd |= E1000_TXD_CMD_IP;
3594 	}
3595 
3596 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3597  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3598  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3599  		offload |= CSUM_TCP;
3600  		tucss = hdr_len;
3601  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3602  		/*
3603  		 * Setting up new checksum offload context for every frames
3604  		 * takes a lot of processing time for hardware. This also
3605  		 * reduces performance a lot for small sized frames so avoid
3606  		 * it if driver can use previously configured checksum
3607  		 * offload context.
3608  		 */
3609  		if (txr->last_hw_offload == offload) {
3610  			if (offload & CSUM_IP) {
3611  				if (txr->last_hw_ipcss == ipcss &&
3612  				    txr->last_hw_ipcso == ipcso &&
3613  				    txr->last_hw_tucss == tucss &&
3614  				    txr->last_hw_tucso == tucso)
3615  					return;
3616  			} else {
3617  				if (txr->last_hw_tucss == tucss &&
3618  				    txr->last_hw_tucso == tucso)
3619  					return;
3620  			}
3621   		}
3622  		txr->last_hw_offload = offload;
3623  		txr->last_hw_tucss = tucss;
3624  		txr->last_hw_tucso = tucso;
3625  		/*
3626  		 * Start offset for payload checksum calculation.
3627  		 * End offset for payload checksum calculation.
3628  		 * Offset of place to put the checksum.
3629  		 */
3630 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3631  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3632  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3633  		TXD->upper_setup.tcp_fields.tucso = tucso;
3634  		cmd |= E1000_TXD_CMD_TCP;
3635  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3636  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3637  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3638  		tucss = hdr_len;
3639  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3640  		/*
3641  		 * Setting up new checksum offload context for every frames
3642  		 * takes a lot of processing time for hardware. This also
3643  		 * reduces performance a lot for small sized frames so avoid
3644  		 * it if driver can use previously configured checksum
3645  		 * offload context.
3646  		 */
3647  		if (txr->last_hw_offload == offload) {
3648  			if (offload & CSUM_IP) {
3649  				if (txr->last_hw_ipcss == ipcss &&
3650  				    txr->last_hw_ipcso == ipcso &&
3651  				    txr->last_hw_tucss == tucss &&
3652  				    txr->last_hw_tucso == tucso)
3653  					return;
3654  			} else {
3655  				if (txr->last_hw_tucss == tucss &&
3656  				    txr->last_hw_tucso == tucso)
3657  					return;
3658  			}
3659  		}
3660  		txr->last_hw_offload = offload;
3661  		txr->last_hw_tucss = tucss;
3662  		txr->last_hw_tucso = tucso;
3663  		/*
3664  		 * Start offset for header checksum calculation.
3665  		 * End offset for header checksum calculation.
3666  		 * Offset of place to put the checksum.
3667  		 */
3668 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3669  		TXD->upper_setup.tcp_fields.tucss = tucss;
3670  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3671  		TXD->upper_setup.tcp_fields.tucso = tucso;
3672   	}
3673 
3674  	if (offload & CSUM_IP) {
3675  		txr->last_hw_ipcss = ipcss;
3676  		txr->last_hw_ipcso = ipcso;
3677   	}
3678 
3679 	TXD->tcp_seg_setup.data = htole32(0);
3680 	TXD->cmd_and_length =
3681 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3682 	tx_buffer = &txr->tx_buffers[cur];
3683 	tx_buffer->m_head = NULL;
3684 	tx_buffer->next_eop = -1;
3685 
3686 	if (++cur == adapter->num_tx_desc)
3687 		cur = 0;
3688 
3689 	txr->tx_avail--;
3690 	txr->next_avail_desc = cur;
3691 }
3692 
3693 
3694 /**********************************************************************
3695  *
3696  *  Setup work for hardware segmentation offload (TSO)
3697  *
3698  **********************************************************************/
3699 static void
3700 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3701     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3702 {
3703 	struct adapter			*adapter = txr->adapter;
3704 	struct e1000_context_desc	*TXD;
3705 	struct em_buffer		*tx_buffer;
3706 	int cur, hdr_len;
3707 
3708 	/*
3709 	 * In theory we can use the same TSO context if and only if
3710 	 * frame is the same type(IP/TCP) and the same MSS. However
3711 	 * checking whether a frame has the same IP/TCP structure is
3712 	 * hard thing so just ignore that and always restablish a
3713 	 * new TSO context.
3714 	 */
3715 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3716 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3717 		      E1000_TXD_DTYP_D |	/* Data descr type */
3718 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3719 
3720 	/* IP and/or TCP header checksum calculation and insertion. */
3721 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3722 
3723 	cur = txr->next_avail_desc;
3724 	tx_buffer = &txr->tx_buffers[cur];
3725 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3726 
3727 	/*
3728 	 * Start offset for header checksum calculation.
3729 	 * End offset for header checksum calculation.
3730 	 * Offset of place put the checksum.
3731 	 */
3732 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3733 	TXD->lower_setup.ip_fields.ipcse =
3734 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3735 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3736 	/*
3737 	 * Start offset for payload checksum calculation.
3738 	 * End offset for payload checksum calculation.
3739 	 * Offset of place to put the checksum.
3740 	 */
3741 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3742 	TXD->upper_setup.tcp_fields.tucse = 0;
3743 	TXD->upper_setup.tcp_fields.tucso =
3744 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3745 	/*
3746 	 * Payload size per packet w/o any headers.
3747 	 * Length of all headers up to payload.
3748 	 */
3749 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3750 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3751 
3752 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3753 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3754 				E1000_TXD_CMD_TSE |	/* TSE context */
3755 				E1000_TXD_CMD_IP |	/* Do IP csum */
3756 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3757 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3758 
3759 	tx_buffer->m_head = NULL;
3760 	tx_buffer->next_eop = -1;
3761 
3762 	if (++cur == adapter->num_tx_desc)
3763 		cur = 0;
3764 
3765 	txr->tx_avail--;
3766 	txr->next_avail_desc = cur;
3767 	txr->tx_tso = TRUE;
3768 }
3769 
3770 
3771 /**********************************************************************
3772  *
3773  *  Examine each tx_buffer in the used queue. If the hardware is done
3774  *  processing the packet then free associated resources. The
3775  *  tx_buffer is put back on the free queue.
3776  *
3777  **********************************************************************/
3778 static void
3779 em_txeof(struct tx_ring *txr)
3780 {
3781 	struct adapter	*adapter = txr->adapter;
3782         int first, last, done, processed;
3783         struct em_buffer *tx_buffer;
3784         struct e1000_tx_desc   *tx_desc, *eop_desc;
3785 	struct ifnet   *ifp = adapter->ifp;
3786 
3787 	EM_TX_LOCK_ASSERT(txr);
3788 #ifdef DEV_NETMAP
3789 	if (ifp->if_capenable & IFCAP_NETMAP) {
3790 		struct netmap_adapter *na = NA(ifp);
3791 
3792 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3793 		EM_TX_UNLOCK(txr);
3794 		EM_CORE_LOCK(adapter);
3795 		selwakeuppri(&na->tx_si, PI_NET);
3796 		EM_CORE_UNLOCK(adapter);
3797 		EM_TX_LOCK(txr);
3798 		return;
3799 	}
3800 #endif /* DEV_NETMAP */
3801 
3802 	/* No work, make sure watchdog is off */
3803         if (txr->tx_avail == adapter->num_tx_desc) {
3804 		txr->queue_status = EM_QUEUE_IDLE;
3805                 return;
3806 	}
3807 
3808 	processed = 0;
3809         first = txr->next_to_clean;
3810         tx_desc = &txr->tx_base[first];
3811         tx_buffer = &txr->tx_buffers[first];
3812 	last = tx_buffer->next_eop;
3813         eop_desc = &txr->tx_base[last];
3814 
3815 	/*
3816 	 * What this does is get the index of the
3817 	 * first descriptor AFTER the EOP of the
3818 	 * first packet, that way we can do the
3819 	 * simple comparison on the inner while loop.
3820 	 */
3821 	if (++last == adapter->num_tx_desc)
3822  		last = 0;
3823 	done = last;
3824 
3825         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3826             BUS_DMASYNC_POSTREAD);
3827 
3828         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3829 		/* We clean the range of the packet */
3830 		while (first != done) {
3831                 	tx_desc->upper.data = 0;
3832                 	tx_desc->lower.data = 0;
3833                 	tx_desc->buffer_addr = 0;
3834                 	++txr->tx_avail;
3835 			++processed;
3836 
3837 			if (tx_buffer->m_head) {
3838 				bus_dmamap_sync(txr->txtag,
3839 				    tx_buffer->map,
3840 				    BUS_DMASYNC_POSTWRITE);
3841 				bus_dmamap_unload(txr->txtag,
3842 				    tx_buffer->map);
3843                         	m_freem(tx_buffer->m_head);
3844                         	tx_buffer->m_head = NULL;
3845                 	}
3846 			tx_buffer->next_eop = -1;
3847 			txr->watchdog_time = ticks;
3848 
3849 	                if (++first == adapter->num_tx_desc)
3850 				first = 0;
3851 
3852 	                tx_buffer = &txr->tx_buffers[first];
3853 			tx_desc = &txr->tx_base[first];
3854 		}
3855 		++ifp->if_opackets;
3856 		/* See if we can continue to the next packet */
3857 		last = tx_buffer->next_eop;
3858 		if (last != -1) {
3859         		eop_desc = &txr->tx_base[last];
3860 			/* Get new done point */
3861 			if (++last == adapter->num_tx_desc) last = 0;
3862 			done = last;
3863 		} else
3864 			break;
3865         }
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868 
3869         txr->next_to_clean = first;
3870 
3871 	/*
3872 	** Watchdog calculation, we know there's
3873 	** work outstanding or the first return
3874 	** would have been taken, so none processed
3875 	** for too long indicates a hang. local timer
3876 	** will examine this and do a reset if needed.
3877 	*/
3878 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3879 		txr->queue_status = EM_QUEUE_HUNG;
3880 
3881         /*
3882          * If we have a minimum free, clear IFF_DRV_OACTIVE
3883          * to tell the stack that it is OK to send packets.
3884 	 * Notice that all writes of OACTIVE happen under the
3885 	 * TX lock which, with a single queue, guarantees
3886 	 * sanity.
3887          */
3888         if (txr->tx_avail >= EM_MAX_SCATTER)
3889 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3890 
3891 	/* Disable watchdog if all clean */
3892 	if (txr->tx_avail == adapter->num_tx_desc) {
3893 		txr->queue_status = EM_QUEUE_IDLE;
3894 	}
3895 }
3896 
3897 
3898 /*********************************************************************
3899  *
3900  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3901  *
3902  **********************************************************************/
3903 static void
3904 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3905 {
3906 	struct adapter		*adapter = rxr->adapter;
3907 	struct mbuf		*m;
3908 	bus_dma_segment_t	segs[1];
3909 	struct em_buffer	*rxbuf;
3910 	int			i, j, error, nsegs;
3911 	bool			cleaned = FALSE;
3912 
3913 	i = j = rxr->next_to_refresh;
3914 	/*
3915 	** Get one descriptor beyond
3916 	** our work mark to control
3917 	** the loop.
3918 	*/
3919 	if (++j == adapter->num_rx_desc)
3920 		j = 0;
3921 
3922 	while (j != limit) {
3923 		rxbuf = &rxr->rx_buffers[i];
3924 		if (rxbuf->m_head == NULL) {
3925 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3926 			    M_PKTHDR, adapter->rx_mbuf_sz);
3927 			/*
3928 			** If we have a temporary resource shortage
3929 			** that causes a failure, just abort refresh
3930 			** for now, we will return to this point when
3931 			** reinvoked from em_rxeof.
3932 			*/
3933 			if (m == NULL)
3934 				goto update;
3935 		} else
3936 			m = rxbuf->m_head;
3937 
3938 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3939 		m->m_flags |= M_PKTHDR;
3940 		m->m_data = m->m_ext.ext_buf;
3941 
3942 		/* Use bus_dma machinery to setup the memory mapping  */
3943 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3944 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3945 		if (error != 0) {
3946 			printf("Refresh mbufs: hdr dmamap load"
3947 			    " failure - %d\n", error);
3948 			m_free(m);
3949 			rxbuf->m_head = NULL;
3950 			goto update;
3951 		}
3952 		rxbuf->m_head = m;
3953 		bus_dmamap_sync(rxr->rxtag,
3954 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3955 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3956 		cleaned = TRUE;
3957 
3958 		i = j; /* Next is precalulated for us */
3959 		rxr->next_to_refresh = i;
3960 		/* Calculate next controlling index */
3961 		if (++j == adapter->num_rx_desc)
3962 			j = 0;
3963 	}
3964 update:
3965 	/*
3966 	** Update the tail pointer only if,
3967 	** and as far as we have refreshed.
3968 	*/
3969 	if (cleaned)
3970 		E1000_WRITE_REG(&adapter->hw,
3971 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3972 
3973 	return;
3974 }
3975 
3976 
3977 /*********************************************************************
3978  *
3979  *  Allocate memory for rx_buffer structures. Since we use one
3980  *  rx_buffer per received packet, the maximum number of rx_buffer's
3981  *  that we'll need is equal to the number of receive descriptors
3982  *  that we've allocated.
3983  *
3984  **********************************************************************/
3985 static int
3986 em_allocate_receive_buffers(struct rx_ring *rxr)
3987 {
3988 	struct adapter		*adapter = rxr->adapter;
3989 	device_t		dev = adapter->dev;
3990 	struct em_buffer	*rxbuf;
3991 	int			error;
3992 
3993 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3994 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3995 	if (rxr->rx_buffers == NULL) {
3996 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3997 		return (ENOMEM);
3998 	}
3999 
4000 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4001 				1, 0,			/* alignment, bounds */
4002 				BUS_SPACE_MAXADDR,	/* lowaddr */
4003 				BUS_SPACE_MAXADDR,	/* highaddr */
4004 				NULL, NULL,		/* filter, filterarg */
4005 				MJUM9BYTES,		/* maxsize */
4006 				1,			/* nsegments */
4007 				MJUM9BYTES,		/* maxsegsize */
4008 				0,			/* flags */
4009 				NULL,			/* lockfunc */
4010 				NULL,			/* lockarg */
4011 				&rxr->rxtag);
4012 	if (error) {
4013 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4014 		    __func__, error);
4015 		goto fail;
4016 	}
4017 
4018 	rxbuf = rxr->rx_buffers;
4019 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4020 		rxbuf = &rxr->rx_buffers[i];
4021 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4022 		    &rxbuf->map);
4023 		if (error) {
4024 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4025 			    __func__, error);
4026 			goto fail;
4027 		}
4028 	}
4029 
4030 	return (0);
4031 
4032 fail:
4033 	em_free_receive_structures(adapter);
4034 	return (error);
4035 }
4036 
4037 
4038 /*********************************************************************
4039  *
4040  *  Initialize a receive ring and its buffers.
4041  *
4042  **********************************************************************/
4043 static int
4044 em_setup_receive_ring(struct rx_ring *rxr)
4045 {
4046 	struct	adapter 	*adapter = rxr->adapter;
4047 	struct em_buffer	*rxbuf;
4048 	bus_dma_segment_t	seg[1];
4049 	int			rsize, nsegs, error;
4050 #ifdef DEV_NETMAP
4051 	struct netmap_adapter *na = NA(adapter->ifp);
4052 	struct netmap_slot *slot;
4053 #endif
4054 
4055 
4056 	/* Clear the ring contents */
4057 	EM_RX_LOCK(rxr);
4058 	rsize = roundup2(adapter->num_rx_desc *
4059 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4060 	bzero((void *)rxr->rx_base, rsize);
4061 #ifdef DEV_NETMAP
4062 	slot = netmap_reset(na, NR_RX, 0, 0);
4063 #endif
4064 
4065 	/*
4066 	** Free current RX buffer structs and their mbufs
4067 	*/
4068 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4069 		rxbuf = &rxr->rx_buffers[i];
4070 		if (rxbuf->m_head != NULL) {
4071 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4072 			    BUS_DMASYNC_POSTREAD);
4073 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4074 			m_freem(rxbuf->m_head);
4075 			rxbuf->m_head = NULL; /* mark as freed */
4076 		}
4077 	}
4078 
4079 	/* Now replenish the mbufs */
4080         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4081 		rxbuf = &rxr->rx_buffers[j];
4082 #ifdef DEV_NETMAP
4083 		if (slot) {
4084 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4085 			uint64_t paddr;
4086 			void *addr;
4087 
4088 			addr = PNMB(slot + si, &paddr);
4089 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4090 			/* Update descriptor */
4091 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4092 			continue;
4093 		}
4094 #endif /* DEV_NETMAP */
4095 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4096 		    M_PKTHDR, adapter->rx_mbuf_sz);
4097 		if (rxbuf->m_head == NULL) {
4098 			error = ENOBUFS;
4099 			goto fail;
4100 		}
4101 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4102 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4103 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4104 
4105 		/* Get the memory mapping */
4106 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4107 		    rxbuf->map, rxbuf->m_head, seg,
4108 		    &nsegs, BUS_DMA_NOWAIT);
4109 		if (error != 0) {
4110 			m_freem(rxbuf->m_head);
4111 			rxbuf->m_head = NULL;
4112 			goto fail;
4113 		}
4114 		bus_dmamap_sync(rxr->rxtag,
4115 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4116 
4117 		/* Update descriptor */
4118 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4119 	}
4120 	rxr->next_to_check = 0;
4121 	rxr->next_to_refresh = 0;
4122 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4123 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4124 
4125 fail:
4126 	EM_RX_UNLOCK(rxr);
4127 	return (error);
4128 }
4129 
4130 /*********************************************************************
4131  *
4132  *  Initialize all receive rings.
4133  *
4134  **********************************************************************/
4135 static int
4136 em_setup_receive_structures(struct adapter *adapter)
4137 {
4138 	struct rx_ring *rxr = adapter->rx_rings;
4139 	int q;
4140 
4141 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4142 		if (em_setup_receive_ring(rxr))
4143 			goto fail;
4144 
4145 	return (0);
4146 fail:
4147 	/*
4148 	 * Free RX buffers allocated so far, we will only handle
4149 	 * the rings that completed, the failing case will have
4150 	 * cleaned up for itself. 'q' failed, so its the terminus.
4151 	 */
4152 	for (int i = 0; i < q; ++i) {
4153 		rxr = &adapter->rx_rings[i];
4154 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4155 			struct em_buffer *rxbuf;
4156 			rxbuf = &rxr->rx_buffers[n];
4157 			if (rxbuf->m_head != NULL) {
4158 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4159 			  	  BUS_DMASYNC_POSTREAD);
4160 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4161 				m_freem(rxbuf->m_head);
4162 				rxbuf->m_head = NULL;
4163 			}
4164 		}
4165 		rxr->next_to_check = 0;
4166 		rxr->next_to_refresh = 0;
4167 	}
4168 
4169 	return (ENOBUFS);
4170 }
4171 
4172 /*********************************************************************
4173  *
4174  *  Free all receive rings.
4175  *
4176  **********************************************************************/
4177 static void
4178 em_free_receive_structures(struct adapter *adapter)
4179 {
4180 	struct rx_ring *rxr = adapter->rx_rings;
4181 
4182 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4183 		em_free_receive_buffers(rxr);
4184 		/* Free the ring memory as well */
4185 		em_dma_free(adapter, &rxr->rxdma);
4186 		EM_RX_LOCK_DESTROY(rxr);
4187 	}
4188 
4189 	free(adapter->rx_rings, M_DEVBUF);
4190 }
4191 
4192 
4193 /*********************************************************************
4194  *
4195  *  Free receive ring data structures
4196  *
4197  **********************************************************************/
4198 static void
4199 em_free_receive_buffers(struct rx_ring *rxr)
4200 {
4201 	struct adapter		*adapter = rxr->adapter;
4202 	struct em_buffer	*rxbuf = NULL;
4203 
4204 	INIT_DEBUGOUT("free_receive_buffers: begin");
4205 
4206 	if (rxr->rx_buffers != NULL) {
4207 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4208 			rxbuf = &rxr->rx_buffers[i];
4209 			if (rxbuf->map != NULL) {
4210 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4211 				    BUS_DMASYNC_POSTREAD);
4212 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4213 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4214 			}
4215 			if (rxbuf->m_head != NULL) {
4216 				m_freem(rxbuf->m_head);
4217 				rxbuf->m_head = NULL;
4218 			}
4219 		}
4220 		free(rxr->rx_buffers, M_DEVBUF);
4221 		rxr->rx_buffers = NULL;
4222 		rxr->next_to_check = 0;
4223 		rxr->next_to_refresh = 0;
4224 	}
4225 
4226 	if (rxr->rxtag != NULL) {
4227 		bus_dma_tag_destroy(rxr->rxtag);
4228 		rxr->rxtag = NULL;
4229 	}
4230 
4231 	return;
4232 }
4233 
4234 
4235 /*********************************************************************
4236  *
4237  *  Enable receive unit.
4238  *
4239  **********************************************************************/
4240 #define MAX_INTS_PER_SEC	8000
4241 #define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
4242 
4243 static void
4244 em_initialize_receive_unit(struct adapter *adapter)
4245 {
4246 	struct rx_ring	*rxr = adapter->rx_rings;
4247 	struct ifnet	*ifp = adapter->ifp;
4248 	struct e1000_hw	*hw = &adapter->hw;
4249 	u64	bus_addr;
4250 	u32	rctl, rxcsum;
4251 
4252 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4253 
4254 	/*
4255 	 * Make sure receives are disabled while setting
4256 	 * up the descriptor ring
4257 	 */
4258 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4259 	/* Do not disable if ever enabled on this hardware */
4260 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4261 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4262 
4263 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4264 	    adapter->rx_abs_int_delay.value);
4265 	/*
4266 	 * Set the interrupt throttling rate. Value is calculated
4267 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4268 	 */
4269 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4270 
4271 	/*
4272 	** When using MSIX interrupts we need to throttle
4273 	** using the EITR register (82574 only)
4274 	*/
4275 	if (hw->mac.type == e1000_82574) {
4276 		for (int i = 0; i < 4; i++)
4277 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4278 			    DEFAULT_ITR);
4279 		/* Disable accelerated acknowledge */
4280 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4281 	}
4282 
4283 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4284 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4285 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4286 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4287 	}
4288 
4289 	/*
4290 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4291 	** long latencies are observed, like Lenovo X60. This
4292 	** change eliminates the problem, but since having positive
4293 	** values in RDTR is a known source of problems on other
4294 	** platforms another solution is being sought.
4295 	*/
4296 	if (hw->mac.type == e1000_82573)
4297 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4298 
4299 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4300 		/* Setup the Base and Length of the Rx Descriptor Ring */
4301 		bus_addr = rxr->rxdma.dma_paddr;
4302 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4303 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4304 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4305 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4306 		/* Setup the Head and Tail Descriptor Pointers */
4307 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4308 #ifdef DEV_NETMAP
4309 		/*
4310 		 * an init() while a netmap client is active must
4311 		 * preserve the rx buffers passed to userspace.
4312 		 * In this driver it means we adjust RDT to
4313 		 * something different from na->num_rx_desc - 1.
4314 		 */
4315 		if (ifp->if_capenable & IFCAP_NETMAP) {
4316 			struct netmap_adapter *na = NA(adapter->ifp);
4317 			struct netmap_kring *kring = &na->rx_rings[i];
4318 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4319 
4320 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4321 		} else
4322 #endif /* DEV_NETMAP */
4323 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4324 	}
4325 
4326 	/* Set PTHRESH for improved jumbo performance */
4327 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4328 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4329 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4330 	    (ifp->if_mtu > ETHERMTU)) {
4331 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4332 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4333 	}
4334 
4335 	if (adapter->hw.mac.type == e1000_pch2lan) {
4336 		if (ifp->if_mtu > ETHERMTU)
4337 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4338 		else
4339 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4340 	}
4341 
4342 	/* Setup the Receive Control Register */
4343 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4344 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4345 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4346 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4347 
4348         /* Strip the CRC */
4349         rctl |= E1000_RCTL_SECRC;
4350 
4351         /* Make sure VLAN Filters are off */
4352         rctl &= ~E1000_RCTL_VFE;
4353 	rctl &= ~E1000_RCTL_SBP;
4354 
4355 	if (adapter->rx_mbuf_sz == MCLBYTES)
4356 		rctl |= E1000_RCTL_SZ_2048;
4357 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4358 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4359 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4360 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4361 
4362 	if (ifp->if_mtu > ETHERMTU)
4363 		rctl |= E1000_RCTL_LPE;
4364 	else
4365 		rctl &= ~E1000_RCTL_LPE;
4366 
4367 	/* Write out the settings */
4368 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4369 
4370 	return;
4371 }
4372 
4373 
4374 /*********************************************************************
4375  *
4376  *  This routine executes in interrupt context. It replenishes
4377  *  the mbufs in the descriptor and sends data which has been
4378  *  dma'ed into host memory to upper layer.
4379  *
4380  *  We loop at most count times if count is > 0, or until done if
4381  *  count < 0.
4382  *
4383  *  For polling we also now return the number of cleaned packets
4384  *********************************************************************/
4385 static bool
4386 em_rxeof(struct rx_ring *rxr, int count, int *done)
4387 {
4388 	struct adapter		*adapter = rxr->adapter;
4389 	struct ifnet		*ifp = adapter->ifp;
4390 	struct mbuf		*mp, *sendmp;
4391 	u8			status = 0;
4392 	u16 			len;
4393 	int			i, processed, rxdone = 0;
4394 	bool			eop;
4395 	struct e1000_rx_desc	*cur;
4396 
4397 	EM_RX_LOCK(rxr);
4398 
4399 #ifdef DEV_NETMAP
4400 	if (ifp->if_capenable & IFCAP_NETMAP) {
4401 		struct netmap_adapter *na = NA(ifp);
4402 
4403 		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4404 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4405 		EM_RX_UNLOCK(rxr);
4406 		EM_CORE_LOCK(adapter);
4407 		selwakeuppri(&na->rx_si, PI_NET);
4408 		EM_CORE_UNLOCK(adapter);
4409 		return (0);
4410 	}
4411 #endif /* DEV_NETMAP */
4412 
4413 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4414 
4415 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4416 			break;
4417 
4418 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4419 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4420 
4421 		cur = &rxr->rx_base[i];
4422 		status = cur->status;
4423 		mp = sendmp = NULL;
4424 
4425 		if ((status & E1000_RXD_STAT_DD) == 0)
4426 			break;
4427 
4428 		len = le16toh(cur->length);
4429 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4430 
4431 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4432 		    (rxr->discard == TRUE)) {
4433 			ifp->if_ierrors++;
4434 			++rxr->rx_discarded;
4435 			if (!eop) /* Catch subsequent segs */
4436 				rxr->discard = TRUE;
4437 			else
4438 				rxr->discard = FALSE;
4439 			em_rx_discard(rxr, i);
4440 			goto next_desc;
4441 		}
4442 
4443 		/* Assign correct length to the current fragment */
4444 		mp = rxr->rx_buffers[i].m_head;
4445 		mp->m_len = len;
4446 
4447 		/* Trigger for refresh */
4448 		rxr->rx_buffers[i].m_head = NULL;
4449 
4450 		/* First segment? */
4451 		if (rxr->fmp == NULL) {
4452 			mp->m_pkthdr.len = len;
4453 			rxr->fmp = rxr->lmp = mp;
4454 		} else {
4455 			/* Chain mbuf's together */
4456 			mp->m_flags &= ~M_PKTHDR;
4457 			rxr->lmp->m_next = mp;
4458 			rxr->lmp = mp;
4459 			rxr->fmp->m_pkthdr.len += len;
4460 		}
4461 
4462 		if (eop) {
4463 			--count;
4464 			sendmp = rxr->fmp;
4465 			sendmp->m_pkthdr.rcvif = ifp;
4466 			ifp->if_ipackets++;
4467 			em_receive_checksum(cur, sendmp);
4468 #ifndef __NO_STRICT_ALIGNMENT
4469 			if (adapter->max_frame_size >
4470 			    (MCLBYTES - ETHER_ALIGN) &&
4471 			    em_fixup_rx(rxr) != 0)
4472 				goto skip;
4473 #endif
4474 			if (status & E1000_RXD_STAT_VP) {
4475 				sendmp->m_pkthdr.ether_vtag =
4476 				    le16toh(cur->special);
4477 				sendmp->m_flags |= M_VLANTAG;
4478 			}
4479 #ifndef __NO_STRICT_ALIGNMENT
4480 skip:
4481 #endif
4482 			rxr->fmp = rxr->lmp = NULL;
4483 		}
4484 next_desc:
4485 		/* Zero out the receive descriptors status. */
4486 		cur->status = 0;
4487 		++rxdone;	/* cumulative for POLL */
4488 		++processed;
4489 
4490 		/* Advance our pointers to the next descriptor. */
4491 		if (++i == adapter->num_rx_desc)
4492 			i = 0;
4493 
4494 		/* Send to the stack */
4495 		if (sendmp != NULL) {
4496 			rxr->next_to_check = i;
4497 			EM_RX_UNLOCK(rxr);
4498 			(*ifp->if_input)(ifp, sendmp);
4499 			EM_RX_LOCK(rxr);
4500 			i = rxr->next_to_check;
4501 		}
4502 
4503 		/* Only refresh mbufs every 8 descriptors */
4504 		if (processed == 8) {
4505 			em_refresh_mbufs(rxr, i);
4506 			processed = 0;
4507 		}
4508 	}
4509 
4510 	/* Catch any remaining refresh work */
4511 	if (e1000_rx_unrefreshed(rxr))
4512 		em_refresh_mbufs(rxr, i);
4513 
4514 	rxr->next_to_check = i;
4515 	if (done != NULL)
4516 		*done = rxdone;
4517 	EM_RX_UNLOCK(rxr);
4518 
4519 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4520 }
4521 
4522 static __inline void
4523 em_rx_discard(struct rx_ring *rxr, int i)
4524 {
4525 	struct em_buffer	*rbuf;
4526 
4527 	rbuf = &rxr->rx_buffers[i];
4528 	/* Free any previous pieces */
4529 	if (rxr->fmp != NULL) {
4530 		rxr->fmp->m_flags |= M_PKTHDR;
4531 		m_freem(rxr->fmp);
4532 		rxr->fmp = NULL;
4533 		rxr->lmp = NULL;
4534 	}
4535 	/*
4536 	** Free buffer and allow em_refresh_mbufs()
4537 	** to clean up and recharge buffer.
4538 	*/
4539 	if (rbuf->m_head) {
4540 		m_free(rbuf->m_head);
4541 		rbuf->m_head = NULL;
4542 	}
4543 	return;
4544 }
4545 
4546 #ifndef __NO_STRICT_ALIGNMENT
4547 /*
4548  * When jumbo frames are enabled we should realign entire payload on
4549  * architecures with strict alignment. This is serious design mistake of 8254x
4550  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4551  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4552  * payload. On architecures without strict alignment restrictions 8254x still
4553  * performs unaligned memory access which would reduce the performance too.
4554  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4555  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4556  * existing mbuf chain.
4557  *
4558  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4559  * not used at all on architectures with strict alignment.
4560  */
4561 static int
4562 em_fixup_rx(struct rx_ring *rxr)
4563 {
4564 	struct adapter *adapter = rxr->adapter;
4565 	struct mbuf *m, *n;
4566 	int error;
4567 
4568 	error = 0;
4569 	m = rxr->fmp;
4570 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4571 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4572 		m->m_data += ETHER_HDR_LEN;
4573 	} else {
4574 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4575 		if (n != NULL) {
4576 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4577 			m->m_data += ETHER_HDR_LEN;
4578 			m->m_len -= ETHER_HDR_LEN;
4579 			n->m_len = ETHER_HDR_LEN;
4580 			M_MOVE_PKTHDR(n, m);
4581 			n->m_next = m;
4582 			rxr->fmp = n;
4583 		} else {
4584 			adapter->dropped_pkts++;
4585 			m_freem(rxr->fmp);
4586 			rxr->fmp = NULL;
4587 			error = ENOMEM;
4588 		}
4589 	}
4590 
4591 	return (error);
4592 }
4593 #endif
4594 
4595 /*********************************************************************
4596  *
4597  *  Verify that the hardware indicated that the checksum is valid.
4598  *  Inform the stack about the status of checksum so that stack
4599  *  doesn't spend time verifying the checksum.
4600  *
4601  *********************************************************************/
4602 static void
4603 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4604 {
4605 	/* Ignore Checksum bit is set */
4606 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4607 		mp->m_pkthdr.csum_flags = 0;
4608 		return;
4609 	}
4610 
4611 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4612 		/* Did it pass? */
4613 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4614 			/* IP Checksum Good */
4615 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4616 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4617 
4618 		} else {
4619 			mp->m_pkthdr.csum_flags = 0;
4620 		}
4621 	}
4622 
4623 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4624 		/* Did it pass? */
4625 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4626 			mp->m_pkthdr.csum_flags |=
4627 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4628 			mp->m_pkthdr.csum_data = htons(0xffff);
4629 		}
4630 	}
4631 }
4632 
4633 /*
4634  * This routine is run via an vlan
4635  * config EVENT
4636  */
4637 static void
4638 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4639 {
4640 	struct adapter	*adapter = ifp->if_softc;
4641 	u32		index, bit;
4642 
4643 	if (ifp->if_softc !=  arg)   /* Not our event */
4644 		return;
4645 
4646 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4647                 return;
4648 
4649 	EM_CORE_LOCK(adapter);
4650 	index = (vtag >> 5) & 0x7F;
4651 	bit = vtag & 0x1F;
4652 	adapter->shadow_vfta[index] |= (1 << bit);
4653 	++adapter->num_vlans;
4654 	/* Re-init to load the changes */
4655 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4656 		em_init_locked(adapter);
4657 	EM_CORE_UNLOCK(adapter);
4658 }
4659 
4660 /*
4661  * This routine is run via an vlan
4662  * unconfig EVENT
4663  */
4664 static void
4665 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4666 {
4667 	struct adapter	*adapter = ifp->if_softc;
4668 	u32		index, bit;
4669 
4670 	if (ifp->if_softc !=  arg)
4671 		return;
4672 
4673 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4674                 return;
4675 
4676 	EM_CORE_LOCK(adapter);
4677 	index = (vtag >> 5) & 0x7F;
4678 	bit = vtag & 0x1F;
4679 	adapter->shadow_vfta[index] &= ~(1 << bit);
4680 	--adapter->num_vlans;
4681 	/* Re-init to load the changes */
4682 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4683 		em_init_locked(adapter);
4684 	EM_CORE_UNLOCK(adapter);
4685 }
4686 
4687 static void
4688 em_setup_vlan_hw_support(struct adapter *adapter)
4689 {
4690 	struct e1000_hw *hw = &adapter->hw;
4691 	u32             reg;
4692 
4693 	/*
4694 	** We get here thru init_locked, meaning
4695 	** a soft reset, this has already cleared
4696 	** the VFTA and other state, so if there
4697 	** have been no vlan's registered do nothing.
4698 	*/
4699 	if (adapter->num_vlans == 0)
4700                 return;
4701 
4702 	/*
4703 	** A soft reset zero's out the VFTA, so
4704 	** we need to repopulate it now.
4705 	*/
4706 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4707                 if (adapter->shadow_vfta[i] != 0)
4708 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4709                             i, adapter->shadow_vfta[i]);
4710 
4711 	reg = E1000_READ_REG(hw, E1000_CTRL);
4712 	reg |= E1000_CTRL_VME;
4713 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4714 
4715 	/* Enable the Filter Table */
4716 	reg = E1000_READ_REG(hw, E1000_RCTL);
4717 	reg &= ~E1000_RCTL_CFIEN;
4718 	reg |= E1000_RCTL_VFE;
4719 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4720 }
4721 
4722 static void
4723 em_enable_intr(struct adapter *adapter)
4724 {
4725 	struct e1000_hw *hw = &adapter->hw;
4726 	u32 ims_mask = IMS_ENABLE_MASK;
4727 
4728 	if (hw->mac.type == e1000_82574) {
4729 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4730 		ims_mask |= EM_MSIX_MASK;
4731 	}
4732 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4733 }
4734 
4735 static void
4736 em_disable_intr(struct adapter *adapter)
4737 {
4738 	struct e1000_hw *hw = &adapter->hw;
4739 
4740 	if (hw->mac.type == e1000_82574)
4741 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4742 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4743 }
4744 
4745 /*
4746  * Bit of a misnomer, what this really means is
4747  * to enable OS management of the system... aka
4748  * to disable special hardware management features
4749  */
4750 static void
4751 em_init_manageability(struct adapter *adapter)
4752 {
4753 	/* A shared code workaround */
4754 #define E1000_82542_MANC2H E1000_MANC2H
4755 	if (adapter->has_manage) {
4756 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4757 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4758 
4759 		/* disable hardware interception of ARP */
4760 		manc &= ~(E1000_MANC_ARP_EN);
4761 
4762                 /* enable receiving management packets to the host */
4763 		manc |= E1000_MANC_EN_MNG2HOST;
4764 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4765 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4766 		manc2h |= E1000_MNG2HOST_PORT_623;
4767 		manc2h |= E1000_MNG2HOST_PORT_664;
4768 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4769 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4770 	}
4771 }
4772 
4773 /*
4774  * Give control back to hardware management
4775  * controller if there is one.
4776  */
4777 static void
4778 em_release_manageability(struct adapter *adapter)
4779 {
4780 	if (adapter->has_manage) {
4781 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4782 
4783 		/* re-enable hardware interception of ARP */
4784 		manc |= E1000_MANC_ARP_EN;
4785 		manc &= ~E1000_MANC_EN_MNG2HOST;
4786 
4787 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4788 	}
4789 }
4790 
4791 /*
4792  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4793  * For ASF and Pass Through versions of f/w this means
4794  * that the driver is loaded. For AMT version type f/w
4795  * this means that the network i/f is open.
4796  */
4797 static void
4798 em_get_hw_control(struct adapter *adapter)
4799 {
4800 	u32 ctrl_ext, swsm;
4801 
4802 	if (adapter->hw.mac.type == e1000_82573) {
4803 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4804 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4805 		    swsm | E1000_SWSM_DRV_LOAD);
4806 		return;
4807 	}
4808 	/* else */
4809 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4810 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4811 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4812 	return;
4813 }
4814 
4815 /*
4816  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4817  * For ASF and Pass Through versions of f/w this means that
4818  * the driver is no longer loaded. For AMT versions of the
4819  * f/w this means that the network i/f is closed.
4820  */
4821 static void
4822 em_release_hw_control(struct adapter *adapter)
4823 {
4824 	u32 ctrl_ext, swsm;
4825 
4826 	if (!adapter->has_manage)
4827 		return;
4828 
4829 	if (adapter->hw.mac.type == e1000_82573) {
4830 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4831 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4832 		    swsm & ~E1000_SWSM_DRV_LOAD);
4833 		return;
4834 	}
4835 	/* else */
4836 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4837 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4838 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4839 	return;
4840 }
4841 
4842 static int
4843 em_is_valid_ether_addr(u8 *addr)
4844 {
4845 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4846 
4847 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4848 		return (FALSE);
4849 	}
4850 
4851 	return (TRUE);
4852 }
4853 
4854 /*
4855 ** Parse the interface capabilities with regard
4856 ** to both system management and wake-on-lan for
4857 ** later use.
4858 */
4859 static void
4860 em_get_wakeup(device_t dev)
4861 {
4862 	struct adapter	*adapter = device_get_softc(dev);
4863 	u16		eeprom_data = 0, device_id, apme_mask;
4864 
4865 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4866 	apme_mask = EM_EEPROM_APME;
4867 
4868 	switch (adapter->hw.mac.type) {
4869 	case e1000_82573:
4870 	case e1000_82583:
4871 		adapter->has_amt = TRUE;
4872 		/* Falls thru */
4873 	case e1000_82571:
4874 	case e1000_82572:
4875 	case e1000_80003es2lan:
4876 		if (adapter->hw.bus.func == 1) {
4877 			e1000_read_nvm(&adapter->hw,
4878 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4879 			break;
4880 		} else
4881 			e1000_read_nvm(&adapter->hw,
4882 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4883 		break;
4884 	case e1000_ich8lan:
4885 	case e1000_ich9lan:
4886 	case e1000_ich10lan:
4887 	case e1000_pchlan:
4888 	case e1000_pch2lan:
4889 		apme_mask = E1000_WUC_APME;
4890 		adapter->has_amt = TRUE;
4891 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4892 		break;
4893 	default:
4894 		e1000_read_nvm(&adapter->hw,
4895 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4896 		break;
4897 	}
4898 	if (eeprom_data & apme_mask)
4899 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4900 	/*
4901          * We have the eeprom settings, now apply the special cases
4902          * where the eeprom may be wrong or the board won't support
4903          * wake on lan on a particular port
4904 	 */
4905 	device_id = pci_get_device(dev);
4906         switch (device_id) {
4907 	case E1000_DEV_ID_82571EB_FIBER:
4908 		/* Wake events only supported on port A for dual fiber
4909 		 * regardless of eeprom setting */
4910 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4911 		    E1000_STATUS_FUNC_1)
4912 			adapter->wol = 0;
4913 		break;
4914 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4915 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4916 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4917                 /* if quad port adapter, disable WoL on all but port A */
4918 		if (global_quad_port_a != 0)
4919 			adapter->wol = 0;
4920 		/* Reset for multiple quad port adapters */
4921 		if (++global_quad_port_a == 4)
4922 			global_quad_port_a = 0;
4923                 break;
4924 	}
4925 	return;
4926 }
4927 
4928 
4929 /*
4930  * Enable PCI Wake On Lan capability
4931  */
4932 static void
4933 em_enable_wakeup(device_t dev)
4934 {
4935 	struct adapter	*adapter = device_get_softc(dev);
4936 	struct ifnet	*ifp = adapter->ifp;
4937 	u32		pmc, ctrl, ctrl_ext, rctl;
4938 	u16     	status;
4939 
4940 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4941 		return;
4942 
4943 	/* Advertise the wakeup capability */
4944 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4945 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4946 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4947 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4948 
4949 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4950 	    (adapter->hw.mac.type == e1000_pchlan) ||
4951 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4952 	    (adapter->hw.mac.type == e1000_ich10lan))
4953 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4954 
4955 	/* Keep the laser running on Fiber adapters */
4956 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4957 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4958 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4959 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4960 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4961 	}
4962 
4963 	/*
4964 	** Determine type of Wakeup: note that wol
4965 	** is set with all bits on by default.
4966 	*/
4967 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4968 		adapter->wol &= ~E1000_WUFC_MAG;
4969 
4970 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4971 		adapter->wol &= ~E1000_WUFC_MC;
4972 	else {
4973 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4974 		rctl |= E1000_RCTL_MPE;
4975 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4976 	}
4977 
4978 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4979 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4980 		if (em_enable_phy_wakeup(adapter))
4981 			return;
4982 	} else {
4983 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4984 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4985 	}
4986 
4987 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4988 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4989 
4990         /* Request PME */
4991         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4992 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4993 	if (ifp->if_capenable & IFCAP_WOL)
4994 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4995         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4996 
4997 	return;
4998 }
4999 
5000 /*
5001 ** WOL in the newer chipset interfaces (pchlan)
5002 ** require thing to be copied into the phy
5003 */
5004 static int
5005 em_enable_phy_wakeup(struct adapter *adapter)
5006 {
5007 	struct e1000_hw *hw = &adapter->hw;
5008 	u32 mreg, ret = 0;
5009 	u16 preg;
5010 
5011 	/* copy MAC RARs to PHY RARs */
5012 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5013 
5014 	/* copy MAC MTA to PHY MTA */
5015 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5016 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5017 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5018 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5019 		    (u16)((mreg >> 16) & 0xFFFF));
5020 	}
5021 
5022 	/* configure PHY Rx Control register */
5023 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5024 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5025 	if (mreg & E1000_RCTL_UPE)
5026 		preg |= BM_RCTL_UPE;
5027 	if (mreg & E1000_RCTL_MPE)
5028 		preg |= BM_RCTL_MPE;
5029 	preg &= ~(BM_RCTL_MO_MASK);
5030 	if (mreg & E1000_RCTL_MO_3)
5031 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5032 				<< BM_RCTL_MO_SHIFT);
5033 	if (mreg & E1000_RCTL_BAM)
5034 		preg |= BM_RCTL_BAM;
5035 	if (mreg & E1000_RCTL_PMCF)
5036 		preg |= BM_RCTL_PMCF;
5037 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5038 	if (mreg & E1000_CTRL_RFCE)
5039 		preg |= BM_RCTL_RFCE;
5040 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5041 
5042 	/* enable PHY wakeup in MAC register */
5043 	E1000_WRITE_REG(hw, E1000_WUC,
5044 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5045 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5046 
5047 	/* configure and enable PHY wakeup in PHY registers */
5048 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5049 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5050 
5051 	/* activate PHY wakeup */
5052 	ret = hw->phy.ops.acquire(hw);
5053 	if (ret) {
5054 		printf("Could not acquire PHY\n");
5055 		return ret;
5056 	}
5057 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5058 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5059 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5060 	if (ret) {
5061 		printf("Could not read PHY page 769\n");
5062 		goto out;
5063 	}
5064 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5065 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5066 	if (ret)
5067 		printf("Could not set PHY Host Wakeup bit\n");
5068 out:
5069 	hw->phy.ops.release(hw);
5070 
5071 	return ret;
5072 }
5073 
5074 static void
5075 em_led_func(void *arg, int onoff)
5076 {
5077 	struct adapter	*adapter = arg;
5078 
5079 	EM_CORE_LOCK(adapter);
5080 	if (onoff) {
5081 		e1000_setup_led(&adapter->hw);
5082 		e1000_led_on(&adapter->hw);
5083 	} else {
5084 		e1000_led_off(&adapter->hw);
5085 		e1000_cleanup_led(&adapter->hw);
5086 	}
5087 	EM_CORE_UNLOCK(adapter);
5088 }
5089 
5090 /*
5091 ** Disable the L0S and L1 LINK states
5092 */
5093 static void
5094 em_disable_aspm(struct adapter *adapter)
5095 {
5096 	int		base, reg;
5097 	u16		link_cap,link_ctrl;
5098 	device_t	dev = adapter->dev;
5099 
5100 	switch (adapter->hw.mac.type) {
5101 		case e1000_82573:
5102 		case e1000_82574:
5103 		case e1000_82583:
5104 			break;
5105 		default:
5106 			return;
5107 	}
5108 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5109 		return;
5110 	reg = base + PCIR_EXPRESS_LINK_CAP;
5111 	link_cap = pci_read_config(dev, reg, 2);
5112 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5113 		return;
5114 	reg = base + PCIR_EXPRESS_LINK_CTL;
5115 	link_ctrl = pci_read_config(dev, reg, 2);
5116 	link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
5117 	pci_write_config(dev, reg, link_ctrl, 2);
5118 	return;
5119 }
5120 
5121 /**********************************************************************
5122  *
5123  *  Update the board statistics counters.
5124  *
5125  **********************************************************************/
5126 static void
5127 em_update_stats_counters(struct adapter *adapter)
5128 {
5129 	struct ifnet   *ifp;
5130 
5131 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5132 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5133 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5134 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5135 	}
5136 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5137 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5138 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5139 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5140 
5141 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5142 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5143 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5144 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5145 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5146 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5147 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5148 	/*
5149 	** For watchdog management we need to know if we have been
5150 	** paused during the last interval, so capture that here.
5151 	*/
5152 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5153 	adapter->stats.xoffrxc += adapter->pause_frames;
5154 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5155 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5156 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5157 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5158 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5159 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5160 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5161 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5162 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5163 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5164 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5165 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5166 
5167 	/* For the 64-bit byte counters the low dword must be read first. */
5168 	/* Both registers clear on the read of the high dword */
5169 
5170 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5171 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5172 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5173 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5174 
5175 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5176 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5177 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5178 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5179 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5180 
5181 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5182 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5183 
5184 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5185 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5186 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5187 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5188 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5189 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5190 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5191 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5192 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5193 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5194 
5195 	/* Interrupt Counts */
5196 
5197 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5198 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5199 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5200 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5201 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5202 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5203 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5204 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5205 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5206 
5207 	if (adapter->hw.mac.type >= e1000_82543) {
5208 		adapter->stats.algnerrc +=
5209 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5210 		adapter->stats.rxerrc +=
5211 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5212 		adapter->stats.tncrs +=
5213 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5214 		adapter->stats.cexterr +=
5215 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5216 		adapter->stats.tsctc +=
5217 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5218 		adapter->stats.tsctfc +=
5219 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5220 	}
5221 	ifp = adapter->ifp;
5222 
5223 	ifp->if_collisions = adapter->stats.colc;
5224 
5225 	/* Rx Errors */
5226 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5227 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5228 	    adapter->stats.ruc + adapter->stats.roc +
5229 	    adapter->stats.mpc + adapter->stats.cexterr;
5230 
5231 	/* Tx Errors */
5232 	ifp->if_oerrors = adapter->stats.ecol +
5233 	    adapter->stats.latecol + adapter->watchdog_events;
5234 }
5235 
5236 /* Export a single 32-bit register via a read-only sysctl. */
5237 static int
5238 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5239 {
5240 	struct adapter *adapter;
5241 	u_int val;
5242 
5243 	adapter = oidp->oid_arg1;
5244 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5245 	return (sysctl_handle_int(oidp, &val, 0, req));
5246 }
5247 
5248 /*
5249  * Add sysctl variables, one per statistic, to the system.
5250  */
5251 static void
5252 em_add_hw_stats(struct adapter *adapter)
5253 {
5254 	device_t dev = adapter->dev;
5255 
5256 	struct tx_ring *txr = adapter->tx_rings;
5257 	struct rx_ring *rxr = adapter->rx_rings;
5258 
5259 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5260 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5261 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5262 	struct e1000_hw_stats *stats = &adapter->stats;
5263 
5264 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5265 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5266 
5267 #define QUEUE_NAME_LEN 32
5268 	char namebuf[QUEUE_NAME_LEN];
5269 
5270 	/* Driver Statistics */
5271 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5272 			CTLFLAG_RD, &adapter->link_irq,
5273 			"Link MSIX IRQ Handled");
5274 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5275 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5276 			 "Std mbuf failed");
5277 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5278 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5279 			 "Std mbuf cluster failed");
5280 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5281 			CTLFLAG_RD, &adapter->dropped_pkts,
5282 			"Driver dropped packets");
5283 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5284 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5285 			"Driver tx dma failure in xmit");
5286 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5287 			CTLFLAG_RD, &adapter->rx_overruns,
5288 			"RX overruns");
5289 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5290 			CTLFLAG_RD, &adapter->watchdog_events,
5291 			"Watchdog timeouts");
5292 
5293 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5294 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5295 			em_sysctl_reg_handler, "IU",
5296 			"Device Control Register");
5297 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5298 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5299 			em_sysctl_reg_handler, "IU",
5300 			"Receiver Control Register");
5301 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5302 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5303 			"Flow Control High Watermark");
5304 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5305 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5306 			"Flow Control Low Watermark");
5307 
5308 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5309 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5310 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5311 					    CTLFLAG_RD, NULL, "Queue Name");
5312 		queue_list = SYSCTL_CHILDREN(queue_node);
5313 
5314 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5315 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5316 				E1000_TDH(txr->me),
5317 				em_sysctl_reg_handler, "IU",
5318  				"Transmit Descriptor Head");
5319 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5320 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5321 				E1000_TDT(txr->me),
5322 				em_sysctl_reg_handler, "IU",
5323  				"Transmit Descriptor Tail");
5324 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5325 				CTLFLAG_RD, &txr->tx_irq,
5326 				"Queue MSI-X Transmit Interrupts");
5327 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5328 				CTLFLAG_RD, &txr->no_desc_avail,
5329 				"Queue No Descriptor Available");
5330 
5331 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5332 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333 				E1000_RDH(rxr->me),
5334 				em_sysctl_reg_handler, "IU",
5335 				"Receive Descriptor Head");
5336 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5337 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338 				E1000_RDT(rxr->me),
5339 				em_sysctl_reg_handler, "IU",
5340 				"Receive Descriptor Tail");
5341 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5342 				CTLFLAG_RD, &rxr->rx_irq,
5343 				"Queue MSI-X Receive Interrupts");
5344 	}
5345 
5346 	/* MAC stats get their own sub node */
5347 
5348 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5349 				    CTLFLAG_RD, NULL, "Statistics");
5350 	stat_list = SYSCTL_CHILDREN(stat_node);
5351 
5352 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5353 			CTLFLAG_RD, &stats->ecol,
5354 			"Excessive collisions");
5355 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5356 			CTLFLAG_RD, &stats->scc,
5357 			"Single collisions");
5358 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5359 			CTLFLAG_RD, &stats->mcc,
5360 			"Multiple collisions");
5361 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5362 			CTLFLAG_RD, &stats->latecol,
5363 			"Late collisions");
5364 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5365 			CTLFLAG_RD, &stats->colc,
5366 			"Collision Count");
5367 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5368 			CTLFLAG_RD, &adapter->stats.symerrs,
5369 			"Symbol Errors");
5370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5371 			CTLFLAG_RD, &adapter->stats.sec,
5372 			"Sequence Errors");
5373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5374 			CTLFLAG_RD, &adapter->stats.dc,
5375 			"Defer Count");
5376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5377 			CTLFLAG_RD, &adapter->stats.mpc,
5378 			"Missed Packets");
5379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5380 			CTLFLAG_RD, &adapter->stats.rnbc,
5381 			"Receive No Buffers");
5382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5383 			CTLFLAG_RD, &adapter->stats.ruc,
5384 			"Receive Undersize");
5385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5386 			CTLFLAG_RD, &adapter->stats.rfc,
5387 			"Fragmented Packets Received ");
5388 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5389 			CTLFLAG_RD, &adapter->stats.roc,
5390 			"Oversized Packets Received");
5391 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5392 			CTLFLAG_RD, &adapter->stats.rjc,
5393 			"Recevied Jabber");
5394 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5395 			CTLFLAG_RD, &adapter->stats.rxerrc,
5396 			"Receive Errors");
5397 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5398 			CTLFLAG_RD, &adapter->stats.crcerrs,
5399 			"CRC errors");
5400 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5401 			CTLFLAG_RD, &adapter->stats.algnerrc,
5402 			"Alignment Errors");
5403 	/* On 82575 these are collision counts */
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5405 			CTLFLAG_RD, &adapter->stats.cexterr,
5406 			"Collision/Carrier extension errors");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5408 			CTLFLAG_RD, &adapter->stats.xonrxc,
5409 			"XON Received");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5411 			CTLFLAG_RD, &adapter->stats.xontxc,
5412 			"XON Transmitted");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5414 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5415 			"XOFF Received");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5417 			CTLFLAG_RD, &adapter->stats.xofftxc,
5418 			"XOFF Transmitted");
5419 
5420 	/* Packet Reception Stats */
5421 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5422 			CTLFLAG_RD, &adapter->stats.tpr,
5423 			"Total Packets Received ");
5424 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5425 			CTLFLAG_RD, &adapter->stats.gprc,
5426 			"Good Packets Received");
5427 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5428 			CTLFLAG_RD, &adapter->stats.bprc,
5429 			"Broadcast Packets Received");
5430 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5431 			CTLFLAG_RD, &adapter->stats.mprc,
5432 			"Multicast Packets Received");
5433 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5434 			CTLFLAG_RD, &adapter->stats.prc64,
5435 			"64 byte frames received ");
5436 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5437 			CTLFLAG_RD, &adapter->stats.prc127,
5438 			"65-127 byte frames received");
5439 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5440 			CTLFLAG_RD, &adapter->stats.prc255,
5441 			"128-255 byte frames received");
5442 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5443 			CTLFLAG_RD, &adapter->stats.prc511,
5444 			"256-511 byte frames received");
5445 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5446 			CTLFLAG_RD, &adapter->stats.prc1023,
5447 			"512-1023 byte frames received");
5448 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5449 			CTLFLAG_RD, &adapter->stats.prc1522,
5450 			"1023-1522 byte frames received");
5451  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5452  			CTLFLAG_RD, &adapter->stats.gorc,
5453  			"Good Octets Received");
5454 
5455 	/* Packet Transmission Stats */
5456  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5457  			CTLFLAG_RD, &adapter->stats.gotc,
5458  			"Good Octets Transmitted");
5459 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5460 			CTLFLAG_RD, &adapter->stats.tpt,
5461 			"Total Packets Transmitted");
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5463 			CTLFLAG_RD, &adapter->stats.gptc,
5464 			"Good Packets Transmitted");
5465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5466 			CTLFLAG_RD, &adapter->stats.bptc,
5467 			"Broadcast Packets Transmitted");
5468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5469 			CTLFLAG_RD, &adapter->stats.mptc,
5470 			"Multicast Packets Transmitted");
5471 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5472 			CTLFLAG_RD, &adapter->stats.ptc64,
5473 			"64 byte frames transmitted ");
5474 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5475 			CTLFLAG_RD, &adapter->stats.ptc127,
5476 			"65-127 byte frames transmitted");
5477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5478 			CTLFLAG_RD, &adapter->stats.ptc255,
5479 			"128-255 byte frames transmitted");
5480 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5481 			CTLFLAG_RD, &adapter->stats.ptc511,
5482 			"256-511 byte frames transmitted");
5483 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5484 			CTLFLAG_RD, &adapter->stats.ptc1023,
5485 			"512-1023 byte frames transmitted");
5486 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5487 			CTLFLAG_RD, &adapter->stats.ptc1522,
5488 			"1024-1522 byte frames transmitted");
5489 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5490 			CTLFLAG_RD, &adapter->stats.tsctc,
5491 			"TSO Contexts Transmitted");
5492 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5493 			CTLFLAG_RD, &adapter->stats.tsctfc,
5494 			"TSO Contexts Failed");
5495 
5496 
5497 	/* Interrupt Stats */
5498 
5499 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5500 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5501 	int_list = SYSCTL_CHILDREN(int_node);
5502 
5503 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5504 			CTLFLAG_RD, &adapter->stats.iac,
5505 			"Interrupt Assertion Count");
5506 
5507 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5508 			CTLFLAG_RD, &adapter->stats.icrxptc,
5509 			"Interrupt Cause Rx Pkt Timer Expire Count");
5510 
5511 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5512 			CTLFLAG_RD, &adapter->stats.icrxatc,
5513 			"Interrupt Cause Rx Abs Timer Expire Count");
5514 
5515 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5516 			CTLFLAG_RD, &adapter->stats.ictxptc,
5517 			"Interrupt Cause Tx Pkt Timer Expire Count");
5518 
5519 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5520 			CTLFLAG_RD, &adapter->stats.ictxatc,
5521 			"Interrupt Cause Tx Abs Timer Expire Count");
5522 
5523 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5524 			CTLFLAG_RD, &adapter->stats.ictxqec,
5525 			"Interrupt Cause Tx Queue Empty Count");
5526 
5527 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5528 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5529 			"Interrupt Cause Tx Queue Min Thresh Count");
5530 
5531 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5532 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5533 			"Interrupt Cause Rx Desc Min Thresh Count");
5534 
5535 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5536 			CTLFLAG_RD, &adapter->stats.icrxoc,
5537 			"Interrupt Cause Receiver Overrun Count");
5538 }
5539 
5540 /**********************************************************************
5541  *
5542  *  This routine provides a way to dump out the adapter eeprom,
5543  *  often a useful debug/service tool. This only dumps the first
5544  *  32 words, stuff that matters is in that extent.
5545  *
5546  **********************************************************************/
5547 static int
5548 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5549 {
5550 	struct adapter *adapter = (struct adapter *)arg1;
5551 	int error;
5552 	int result;
5553 
5554 	result = -1;
5555 	error = sysctl_handle_int(oidp, &result, 0, req);
5556 
5557 	if (error || !req->newptr)
5558 		return (error);
5559 
5560 	/*
5561 	 * This value will cause a hex dump of the
5562 	 * first 32 16-bit words of the EEPROM to
5563 	 * the screen.
5564 	 */
5565 	if (result == 1)
5566 		em_print_nvm_info(adapter);
5567 
5568 	return (error);
5569 }
5570 
5571 static void
5572 em_print_nvm_info(struct adapter *adapter)
5573 {
5574 	u16	eeprom_data;
5575 	int	i, j, row = 0;
5576 
5577 	/* Its a bit crude, but it gets the job done */
5578 	printf("\nInterface EEPROM Dump:\n");
5579 	printf("Offset\n0x0000  ");
5580 	for (i = 0, j = 0; i < 32; i++, j++) {
5581 		if (j == 8) { /* Make the offset block */
5582 			j = 0; ++row;
5583 			printf("\n0x00%x0  ",row);
5584 		}
5585 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5586 		printf("%04x ", eeprom_data);
5587 	}
5588 	printf("\n");
5589 }
5590 
5591 static int
5592 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5593 {
5594 	struct em_int_delay_info *info;
5595 	struct adapter *adapter;
5596 	u32 regval;
5597 	int error, usecs, ticks;
5598 
5599 	info = (struct em_int_delay_info *)arg1;
5600 	usecs = info->value;
5601 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5602 	if (error != 0 || req->newptr == NULL)
5603 		return (error);
5604 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5605 		return (EINVAL);
5606 	info->value = usecs;
5607 	ticks = EM_USECS_TO_TICKS(usecs);
5608 
5609 	adapter = info->adapter;
5610 
5611 	EM_CORE_LOCK(adapter);
5612 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5613 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5614 	/* Handle a few special cases. */
5615 	switch (info->offset) {
5616 	case E1000_RDTR:
5617 		break;
5618 	case E1000_TIDV:
5619 		if (ticks == 0) {
5620 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5621 			/* Don't write 0 into the TIDV register. */
5622 			regval++;
5623 		} else
5624 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5625 		break;
5626 	}
5627 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5628 	EM_CORE_UNLOCK(adapter);
5629 	return (0);
5630 }
5631 
5632 static void
5633 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5634 	const char *description, struct em_int_delay_info *info,
5635 	int offset, int value)
5636 {
5637 	info->adapter = adapter;
5638 	info->offset = offset;
5639 	info->value = value;
5640 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5641 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5642 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5643 	    info, 0, em_sysctl_int_delay, "I", description);
5644 }
5645 
5646 static void
5647 em_set_sysctl_value(struct adapter *adapter, const char *name,
5648 	const char *description, int *limit, int value)
5649 {
5650 	*limit = value;
5651 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5652 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5653 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5654 }
5655 
5656 
5657 /*
5658 ** Set flow control using sysctl:
5659 ** Flow control values:
5660 **      0 - off
5661 **      1 - rx pause
5662 **      2 - tx pause
5663 **      3 - full
5664 */
5665 static int
5666 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5667 {
5668         int		error;
5669 	static int	input = 3; /* default is full */
5670         struct adapter	*adapter = (struct adapter *) arg1;
5671 
5672         error = sysctl_handle_int(oidp, &input, 0, req);
5673 
5674         if ((error) || (req->newptr == NULL))
5675                 return (error);
5676 
5677 	if (input == adapter->fc) /* no change? */
5678 		return (error);
5679 
5680         switch (input) {
5681                 case e1000_fc_rx_pause:
5682                 case e1000_fc_tx_pause:
5683                 case e1000_fc_full:
5684                 case e1000_fc_none:
5685                         adapter->hw.fc.requested_mode = input;
5686 			adapter->fc = input;
5687                         break;
5688                 default:
5689 			/* Do nothing */
5690 			return (error);
5691         }
5692 
5693         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5694         e1000_force_mac_fc(&adapter->hw);
5695         return (error);
5696 }
5697 
5698 
5699 static int
5700 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5701 {
5702 	struct adapter *adapter;
5703 	int error;
5704 	int result;
5705 
5706 	result = -1;
5707 	error = sysctl_handle_int(oidp, &result, 0, req);
5708 
5709 	if (error || !req->newptr)
5710 		return (error);
5711 
5712 	if (result == 1) {
5713 		adapter = (struct adapter *)arg1;
5714 		em_print_debug_info(adapter);
5715         }
5716 
5717 	return (error);
5718 }
5719 
5720 /*
5721 ** This routine is meant to be fluid, add whatever is
5722 ** needed for debugging a problem.  -jfv
5723 */
5724 static void
5725 em_print_debug_info(struct adapter *adapter)
5726 {
5727 	device_t dev = adapter->dev;
5728 	struct tx_ring *txr = adapter->tx_rings;
5729 	struct rx_ring *rxr = adapter->rx_rings;
5730 
5731 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5732 		printf("Interface is RUNNING ");
5733 	else
5734 		printf("Interface is NOT RUNNING\n");
5735 
5736 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5737 		printf("and INACTIVE\n");
5738 	else
5739 		printf("and ACTIVE\n");
5740 
5741 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5742 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5743 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5744 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5745 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5746 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5747 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5748 	device_printf(dev, "TX descriptors avail = %d\n",
5749 	    txr->tx_avail);
5750 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5751 	    txr->no_desc_avail);
5752 	device_printf(dev, "RX discarded packets = %ld\n",
5753 	    rxr->rx_discarded);
5754 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5755 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5756 }
5757