/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);
static int	em_irq_fast(void *);
static void	em_handle_que(void *context, int pending);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_drv_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
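
/*
 * A worked example of the conversions above (illustrative arithmetic
 * only): the 1024/1000 scaling reflects delay timers that tick in
 * 1.024 usec units, so EM_TICKS_TO_USECS() and EM_USECS_TO_TICKS()
 * simply convert with rounding.  The ITR register counts in 256 nsec
 * increments, so DEFAULT_ITR is 1000000000 / (8000 * 256) = 488, and
 * 488 * 256 nsec is roughly 125 usec between interrupts, i.e. about
 * 8000 interrupts per second.
 */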

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");
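
/*
 * Illustrative usage: being CTLFLAG_RDTUN, these can only be set as
 * boot-time tunables, e.g. in /boot/loader.conf:
 *
 *	hw.em.tx_int_delay="64"
 *	hw.em.rx_int_delay="0"
 *
 * and then read back at runtime with sysctl(8).  The values shown are
 * examples, not recommendations.
 */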

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF.  The value seeds
 * hw->dev_spec.ich8lan.eee_disable in em_attach(), so 1 means EEE
 * is disabled.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum, and the ring size in
	 * bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
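
	/*
	 * Worked example of the checks above (assuming the usual 16-byte
	 * legacy descriptor and an EM_DBA_ALIGN of 128): the ring size in
	 * bytes must be a multiple of 128, i.e. the descriptor count a
	 * multiple of 8, so em_txd = 1024 passes while em_txd = 1020
	 * would fall back to EM_DEFAULT_TXD.
	 */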

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address afterwards.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free_drv(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister_drv(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach_drv(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free_drv(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than do an immediate send.  That queueing,
 *  rather than having multiple tx queues, is where the advantage
 *  of this driver lies.
 **********************************************************************/
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
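	/*
	 * drbr_peek() returns the head of the ring without removing it;
	 * the dequeue is only committed with drbr_advance() once em_xmit()
	 * succeeds.  On failure the (possibly modified) mbuf is returned
	 * to the ring with drbr_putback(), unless em_xmit() already freed
	 * it, in which case we advance past it.
	 */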
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_incobytes(ifp, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_incomcasts(ifp, 1);
		if_etherbpfmtap(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

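	/*
	 * If the TX lock is contended, enqueue the mbuf on the buf_ring
	 * instead of blocking; the current lock holder will drain the
	 * ring when it processes the queue.
	 */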
	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		if_etherbpfmtap(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit_drv(ifp, ifa);
#endif
		} else
			error = ether_ioctl_drv(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
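		/*
		 * Example of the bound below: the usable MTU is
		 * max_frame_size less the 14-byte Ethernet header and
		 * 4-byte CRC, so a 9234-byte frame limit allows an MTU
		 * of up to 9216.
		 */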
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT(
		    "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT(
		    "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl_drv(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register_drv(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister_drv(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl_drv(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we keep a duplicate
	 * in the last RAR entry for that eventuality; this
	 * ensures the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
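
	/*
	 * For reference: MCLBYTES is the standard 2K cluster,
	 * MJUMPAGESIZE a page-sized (typically 4K) cluster, and
	 * MJUM9BYTES a 9K jumbo cluster, matching the frame-size
	 * tests above.
	 */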

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}
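
	/*
	 * On the 82574 the IVAR register routes the RX/TX/link interrupt
	 * causes onto MSI-X vectors; adapter->ivars is assembled earlier,
	 * when the MSI-X vectors are allocated.
	 */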

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);
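
	/*
	 * Interrupts remain masked here; em_handle_que() re-enables them
	 * with em_enable_intr() once the deferred RX/TX work completes.
	 */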

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;


	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
1682 			if (!if_sendq_empty(ifp))
1683 				em_start_locked(ifp, txr);
1684 #endif
1685 			EM_TX_UNLOCK(txr);
1686 		}
1687 	}
1688 	EM_CORE_UNLOCK(adapter);
1689 }
1690 
1691 
1692 /*********************************************************************
1693  *
1694  *  Media Ioctl callback
1695  *
1696  *  This routine is called whenever the user queries the status of
1697  *  the interface using ifconfig.
1698  *
1699  **********************************************************************/
1700 static void
1701 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1702 {
1703 	struct adapter *adapter = if_getsoftc(ifp);
1704 	u_char fiber_type = IFM_1000_SX;
1705 
1706 	INIT_DEBUGOUT("em_media_status: begin");
1707 
1708 	EM_CORE_LOCK(adapter);
1709 	em_update_link_status(adapter);
1710 
1711 	ifmr->ifm_status = IFM_AVALID;
1712 	ifmr->ifm_active = IFM_ETHER;
1713 
1714 	if (!adapter->link_active) {
1715 		EM_CORE_UNLOCK(adapter);
1716 		return;
1717 	}
1718 
1719 	ifmr->ifm_status |= IFM_ACTIVE;
1720 
1721 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1724 	} else {
1725 		switch (adapter->link_speed) {
1726 		case 10:
1727 			ifmr->ifm_active |= IFM_10_T;
1728 			break;
1729 		case 100:
1730 			ifmr->ifm_active |= IFM_100_TX;
1731 			break;
1732 		case 1000:
1733 			ifmr->ifm_active |= IFM_1000_T;
1734 			break;
1735 		}
1736 		if (adapter->link_duplex == FULL_DUPLEX)
1737 			ifmr->ifm_active |= IFM_FDX;
1738 		else
1739 			ifmr->ifm_active |= IFM_HDX;
1740 	}
1741 	EM_CORE_UNLOCK(adapter);
1742 }
1743 
1744 /*********************************************************************
1745  *
1746  *  Media Ioctl callback
1747  *
1748  *  This routine is called when the user changes speed/duplex using
1749  *  media/mediopt option with ifconfig.
1750  *  media/mediaopt options with ifconfig.
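 *  e.g. "ifconfig em0 media 100baseTX mediaopt full-duplex" selects
 *  the IFM_100_TX case below with IFM_FDX set.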
1751  **********************************************************************/
1752 static int
1753 em_media_change(if_t ifp)
1754 {
1755 	struct adapter *adapter = if_getsoftc(ifp);
1756 	struct ifmedia  *ifm = &adapter->media;
1757 
1758 	INIT_DEBUGOUT("em_media_change: begin");
1759 
1760 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761 		return (EINVAL);
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765 	case IFM_AUTO:
1766 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768 		break;
1769 	case IFM_1000_LX:
1770 	case IFM_1000_SX:
1771 	case IFM_1000_T:
1772 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774 		break;
1775 	case IFM_100_TX:
1776 		adapter->hw.mac.autoneg = FALSE;
1777 		adapter->hw.phy.autoneg_advertised = 0;
1778 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780 		else
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782 		break;
1783 	case IFM_10_T:
1784 		adapter->hw.mac.autoneg = FALSE;
1785 		adapter->hw.phy.autoneg_advertised = 0;
1786 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788 		else
1789 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790 		break;
1791 	default:
1792 		device_printf(adapter->dev, "Unsupported media type\n");
1793 	}
1794 
1795 	em_init_locked(adapter);
1796 	EM_CORE_UNLOCK(adapter);
1797 
1798 	return (0);
1799 }
1800 
1801 /*********************************************************************
1802  *
1803  *  This routine maps the mbufs to tx descriptors.
1804  *
1805  *  return 0 on success, positive on failure
1806  **********************************************************************/
1807 
1808 static int
1809 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810 {
1811 	struct adapter		*adapter = txr->adapter;
1812 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1813 	bus_dmamap_t		map;
1814 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1815 	struct e1000_tx_desc	*ctxd = NULL;
1816 	struct mbuf		*m_head;
1817 	struct ether_header	*eh;
1818 	struct ip		*ip = NULL;
1819 	struct tcphdr		*tp = NULL;
1820 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1821 	int			ip_off, poff;
1822 	int			nsegs, i, j, first, last = 0;
1823 	int			error, do_tso, tso_desc = 0, remap = 1;
1824 
1825 retry:
1826 	m_head = *m_headp;
1827 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1828 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1829 	ip_off = poff = 0;
1830 
1831 	/*
1832 	 * Intel recommends entire IP/TCP header length reside in a single
1833 	 * buffer. If multiple descriptors are used to describe the IP and
1834 	 * TCP header, each descriptor should describe one or more
1835 	 * complete headers; descriptors referencing only parts of headers
1836 	 * are not supported. If all layer headers are not coalesced into
1837 	 * a single buffer, each buffer should not cross a 4KB boundary,
1838 	 * or be larger than the maximum read request size.
1839 	 * The controller also requires modifying the IP/TCP header to
1840 	 * make TSO work, so we first obtain a writable mbuf chain and
1841 	 * coalesce the ethernet/IP/TCP header into a single buffer to
1842 	 * meet the controller's requirements. This also simplifies
1843 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1844 	 */
1845 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1846 		if (do_tso || (m_head->m_next != NULL &&
1847 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1848 			if (M_WRITABLE(*m_headp) == 0) {
1849 				m_head = m_dup(*m_headp, M_NOWAIT);
1850 				m_freem(*m_headp);
1851 				if (m_head == NULL) {
1852 					*m_headp = NULL;
1853 					return (ENOBUFS);
1854 				}
1855 				*m_headp = m_head;
1856 			}
1857 		}
1858 		/*
1859 		 * XXX
1860 		 * Assume IPv4, we don't have TSO/checksum offload support
1861 		 * for IPv6 yet.
1862 		 */
1863 		ip_off = sizeof(struct ether_header);
1864 		m_head = m_pullup(m_head, ip_off);
1865 		if (m_head == NULL) {
1866 			*m_headp = NULL;
1867 			return (ENOBUFS);
1868 		}
1869 		eh = mtod(m_head, struct ether_header *);
1870 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1871 			ip_off = sizeof(struct ether_vlan_header);
1872 			m_head = m_pullup(m_head, ip_off);
1873 			if (m_head == NULL) {
1874 				*m_headp = NULL;
1875 				return (ENOBUFS);
1876 			}
1877 		}
1878 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1879 		if (m_head == NULL) {
1880 			*m_headp = NULL;
1881 			return (ENOBUFS);
1882 		}
1883 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1884 		poff = ip_off + (ip->ip_hl << 2);
1885 		if (do_tso) {
1886 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887 			if (m_head == NULL) {
1888 				*m_headp = NULL;
1889 				return (ENOBUFS);
1890 			}
1891 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892 			/*
1893 			 * TSO workaround:
1894 			 *   pull 4 more bytes of data into the header mbuf.
1895 			 */
1896 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1897 			if (m_head == NULL) {
1898 				*m_headp = NULL;
1899 				return (ENOBUFS);
1900 			}
1901 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1902 			ip->ip_len = 0;
1903 			ip->ip_sum = 0;
1904 			/*
1905 			 * The pseudo TCP checksum must not include the TCP
1906 			 * payload length, so the driver recomputes it here as
1907 			 * the hardware expects, per Microsoft's Large Send
1908 			 * specification (a sketch of the sum follows em_xmit()).
1909 			 */
1910 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1912 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1913 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1914 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1915 			if (m_head == NULL) {
1916 				*m_headp = NULL;
1917 				return (ENOBUFS);
1918 			}
1919 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1921 			if (m_head == NULL) {
1922 				*m_headp = NULL;
1923 				return (ENOBUFS);
1924 			}
1925 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1928 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1929 			if (m_head == NULL) {
1930 				*m_headp = NULL;
1931 				return (ENOBUFS);
1932 			}
1933 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1934 		}
1935 		*m_headp = m_head;
1936 	}
1937 
1938 	/*
1939 	 * Map the packet for DMA
1940 	 *
1941 	 * Capture the first descriptor index,
1942 	 * this descriptor will have the index
1943 	 * of the EOP which is the only one that
1944 	 * now gets a DONE bit writeback.
1945 	 */
1946 	first = txr->next_avail_desc;
1947 	tx_buffer = &txr->tx_buffers[first];
1948 	tx_buffer_mapped = tx_buffer;
1949 	map = tx_buffer->map;
1950 
1951 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1952 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1953 
1954 	/*
1955 	 * There are two types of errors we can (try) to handle:
1956 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1957 	 *   out of segments.  Defragment the mbuf chain and try again.
1958 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1959 	 *   at this point in time.  Defer sending and try again later.
1960 	 * All other errors, in particular EINVAL, are fatal and prevent the
1961 	 * mbuf chain from ever going through.  Drop it and report error.
1962 	 */
1963 	if (error == EFBIG && remap) {
1964 		struct mbuf *m;
1965 
1966 		m = m_defrag(*m_headp, M_NOWAIT);
1967 		if (m == NULL) {
1968 			adapter->mbuf_alloc_failed++;
1969 			m_freem(*m_headp);
1970 			*m_headp = NULL;
1971 			return (ENOBUFS);
1972 		}
1973 		*m_headp = m;
1974 
1975 		/* Try it again, but only once */
1976 		remap = 0;
1977 		goto retry;
1978 	} else if (error == ENOMEM) {
1979 		adapter->no_tx_dma_setup++;
1980 		return (error);
1981 	} else if (error != 0) {
1982 		adapter->no_tx_dma_setup++;
1983 		m_freem(*m_headp);
1984 		*m_headp = NULL;
1985 		return (error);
1986 	}
1987 
1988 	/*
1989 	 * TSO Hardware workaround, if this packet is not
1990 	 * TSO, and is only a single descriptor long, and
1991 	 * it follows a TSO burst, then we need to add a
1992 	 * sentinel descriptor to prevent premature writeback.
1993 	 */
1994 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1995 		if (nsegs == 1)
1996 			tso_desc = TRUE;
1997 		txr->tx_tso = FALSE;
1998 	}
1999 
2000 	if (nsegs > (txr->tx_avail - 2)) {
2001 		txr->no_desc_avail++;
2002 		bus_dmamap_unload(txr->txtag, map);
2003 		return (ENOBUFS);
2004 	}
2005 	m_head = *m_headp;
2006 
2007 	/* Do hardware assists */
2008 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2009 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2010 		    &txd_upper, &txd_lower);
2011 		/* we need to make a final sentinel transmit desc */
2012 		tso_desc = TRUE;
2013 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2014 		em_transmit_checksum_setup(txr, m_head,
2015 		    ip_off, ip, &txd_upper, &txd_lower);
2016 
2017 	if (m_head->m_flags & M_VLANTAG) {
2018 		/* Set the vlan id. */
2019 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2020 		/* Tell hardware to add tag */
2021 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2022 	}
2023 
2024 	i = txr->next_avail_desc;
2025 
2026 	/* Set up our transmit descriptors */
2027 	for (j = 0; j < nsegs; j++) {
2028 		bus_size_t seg_len;
2029 		bus_addr_t seg_addr;
2030 
2031 		tx_buffer = &txr->tx_buffers[i];
2032 		ctxd = &txr->tx_base[i];
2033 		seg_addr = segs[j].ds_addr;
2034 		seg_len  = segs[j].ds_len;
2035 		/*
2036 		** TSO Workaround:
2037 		** If this is the last descriptor, we want to
2038 		** split it so we have a small final sentinel
2039 		*/
2040 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2041 			seg_len -= 4;
2042 			ctxd->buffer_addr = htole64(seg_addr);
2043 			ctxd->lower.data = htole32(
2044 			    adapter->txd_cmd | txd_lower | seg_len);
2045 			ctxd->upper.data =
2046 			    htole32(txd_upper);
2047 			if (++i == adapter->num_tx_desc)
2048 				i = 0;
2049 			/* Now make the sentinel */
2050 			++txd_used; /* using an extra txd */
2051 			ctxd = &txr->tx_base[i];
2052 			tx_buffer = &txr->tx_buffers[i];
2053 			ctxd->buffer_addr =
2054 			    htole64(seg_addr + seg_len);
2055 			ctxd->lower.data = htole32(
2056 			    adapter->txd_cmd | txd_lower | 4);
2057 			ctxd->upper.data =
2058 			    htole32(txd_upper);
2059 			last = i;
2060 			if (++i == adapter->num_tx_desc)
2061 				i = 0;
2062 		} else {
2063 			ctxd->buffer_addr = htole64(seg_addr);
2064 			ctxd->lower.data = htole32(
2065 			    adapter->txd_cmd | txd_lower | seg_len);
2066 			ctxd->upper.data =
2067 			    htole32(txd_upper);
2068 			last = i;
2069 			if (++i == adapter->num_tx_desc)
2070 				i = 0;
2071 		}
2072 		tx_buffer->m_head = NULL;
2073 		tx_buffer->next_eop = -1;
2074 	}
2075 
2076 	txr->next_avail_desc = i;
2077 	txr->tx_avail -= nsegs;
2078 	if (tso_desc) /* TSO used an extra for sentinel */
2079 		txr->tx_avail -= txd_used;
2080 
2081 	tx_buffer->m_head = m_head;
2082 	/*
2083 	** Here we swap the map so the last descriptor,
2084 	** which gets the completion interrupt has the
2085 	** real map, and the first descriptor gets the
2086 	** unused map from this descriptor.
2087 	*/
2088 	tx_buffer_mapped->map = tx_buffer->map;
2089 	tx_buffer->map = map;
2090 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2091 
2092 	/*
2093 	 * The last descriptor of the packet
2094 	 * needs the End Of Packet (EOP) and
2095 	 * Report Status (RS) bits set.
2096 	 */
2097 	ctxd->lower.data |=
2098 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2099 	/*
2100 	 * Keep track in the first buffer which
2101 	 * descriptor will be written back
2102 	 */
2103 	tx_buffer = &txr->tx_buffers[first];
2104 	tx_buffer->next_eop = last;
2105 	/* Update the watchdog time early and often */
2106 	txr->watchdog_time = ticks;
2107 
2108 	/*
2109 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2110 	 * that this frame is available to transmit.
2111 	 */
2112 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2113 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2114 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2115 
2116 	return (0);
2117 }
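
/*
 * A minimal sketch (illustration only, never called by the driver; the
 * helper name is made up) of the folded pseudo-header sum in_pseudo()
 * produces for the TSO path above: three 32-bit one's-complement
 * partial sums folded into 16 bits.  The TCP payload length is left
 * out on purpose; for TSO the hardware adds each segment's length
 * when it rewrites the headers.
 */
static __inline u16
em_pseudo_sum_sketch(u32 src, u32 dst, u32 proto)
{
	u64 sum;

	sum = (u64)src + dst + proto;
	while (sum > 0xffff)	/* fold carries back into the low word */
		sum = (sum >> 16) + (sum & 0xffff);
	return ((u16)sum);
}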
2118 
2119 static void
2120 em_set_promisc(struct adapter *adapter)
2121 {
2122 	if_t ifp = adapter->ifp;
2123 	u32		reg_rctl;
2124 
2125 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2126 
2127 	if (if_getflags(ifp) & IFF_PROMISC) {
2128 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2129 		/* Turn this on if you want to see bad packets */
2130 		if (em_debug_sbp)
2131 			reg_rctl |= E1000_RCTL_SBP;
2132 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2134 		reg_rctl |= E1000_RCTL_MPE;
2135 		reg_rctl &= ~E1000_RCTL_UPE;
2136 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2137 	}
2138 }
2139 
2140 static void
2141 em_disable_promisc(struct adapter *adapter)
2142 {
2143 	if_t		ifp = adapter->ifp;
2144 	u32		reg_rctl;
2145 	int		mcnt = 0;
2146 
2147 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2148 	reg_rctl &= (~E1000_RCTL_UPE);
2149 	if (if_getflags(ifp) & IFF_ALLMULTI)
2150 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2151 	else
2152 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2153 	/* Don't disable if in MAX groups */
2154 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2155 		reg_rctl &= (~E1000_RCTL_MPE);
2156 	reg_rctl &= (~E1000_RCTL_SBP);
2157 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2158 }
2159 
2160 
2161 /*********************************************************************
2162  *  Multicast Update
2163  *
2164  *  This routine is called whenever multicast address list is updated.
2165  *
2166  **********************************************************************/
2167 
2168 static void
2169 em_set_multi(struct adapter *adapter)
2170 {
2171 	if_t ifp = adapter->ifp;
2172 	u32 reg_rctl = 0;
2173 	u8  *mta; /* Multicast array memory */
2174 	int mcnt = 0;
2175 
2176 	IOCTL_DEBUGOUT("em_set_multi: begin");
2177 
2178 	mta = adapter->mta;
2179 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2180 
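	/*
	 * The 82542 rev 2.0 requires the receiver to be held in reset
	 * (with MWI disabled) while the multicast table is rewritten;
	 * both are restored once the update below is done.
	 */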
2181 	if (adapter->hw.mac.type == e1000_82542 &&
2182 	    adapter->hw.revision_id == E1000_REVISION_2) {
2183 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2184 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2185 			e1000_pci_clear_mwi(&adapter->hw);
2186 		reg_rctl |= E1000_RCTL_RST;
2187 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188 		msec_delay(5);
2189 	}
2190 
2191 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2192 
2193 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2194 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2195 		reg_rctl |= E1000_RCTL_MPE;
2196 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2197 	} else
2198 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2199 
2200 	if (adapter->hw.mac.type == e1000_82542 &&
2201 	    adapter->hw.revision_id == E1000_REVISION_2) {
2202 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2203 		reg_rctl &= ~E1000_RCTL_RST;
2204 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2205 		msec_delay(5);
2206 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2207 			e1000_pci_set_mwi(&adapter->hw);
2208 	}
2209 }
2210 
2211 
2212 /*********************************************************************
2213  *  Timer routine
2214  *
2215  *  This routine checks for link status and updates statistics.
2216  *
2217  **********************************************************************/
2218 
2219 static void
2220 em_local_timer(void *arg)
2221 {
2222 	struct adapter	*adapter = arg;
2223 	if_t ifp = adapter->ifp;
2224 	struct tx_ring	*txr = adapter->tx_rings;
2225 	struct rx_ring	*rxr = adapter->rx_rings;
2226 	u32		trigger;
2227 
2228 	EM_CORE_LOCK_ASSERT(adapter);
2229 
2230 	em_update_link_status(adapter);
2231 	em_update_stats_counters(adapter);
2232 
2233 	/* Reset LAA into RAR[0] on 82571 */
2234 	if ((adapter->hw.mac.type == e1000_82571) &&
2235 	    e1000_get_laa_state_82571(&adapter->hw))
2236 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2237 
2238 	/* Mask to use in the irq trigger */
2239 	if (adapter->msix_mem)
2240 		trigger = rxr->ims;
2241 	else
2242 		trigger = E1000_ICS_RXDMT0;
2243 
2244 	/*
2245 	** Check on the state of the TX queue(s); this
2246 	** can be done without the lock because it is
2247 	** read-only and the HUNG state is static once set.
2248 	*/
2249 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2250 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2251 		    (adapter->pause_frames == 0))
2252 			goto hung;
2253 		/* Schedule a TX task if needed */
2254 		if (txr->tx_avail <= EM_MAX_SCATTER)
2255 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2256 	}
2257 
2258 	adapter->pause_frames = 0;
2259 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2260 #ifndef DEVICE_POLLING
2261 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2262 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2263 #endif
2264 	return;
2265 hung:
2266 	/* Looks like we're hung */
2267 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2268 	device_printf(adapter->dev,
2269 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2270 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2271 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2272 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2273 	    "Next TX to Clean = %d\n",
2274 	    txr->me, txr->tx_avail, txr->next_to_clean);
2275 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2276 	adapter->watchdog_events++;
2277 	adapter->pause_frames = 0;
2278 	em_init_locked(adapter);
2279 }
2280 
2281 
2282 static void
2283 em_update_link_status(struct adapter *adapter)
2284 {
2285 	struct e1000_hw *hw = &adapter->hw;
2286 	if_t ifp = adapter->ifp;
2287 	device_t dev = adapter->dev;
2288 	struct tx_ring *txr = adapter->tx_rings;
2289 	u32 link_check = 0;
2290 
2291 	/* Get the cached link value or read phy for real */
2292 	switch (hw->phy.media_type) {
2293 	case e1000_media_type_copper:
2294 		if (hw->mac.get_link_status) {
2295 			/* Do the work to read phy */
2296 			e1000_check_for_link(hw);
2297 			link_check = !hw->mac.get_link_status;
2298 			if (link_check) /* ESB2 fix */
2299 				e1000_cfg_on_link_up(hw);
2300 		} else
2301 			link_check = TRUE;
2302 		break;
2303 	case e1000_media_type_fiber:
2304 		e1000_check_for_link(hw);
2305 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2306 		    E1000_STATUS_LU);
2307 		break;
2308 	case e1000_media_type_internal_serdes:
2309 		e1000_check_for_link(hw);
2310 		link_check = adapter->hw.mac.serdes_has_link;
2311 		break;
2312 	default:
2313 	case e1000_media_type_unknown:
2314 		break;
2315 	}
2316 
2317 	/* Now check for a transition */
2318 	if (link_check && (adapter->link_active == 0)) {
2319 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2320 		    &adapter->link_duplex);
2321 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2322 		if ((adapter->link_speed != SPEED_1000) &&
2323 		    ((hw->mac.type == e1000_82571) ||
2324 		    (hw->mac.type == e1000_82572))) {
2325 			int tarc0;
2326 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2327 			tarc0 &= ~SPEED_MODE_BIT;
2328 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2329 		}
2330 		if (bootverbose)
2331 			device_printf(dev, "Link is up %d Mbps %s\n",
2332 			    adapter->link_speed,
2333 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2334 			    "Full Duplex" : "Half Duplex"));
2335 		adapter->link_active = 1;
2336 		adapter->smartspeed = 0;
2337 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2338 		if_linkstate_change_drv(ifp, LINK_STATE_UP);
2339 	} else if (!link_check && (adapter->link_active == 1)) {
2340 		if_setbaudrate(ifp, 0);
2341 		adapter->link_speed = 0;
2342 		adapter->link_duplex = 0;
2343 		if (bootverbose)
2344 			device_printf(dev, "Link is Down\n");
2345 		adapter->link_active = 0;
2346 		/* Link down, disable watchdog */
2347 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2348 			txr->queue_status = EM_QUEUE_IDLE;
2349 		if_linkstate_change_drv(ifp, LINK_STATE_DOWN);
2350 	}
2351 }
2352 
2353 /*********************************************************************
2354  *
2355  *  This routine disables all traffic on the adapter by issuing a
2356  *  global reset on the MAC and deallocates TX/RX buffers.
2357  *
2358  *  This routine should always be called with BOTH the CORE
2359  *  and TX locks.
2360  **********************************************************************/
2361 
2362 static void
2363 em_stop(void *arg)
2364 {
2365 	struct adapter	*adapter = arg;
2366 	if_t ifp = adapter->ifp;
2367 	struct tx_ring	*txr = adapter->tx_rings;
2368 
2369 	EM_CORE_LOCK_ASSERT(adapter);
2370 
2371 	INIT_DEBUGOUT("em_stop: begin");
2372 
2373 	em_disable_intr(adapter);
2374 	callout_stop(&adapter->timer);
2375 
2376 	/* Tell the stack that the interface is no longer active */
2377 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2378 
2379 	/* Disarm watchdog timer. */
2380 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2381 		EM_TX_LOCK(txr);
2382 		txr->queue_status = EM_QUEUE_IDLE;
2383 		EM_TX_UNLOCK(txr);
2384 	}
2385 
2386 	e1000_reset_hw(&adapter->hw);
2387 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2388 
2389 	e1000_led_off(&adapter->hw);
2390 	e1000_cleanup_led(&adapter->hw);
2391 }
2392 
2393 
2394 /*********************************************************************
2395  *
2396  *  Determine hardware revision.
2397  *
2398  **********************************************************************/
2399 static void
2400 em_identify_hardware(struct adapter *adapter)
2401 {
2402 	device_t dev = adapter->dev;
2403 
2404 	/* Enable bus mastering and cache the PCI command word */
2405 	pci_enable_busmaster(dev);
2406 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2407 
2408 	/* Save off the information about this board */
2409 	adapter->hw.vendor_id = pci_get_vendor(dev);
2410 	adapter->hw.device_id = pci_get_device(dev);
2411 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2412 	adapter->hw.subsystem_vendor_id =
2413 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2414 	adapter->hw.subsystem_device_id =
2415 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2416 
2417 	/* Do Shared Code Init and Setup */
2418 	if (e1000_set_mac_type(&adapter->hw)) {
2419 		device_printf(dev, "Setup init failure\n");
2420 		return;
2421 	}
2422 }
2423 
2424 static int
2425 em_allocate_pci_resources(struct adapter *adapter)
2426 {
2427 	device_t	dev = adapter->dev;
2428 	int		rid;
2429 
2430 	rid = PCIR_BAR(0);
2431 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2432 	    &rid, RF_ACTIVE);
2433 	if (adapter->memory == NULL) {
2434 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2435 		return (ENXIO);
2436 	}
2437 	adapter->osdep.mem_bus_space_tag =
2438 	    rman_get_bustag(adapter->memory);
2439 	adapter->osdep.mem_bus_space_handle =
2440 	    rman_get_bushandle(adapter->memory);
2441 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2442 
2443 	/* Default to a single queue */
2444 	adapter->num_queues = 1;
2445 
2446 	/*
2447 	 * Setup MSI/X or MSI if PCI Express
2448 	 */
2449 	adapter->msix = em_setup_msix(adapter);
2450 
2451 	adapter->hw.back = &adapter->osdep;
2452 
2453 	return (0);
2454 }
2455 
2456 /*********************************************************************
2457  *
2458  *  Setup the Legacy or MSI Interrupt handler
2459  *
2460  **********************************************************************/
2461 int
2462 em_allocate_legacy(struct adapter *adapter)
2463 {
2464 	device_t dev = adapter->dev;
2465 	struct tx_ring	*txr = adapter->tx_rings;
2466 	int error, rid = 0;
2467 
2468 	/* Manually turn off all interrupts */
2469 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2470 
2471 	if (adapter->msix == 1) /* using MSI */
2472 		rid = 1;
2473 	/* We allocate a single interrupt resource */
2474 	adapter->res = bus_alloc_resource_any(dev,
2475 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2476 	if (adapter->res == NULL) {
2477 		device_printf(dev, "Unable to allocate bus resource: "
2478 		    "interrupt\n");
2479 		return (ENXIO);
2480 	}
2481 
2482 	/*
2483 	 * Allocate a fast interrupt and the associated
2484 	 * deferred processing contexts.
2485 	 */
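	/*
	 * The filter (em_irq_fast) runs in interrupt context; it masks
	 * further interrupts and enqueues que_task, so the real work
	 * happens in em_handle_que() on the taskqueue thread started
	 * below.
	 */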
2486 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2487 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2488 	    taskqueue_thread_enqueue, &adapter->tq);
2489 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2490 	    device_get_nameunit(adapter->dev));
2491 	/* A TX-only task, also enqueued from the local timer */
2492 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2493 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2494 	    taskqueue_thread_enqueue, &txr->tq);
2495 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2496 	    device_get_nameunit(adapter->dev));
2497 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2498 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2499 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2500 		device_printf(dev, "Failed to register fast interrupt "
2501 			    "handler: %d\n", error);
2502 		taskqueue_free(adapter->tq);
2503 		adapter->tq = NULL;
2504 		return (error);
2505 	}
2506 
2507 	return (0);
2508 }
2509 
2510 /*********************************************************************
2511  *
2512  *  Setup the MSIX Interrupt handlers
2513  *   This is not really multiqueue; rather,
2514  *   it just uses separate interrupt vectors
2515  *   for TX, RX, and Link.
2516  *
2517  **********************************************************************/
2518 int
2519 em_allocate_msix(struct adapter *adapter)
2520 {
2521 	device_t	dev = adapter->dev;
2522 	struct		tx_ring *txr = adapter->tx_rings;
2523 	struct		rx_ring *rxr = adapter->rx_rings;
2524 	int		error, rid, vector = 0;
2525 
2526 
2527 	/* Make sure all interrupts are disabled */
2528 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2529 
2530 	/* First set up ring resources */
2531 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2532 
2533 		/* RX ring */
2534 		rid = vector + 1;
2535 
2536 		rxr->res = bus_alloc_resource_any(dev,
2537 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2538 		if (rxr->res == NULL) {
2539 			device_printf(dev,
2540 			    "Unable to allocate bus resource: "
2541 			    "RX MSIX Interrupt %d\n", i);
2542 			return (ENXIO);
2543 		}
2544 		if ((error = bus_setup_intr(dev, rxr->res,
2545 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2546 		    rxr, &rxr->tag)) != 0) {
2547 			device_printf(dev, "Failed to register RX handler");
2548 			return (error);
2549 		}
2550 #if __FreeBSD_version >= 800504
2551 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2552 #endif
2553 		rxr->msix = vector++; /* NOTE increment vector for TX */
2554 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2555 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2556 		    taskqueue_thread_enqueue, &rxr->tq);
2557 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2558 		    device_get_nameunit(adapter->dev));
2559 		/*
2560 		** Set the bit to enable interrupt
2561 		** in E1000_IMS -- bits 20 and 21
2562 		** are for RX0 and RX1, note this has
2563 		** NOTHING to do with the MSIX vector
2564 		*/
2565 		rxr->ims = 1 << (20 + i);
2566 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
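		/*
		 * In the 82574 IVAR each cause has a 4-bit field: the
		 * low 3 bits select the MSIX vector and bit 3 (the
		 * "8 |" above) marks the entry valid.
		 */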
2567 
2568 		/* TX ring */
2569 		rid = vector + 1;
2570 		txr->res = bus_alloc_resource_any(dev,
2571 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2572 		if (txr->res == NULL) {
2573 			device_printf(dev,
2574 			    "Unable to allocate bus resource: "
2575 			    "TX MSIX Interrupt %d\n", i);
2576 			return (ENXIO);
2577 		}
2578 		if ((error = bus_setup_intr(dev, txr->res,
2579 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2580 		    txr, &txr->tag)) != 0) {
2581 			device_printf(dev, "Failed to register TX handler");
2582 			return (error);
2583 		}
2584 #if __FreeBSD_version >= 800504
2585 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2586 #endif
2587 		txr->msix = vector++; /* Increment vector for next pass */
2588 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2589 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2590 		    taskqueue_thread_enqueue, &txr->tq);
2591 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2592 		    device_get_nameunit(adapter->dev));
2593 		/*
2594 		** Set the bit to enable interrupt
2595 		** in E1000_IMS -- bits 22 and 23
2596 		** are for TX0 and TX1, note this has
2597 		** NOTHING to do with the MSIX vector
2598 		*/
2599 		txr->ims = 1 << (22 + i);
2600 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2601 	}
2602 
2603 	/* Link interrupt */
2604 	++rid;
2605 	adapter->res = bus_alloc_resource_any(dev,
2606 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2607 	if (!adapter->res) {
2608 		device_printf(dev, "Unable to allocate "
2609 		    "bus resource: Link interrupt [%d]\n", rid);
2610 		return (ENXIO);
2611 	}
2612 	/* Set the link handler function */
2613 	error = bus_setup_intr(dev, adapter->res,
2614 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2615 	    em_msix_link, adapter, &adapter->tag);
2616 	if (error) {
2617 		adapter->res = NULL;
2618 		device_printf(dev, "Failed to register LINK handler");
2619 		return (error);
2620 	}
2621 #if __FreeBSD_version >= 800504
2622 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2623 #endif
2624 	adapter->linkvec = vector;
2625 	adapter->ivars |=  (8 | vector) << 16;
2626 	adapter->ivars |= 0x80000000;
2627 
2628 	return (0);
2629 }
2630 
2631 
2632 static void
2633 em_free_pci_resources(struct adapter *adapter)
2634 {
2635 	device_t	dev = adapter->dev;
2636 	struct tx_ring	*txr;
2637 	struct rx_ring	*rxr;
2638 	int		rid;
2639 
2640 
2641 	/*
2642 	** Release all the queue interrupt resources:
2643 	*/
2644 	for (int i = 0; i < adapter->num_queues; i++) {
2645 		txr = &adapter->tx_rings[i];
2646 		rxr = &adapter->rx_rings[i];
2647 		/* an early abort? */
2648 		if ((txr == NULL) || (rxr == NULL))
2649 			break;
2650 		rid = txr->msix + 1;
2651 		if (txr->tag != NULL) {
2652 			bus_teardown_intr(dev, txr->res, txr->tag);
2653 			txr->tag = NULL;
2654 		}
2655 		if (txr->res != NULL)
2656 			bus_release_resource(dev, SYS_RES_IRQ,
2657 			    rid, txr->res);
2658 		rid = rxr->msix + 1;
2659 		if (rxr->tag != NULL) {
2660 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2661 			rxr->tag = NULL;
2662 		}
2663 		if (rxr->res != NULL)
2664 			bus_release_resource(dev, SYS_RES_IRQ,
2665 			    rid, rxr->res);
2666 	}
2667 
2668 	if (adapter->linkvec) /* we are doing MSIX */
2669 		rid = adapter->linkvec + 1;
2670 	else
2671 		rid = (adapter->msix != 0) ? 1 : 0;
2672 
2673 	if (adapter->tag != NULL) {
2674 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2675 		adapter->tag = NULL;
2676 	}
2677 
2678 	if (adapter->res != NULL)
2679 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2680 
2681 
2682 	if (adapter->msix)
2683 		pci_release_msi(dev);
2684 
2685 	if (adapter->msix_mem != NULL)
2686 		bus_release_resource(dev, SYS_RES_MEMORY,
2687 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2688 
2689 	if (adapter->memory != NULL)
2690 		bus_release_resource(dev, SYS_RES_MEMORY,
2691 		    PCIR_BAR(0), adapter->memory);
2692 
2693 	if (adapter->flash != NULL)
2694 		bus_release_resource(dev, SYS_RES_MEMORY,
2695 		    EM_FLASH, adapter->flash);
2696 }
2697 
2698 /*
2699  * Setup MSI or MSI/X
2700  */
2701 static int
2702 em_setup_msix(struct adapter *adapter)
2703 {
2704 	device_t dev = adapter->dev;
2705 	int val;
2706 
2707 	/*
2708 	** Setup MSI/X for Hartwell: tests have shown
2709 	** use of two queues to be unstable, and to
2710 	** provide no great gain anyway, so we simply
2711 	** seperate the interrupts and use a single queue.
2712 	** separate the interrupts and use a single queue.
2713 	if ((adapter->hw.mac.type == e1000_82574) &&
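	/* em_enable_msix comes from the hw.em.enable_msix loader tunable. */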
2714 	    (em_enable_msix == TRUE)) {
2715 		/* Map the MSIX BAR */
2716 		int rid = PCIR_BAR(EM_MSIX_BAR);
2717 		adapter->msix_mem = bus_alloc_resource_any(dev,
2718 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2719 		if (adapter->msix_mem == NULL) {
2720 			/* May not be enabled */
2721 			device_printf(adapter->dev,
2722 			    "Unable to map MSIX table\n");
2723 			goto msi;
2724 		}
2725 		val = pci_msix_count(dev);
2726 		/* We only need/want 3 vectors */
2727 		/* We only need/want 3 vectors (RX, TX, and link) */
2728 		if (val >= 3)
2729 			val = 3;
2730 		else {
2731 			device_printf(adapter->dev,
2732 			    "MSIX: insufficient vectors, using MSI\n");
2733 			goto msi;
2734 
2735 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2736 			device_printf(adapter->dev,
2737 			    "Using MSIX interrupts "
2738 			    "with %d vectors\n", val);
2739 			return (val);
2740 		}
2741 
2742 		/*
2743 		** If MSIX alloc failed or provided us with
2744 		** less than needed, free and fall through to MSI
2745 		*/
2746 		pci_release_msi(dev);
2747 	}
2748 msi:
2749 	if (adapter->msix_mem != NULL) {
2750 		bus_release_resource(dev, SYS_RES_MEMORY,
2751 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2752 		adapter->msix_mem = NULL;
2753 	}
2754 	val = 1;
2755 	if (pci_alloc_msi(dev, &val) == 0) {
2756 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2757 		return (val);
2758 	}
2759 	/* Should only happen due to manual configuration */
2760 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2761 	return (0);
2762 }
2763 
2764 
2765 /*********************************************************************
2766  *
2767  *  Initialize the hardware to a configuration
2768  *  as specified by the adapter structure.
2769  *
2770  **********************************************************************/
2771 static void
2772 em_reset(struct adapter *adapter)
2773 {
2774 	device_t	dev = adapter->dev;
2775 	if_t ifp = adapter->ifp;
2776 	struct e1000_hw	*hw = &adapter->hw;
2777 	u16		rx_buffer_size;
2778 	u32		pba;
2779 
2780 	INIT_DEBUGOUT("em_reset: begin");
2781 
2782 	/* Set up smart power down as default off on newer adapters. */
2783 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2784 	    hw->mac.type == e1000_82572)) {
2785 		u16 phy_tmp = 0;
2786 
2787 		/* Speed up time to link by disabling smart power down. */
2788 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2789 		phy_tmp &= ~IGP02E1000_PM_SPD;
2790 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2791 	}
2792 
2793 	/*
2794 	 * Packet Buffer Allocation (PBA)
2795 	 * Writing PBA sets the receive portion of the buffer
2796 	 * the remainder is used for the transmit buffer.
2797 	 */
2798 	switch (hw->mac.type) {
2799 	/* Total Packet Buffer on these is 48K */
2800 	case e1000_82571:
2801 	case e1000_82572:
2802 	case e1000_80003es2lan:
2803 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2804 		break;
2805 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2806 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2807 		break;
2808 	case e1000_82574:
2809 	case e1000_82583:
2810 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2811 		break;
2812 	case e1000_ich8lan:
2813 		pba = E1000_PBA_8K;
2814 		break;
2815 	case e1000_ich9lan:
2816 	case e1000_ich10lan:
2817 		/* Boost Receive side for jumbo frames */
2818 		if (adapter->hw.mac.max_frame_size > 4096)
2819 			pba = E1000_PBA_14K;
2820 		else
2821 			pba = E1000_PBA_10K;
2822 		break;
2823 	case e1000_pchlan:
2824 	case e1000_pch2lan:
2825 	case e1000_pch_lpt:
2826 		pba = E1000_PBA_26K;
2827 		break;
2828 	default:
2829 		if (adapter->hw.mac.max_frame_size > 8192)
2830 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2831 		else
2832 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2833 	}
2834 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2835 
2836 	/*
2837 	 * These parameters control the automatic generation (Tx) and
2838 	 * response (Rx) to Ethernet PAUSE frames.
2839 	 * - High water mark should allow for at least two frames to be
2840 	 *   received after sending an XOFF.
2841 	 * - Low water mark works best when it is very near the high water mark.
2842 	 *   This allows the receiver to restart by sending XON when it has
2843 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2844 	 *   restart after one full frame is pulled from the buffer. There
2845 	 *   could be several smaller frames in the buffer and if so they will
2846 	 *   not trigger the XON until their total number reduces the buffer
2847 	 *   by 1500.
2848 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2849 	 */
2850 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2851 	hw->fc.high_water = rx_buffer_size -
2852 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2853 	hw->fc.low_water = hw->fc.high_water - 1500;
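	/*
	 * For example: a 48K packet buffer split 32K Rx / 16K Tx puts
	 * 32 in the PBA low word, so rx_buffer_size = 32 << 10 = 32768;
	 * a 1518-byte max frame rounds up to 2048, giving high_water =
	 * 30720 and low_water = 29220 bytes.
	 */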
2854 
2855 	if (adapter->fc) /* locally set flow control value? */
2856 		hw->fc.requested_mode = adapter->fc;
2857 	else
2858 		hw->fc.requested_mode = e1000_fc_full;
2859 
2860 	if (hw->mac.type == e1000_80003es2lan)
2861 		hw->fc.pause_time = 0xFFFF;
2862 	else
2863 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2864 
2865 	hw->fc.send_xon = TRUE;
2866 
2867 	/* Device specific overrides/settings */
2868 	switch (hw->mac.type) {
2869 	case e1000_pchlan:
2870 		/* Workaround: no TX flow ctrl for PCH */
2871 		hw->fc.requested_mode = e1000_fc_rx_pause;
2872 		hw->fc.pause_time = 0xFFFF; /* override */
2873 		if (if_getmtu(ifp) > ETHERMTU) {
2874 			hw->fc.high_water = 0x3500;
2875 			hw->fc.low_water = 0x1500;
2876 		} else {
2877 			hw->fc.high_water = 0x5000;
2878 			hw->fc.low_water = 0x3000;
2879 		}
2880 		hw->fc.refresh_time = 0x1000;
2881 		break;
2882 	case e1000_pch2lan:
2883 	case e1000_pch_lpt:
2884 		hw->fc.high_water = 0x5C20;
2885 		hw->fc.low_water = 0x5048;
2886 		hw->fc.pause_time = 0x0650;
2887 		hw->fc.refresh_time = 0x0400;
2888 		/* Jumbos need adjusted PBA */
2889 		if (if_getmtu(ifp) > ETHERMTU)
2890 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2891 		else
2892 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2893 		break;
2894 	case e1000_ich9lan:
2895 	case e1000_ich10lan:
2896 		if (if_getmtu(ifp) > ETHERMTU) {
2897 			hw->fc.high_water = 0x2800;
2898 			hw->fc.low_water = hw->fc.high_water - 8;
2899 			break;
2900 		}
2901 		/* else fall thru */
2902 	default:
2903 		if (hw->mac.type == e1000_80003es2lan)
2904 			hw->fc.pause_time = 0xFFFF;
2905 		break;
2906 	}
2907 
2908 	/* Issue a global reset */
2909 	e1000_reset_hw(hw);
2910 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2911 	em_disable_aspm(adapter);
2912 	/* and a re-init */
2913 	if (e1000_init_hw(hw) < 0) {
2914 		device_printf(dev, "Hardware Initialization Failed\n");
2915 		return;
2916 	}
2917 
2918 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2919 	e1000_get_phy_info(hw);
2920 	e1000_check_for_link(hw);
2921 	return;
2922 }
2923 
2924 /*********************************************************************
2925  *
2926  *  Setup networking device structure and register an interface.
2927  *
2928  **********************************************************************/
2929 static int
2930 em_setup_interface(device_t dev, struct adapter *adapter)
2931 {
2932 	if_t ifp;
2933 
2934 	INIT_DEBUGOUT("em_setup_interface: begin");
2935 
2936 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2937 	if (ifp == NULL) {
2938 		device_printf(dev, "can not allocate ifnet structure\n");
2939 		return (-1);
2940 	}
2941 	if_initname_drv(ifp, device_get_name(dev), device_get_unit(dev));
2942 	if_setdev(ifp, dev);
2943 	if_setinitfn(ifp, em_init);
2944 	if_setsoftc(ifp, adapter);
2945 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2946 	if_setioctlfn(ifp, em_ioctl);
2947 #ifdef EM_MULTIQUEUE
2948 	/* Multiqueue stack interface */
2949 	if_settransmitfn(ifp, em_mq_start);
2950 	if_setqflushfn(ifp, em_qflush);
2951 #else
2952 	if_setstartfn(ifp, em_start);
2953 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
2954 	if_setsendqready(ifp);
2955 #endif
2956 
2957 	ether_ifattach_drv(ifp, adapter->hw.mac.addr);
2958 
2959 	if_setcapabilities(ifp, 0);
2960 	if_setcapenable(ifp, 0);
2961 
2962 
2963 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
2964 	    IFCAP_TSO4, 0);
2965 	/*
2966 	 * Tell the upper layer(s) we
2967 	 * support full VLAN capability
2968 	 */
2969 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2970 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
2971 	    IFCAP_VLAN_MTU, 0);
2972 	if_setcapenable(ifp, if_getcapabilities(ifp));
2973 
2974 	/*
2975 	** Don't turn this on by default, if vlans are
2976 	** created on another pseudo device (e.g. lagg)
2977 	** then vlan events are not passed thru, breaking
2978 	** operation, but with HW FILTER off it works. If
2979 	** using vlans directly on the em driver you can
2980 	** enable this and get full hardware tag filtering.
2981 	*/
2982 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
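	/* It can still be enabled at run time, e.g. "ifconfig em0 vlanhwfilter". */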
2983 
2984 #ifdef DEVICE_POLLING
2985 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
2986 #endif
2987 
2988 	/* Enable only WOL MAGIC by default */
2989 	if (adapter->wol) {
2990 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
2991 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
2992 	}
2993 
2994 	/*
2995 	 * Specify the media types supported by this adapter and register
2996 	 * callbacks to update media and link information
2997 	 */
2998 	ifmedia_init_drv(&adapter->media, IFM_IMASK,
2999 	    em_media_change, em_media_status);
3000 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3001 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3002 		u_char fiber_type = IFM_1000_SX;	/* default type */
3003 
3004 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3005 			    0, NULL);
3006 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3007 	} else {
3008 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3009 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3010 			    0, NULL);
3011 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3012 			    0, NULL);
3013 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3014 			    0, NULL);
3015 		if (adapter->hw.phy.type != e1000_phy_ife) {
3016 			ifmedia_add(&adapter->media,
3017 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3018 			ifmedia_add(&adapter->media,
3019 				IFM_ETHER | IFM_1000_T, 0, NULL);
3020 		}
3021 	}
3022 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3023 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3024 	return (0);
3025 }
3026 
3027 
3028 /*
3029  * Manage DMA'able memory.
3030  */
3031 static void
3032 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3033 {
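	/*
	 * bus_dmamap_load() callback: the tag allows exactly one
	 * segment, so record that segment's bus address for the caller;
	 * on error the caller's address is left untouched.
	 */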
3034 	if (error)
3035 		return;
3036 	*(bus_addr_t *) arg = segs[0].ds_addr;
3037 }
3038 
3039 static int
3040 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3041         struct em_dma_alloc *dma, int mapflags)
3042 {
3043 	int error;
3044 
3045 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3046 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3047 				BUS_SPACE_MAXADDR,	/* lowaddr */
3048 				BUS_SPACE_MAXADDR,	/* highaddr */
3049 				NULL, NULL,		/* filter, filterarg */
3050 				size,			/* maxsize */
3051 				1,			/* nsegments */
3052 				size,			/* maxsegsize */
3053 				0,			/* flags */
3054 				NULL,			/* lockfunc */
3055 				NULL,			/* lockarg */
3056 				&dma->dma_tag);
3057 	if (error) {
3058 		device_printf(adapter->dev,
3059 		    "%s: bus_dma_tag_create failed: %d\n",
3060 		    __func__, error);
3061 		goto fail_0;
3062 	}
3063 
3064 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3065 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3066 	if (error) {
3067 		device_printf(adapter->dev,
3068 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3069 		    __func__, (uintmax_t)size, error);
3070 		goto fail_2;
3071 	}
3072 
3073 	dma->dma_paddr = 0;
3074 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3075 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3076 	if (error || dma->dma_paddr == 0) {
3077 		device_printf(adapter->dev,
3078 		    "%s: bus_dmamap_load failed: %d\n",
3079 		    __func__, error);
3080 		goto fail_3;
3081 	}
3082 
3083 	return (0);
3084 
3085 fail_3:
3086 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3087 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3088 fail_2:
3089 	bus_dma_tag_destroy(dma->dma_tag);
3090 fail_0:
3091 	dma->dma_tag = NULL;
3092 
3093 	return (error);
3094 }
3095 
3096 static void
3097 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3098 {
3099 	if (dma->dma_tag == NULL)
3100 		return;
3101 	if (dma->dma_paddr != 0) {
3102 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3103 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3104 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3105 		dma->dma_paddr = 0;
3106 	}
3107 	if (dma->dma_vaddr != NULL) {
3108 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3109 		dma->dma_vaddr = NULL;
3110 	}
3111 	bus_dma_tag_destroy(dma->dma_tag);
3112 	dma->dma_tag = NULL;
3113 }
3114 
3115 
3116 /*********************************************************************
3117  *
3118  *  Allocate memory for the transmit and receive rings, and then
3119  *  the descriptors associated with each, called only once at attach.
3120  *
3121  **********************************************************************/
3122 static int
3123 em_allocate_queues(struct adapter *adapter)
3124 {
3125 	device_t		dev = adapter->dev;
3126 	struct tx_ring		*txr = NULL;
3127 	struct rx_ring		*rxr = NULL;
3128 	int rsize, tsize, error = E1000_SUCCESS;
3129 	int txconf = 0, rxconf = 0;
3130 
3131 
3132 	/* Allocate the TX ring struct memory */
3133 	if (!(adapter->tx_rings =
3134 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3135 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3136 		device_printf(dev, "Unable to allocate TX ring memory\n");
3137 		error = ENOMEM;
3138 		goto fail;
3139 	}
3140 
3141 	/* Now allocate the RX */
3142 	if (!(adapter->rx_rings =
3143 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3144 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3145 		device_printf(dev, "Unable to allocate RX ring memory\n");
3146 		error = ENOMEM;
3147 		goto rx_fail;
3148 	}
3149 
3150 	tsize = roundup2(adapter->num_tx_desc *
3151 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3152 	/*
3153 	 * Now set up the TX queues, txconf is needed to handle the
3154 	 * possibility that things fail midcourse and we need to
3155 	 * undo memory gracefully
3156 	 */
3157 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3158 		/* Set up some basics */
3159 		txr = &adapter->tx_rings[i];
3160 		txr->adapter = adapter;
3161 		txr->me = i;
3162 
3163 		/* Initialize the TX lock */
3164 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3165 		    device_get_nameunit(dev), txr->me);
3166 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3167 
3168 		if (em_dma_malloc(adapter, tsize,
3169 			&txr->txdma, BUS_DMA_NOWAIT)) {
3170 			device_printf(dev,
3171 			    "Unable to allocate TX Descriptor memory\n");
3172 			error = ENOMEM;
3173 			goto err_tx_desc;
3174 		}
3175 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3176 		bzero((void *)txr->tx_base, tsize);
3177 
3178 		if (em_allocate_transmit_buffers(txr)) {
3179 			device_printf(dev,
3180 			    "Critical Failure setting up transmit buffers\n");
3181 			error = ENOMEM;
3182 			goto err_tx_desc;
3183 		}
3184 #if __FreeBSD_version >= 800000
3185 		/* Allocate a buf ring */
3186 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3187 		    M_WAITOK, &txr->tx_mtx);
3188 #endif
3189 	}
3190 
3191 	/*
3192 	 * Next the RX queues...
3193 	 */
3194 	rsize = roundup2(adapter->num_rx_desc *
3195 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3196 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3197 		rxr = &adapter->rx_rings[i];
3198 		rxr->adapter = adapter;
3199 		rxr->me = i;
3200 
3201 		/* Initialize the RX lock */
3202 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3203 		    device_get_nameunit(dev), rxr->me);
3204 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3205 
3206 		if (em_dma_malloc(adapter, rsize,
3207 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3208 			device_printf(dev,
3209 			    "Unable to allocate RX Descriptor memory\n");
3210 			error = ENOMEM;
3211 			goto err_rx_desc;
3212 		}
3213 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3214 		bzero((void *)rxr->rx_base, rsize);
3215 
3216 		/* Allocate receive buffers for the ring */
3217 		if (em_allocate_receive_buffers(rxr)) {
3218 			device_printf(dev,
3219 			    "Critical Failure setting up receive buffers\n");
3220 			error = ENOMEM;
3221 			goto err_rx_desc;
3222 		}
3223 	}
3224 
3225 	return (0);
3226 
3227 err_rx_desc:
3228 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3229 		em_dma_free(adapter, &rxr->rxdma);
3230 err_tx_desc:
3231 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3232 		em_dma_free(adapter, &txr->txdma);
3233 	free(adapter->rx_rings, M_DEVBUF);
3234 rx_fail:
3235 #if __FreeBSD_version >= 800000
3236 	buf_ring_free(txr->br, M_DEVBUF);
3237 #endif
3238 	free(adapter->tx_rings, M_DEVBUF);
3239 fail:
3240 	return (error);
3241 }
3242 
3243 
3244 /*********************************************************************
3245  *
3246  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3247  *  the information needed to transmit a packet on the wire. This is
3248  *  called only once at attach, setup is done every reset.
3249  *
3250  **********************************************************************/
3251 static int
3252 em_allocate_transmit_buffers(struct tx_ring *txr)
3253 {
3254 	struct adapter *adapter = txr->adapter;
3255 	device_t dev = adapter->dev;
3256 	struct em_buffer *txbuf;
3257 	int error, i;
3258 
3259 	/*
3260 	 * Setup DMA descriptor areas.
3261 	 */
3262 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3263 			       1, 0,			/* alignment, bounds */
3264 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3265 			       BUS_SPACE_MAXADDR,	/* highaddr */
3266 			       NULL, NULL,		/* filter, filterarg */
3267 			       EM_TSO_SIZE,		/* maxsize */
3268 			       EM_MAX_SCATTER,		/* nsegments */
3269 			       PAGE_SIZE,		/* maxsegsize */
3270 			       0,			/* flags */
3271 			       NULL,			/* lockfunc */
3272 			       NULL,			/* lockfuncarg */
3273 			       &txr->txtag))) {
3274 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3275 		goto fail;
3276 	}
3277 
3278 	if (!(txr->tx_buffers =
3279 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3280 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3281 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3282 		error = ENOMEM;
3283 		goto fail;
3284 	}
3285 
3286 	/* Create the descriptor buffer dma maps */
3287 	txbuf = txr->tx_buffers;
3288 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3289 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3290 		if (error != 0) {
3291 			device_printf(dev, "Unable to create TX DMA map\n");
3292 			goto fail;
3293 		}
3294 	}
3295 
3296 	return 0;
3297 fail:
3298 	/* We free all, it handles case where we are in the middle */
3299 	em_free_transmit_structures(adapter);
3300 	return (error);
3301 }
3302 
3303 /*********************************************************************
3304  *
3305  *  Initialize a transmit ring.
3306  *
3307  **********************************************************************/
3308 static void
3309 em_setup_transmit_ring(struct tx_ring *txr)
3310 {
3311 	struct adapter *adapter = txr->adapter;
3312 	struct em_buffer *txbuf;
3313 	int i;
3314 #ifdef DEV_NETMAP
3315 	struct netmap_slot *slot;
3316 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3317 #endif /* DEV_NETMAP */
3318 
3319 	/* Clear the old descriptor contents */
3320 	EM_TX_LOCK(txr);
3321 #ifdef DEV_NETMAP
3322 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3323 #endif /* DEV_NETMAP */
3324 
3325 	bzero((void *)txr->tx_base,
3326 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3327 	/* Reset indices */
3328 	txr->next_avail_desc = 0;
3329 	txr->next_to_clean = 0;
3330 
3331 	/* Free any existing tx buffers. */
3332 	txbuf = txr->tx_buffers;
3333 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3334 		if (txbuf->m_head != NULL) {
3335 			bus_dmamap_sync(txr->txtag, txbuf->map,
3336 			    BUS_DMASYNC_POSTWRITE);
3337 			bus_dmamap_unload(txr->txtag, txbuf->map);
3338 			m_freem(txbuf->m_head);
3339 			txbuf->m_head = NULL;
3340 		}
3341 #ifdef DEV_NETMAP
3342 		if (slot) {
3343 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3344 			uint64_t paddr;
3345 			void *addr;
3346 
3347 			addr = PNMB(slot + si, &paddr);
3348 			txr->tx_base[i].buffer_addr = htole64(paddr);
3349 			/* reload the map for netmap mode */
3350 			netmap_load_map(txr->txtag, txbuf->map, addr);
3351 		}
3352 #endif /* DEV_NETMAP */
3353 
3354 		/* clear the watch index */
3355 		txbuf->next_eop = -1;
3356 	}
3357 
3358 	/* Set number of descriptors available */
3359 	txr->tx_avail = adapter->num_tx_desc;
3360 	txr->queue_status = EM_QUEUE_IDLE;
3361 
3362 	/* Clear checksum offload context. */
3363 	txr->last_hw_offload = 0;
3364 	txr->last_hw_ipcss = 0;
3365 	txr->last_hw_ipcso = 0;
3366 	txr->last_hw_tucss = 0;
3367 	txr->last_hw_tucso = 0;
3368 
3369 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3370 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3371 	EM_TX_UNLOCK(txr);
3372 }
3373 
3374 /*********************************************************************
3375  *
3376  *  Initialize all transmit rings.
3377  *
3378  **********************************************************************/
3379 static void
3380 em_setup_transmit_structures(struct adapter *adapter)
3381 {
3382 	struct tx_ring *txr = adapter->tx_rings;
3383 
3384 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3385 		em_setup_transmit_ring(txr);
3386 
3387 	return;
3388 }
3389 
3390 /*********************************************************************
3391  *
3392  *  Enable transmit unit.
3393  *
3394  **********************************************************************/
3395 static void
3396 em_initialize_transmit_unit(struct adapter *adapter)
3397 {
3398 	struct tx_ring	*txr = adapter->tx_rings;
3399 	struct e1000_hw	*hw = &adapter->hw;
3400 	u32	tctl, tarc, tipg = 0;
3401 
3402 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3403 
3404 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3405 		u64 bus_addr = txr->txdma.dma_paddr;
3406 		/* Base and Len of TX Ring */
3407 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3408 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3409 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3410 		    (u32)(bus_addr >> 32));
3411 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3412 		    (u32)bus_addr);
3413 		/* Init the HEAD/TAIL indices */
3414 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3415 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3416 
3417 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3418 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3419 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3420 
3421 		txr->queue_status = EM_QUEUE_IDLE;
3422 	}
3423 
3424 	/* Set the default values for the Tx Inter Packet Gap timer */
3425 	switch (adapter->hw.mac.type) {
3426 	case e1000_80003es2lan:
3427 		tipg = DEFAULT_82543_TIPG_IPGR1;
3428 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3429 		    E1000_TIPG_IPGR2_SHIFT;
3430 		break;
3431 	default:
3432 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3433 		    (adapter->hw.phy.media_type ==
3434 		    e1000_media_type_internal_serdes))
3435 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3436 		else
3437 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3438 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3439 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3440 	}
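	/*
	 * Note (a sketch of the register layout, per the 8254x family
	 * datasheets): TIPG packs IPGT in bits 9:0, IPGR1 in bits 19:10
	 * and IPGR2 in bits 29:20, which is why the values above are
	 * shifted before being OR'd together.
	 */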
3441 
3442 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3443 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3444 
3445 	if (adapter->hw.mac.type >= e1000_82540)
3446 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3447 		    adapter->tx_abs_int_delay.value);
3448 
3449 	if ((adapter->hw.mac.type == e1000_82571) ||
3450 	    (adapter->hw.mac.type == e1000_82572)) {
3451 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3452 		tarc |= SPEED_MODE_BIT;
3453 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3454 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3455 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3456 		tarc |= 1;
3457 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3458 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3459 		tarc |= 1;
3460 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3461 	}
3462 
3463 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3464 	if (adapter->tx_int_delay.value > 0)
3465 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3466 
3467 	/* Program the Transmit Control Register */
3468 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3469 	tctl &= ~E1000_TCTL_CT;
3470 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3471 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3472 
3473 	if (adapter->hw.mac.type >= e1000_82571)
3474 		tctl |= E1000_TCTL_MULR;
3475 
3476 	/* This write will effectively turn on the transmit unit. */
3477 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3478 
3479 }
3480 
3481 
3482 /*********************************************************************
3483  *
3484  *  Free all transmit rings.
3485  *
3486  **********************************************************************/
3487 static void
3488 em_free_transmit_structures(struct adapter *adapter)
3489 {
3490 	struct tx_ring *txr = adapter->tx_rings;
3491 
3492 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3493 		EM_TX_LOCK(txr);
3494 		em_free_transmit_buffers(txr);
3495 		em_dma_free(adapter, &txr->txdma);
3496 		EM_TX_UNLOCK(txr);
3497 		EM_TX_LOCK_DESTROY(txr);
3498 	}
3499 
3500 	free(adapter->tx_rings, M_DEVBUF);
3501 }
3502 
3503 /*********************************************************************
3504  *
3505  *  Free transmit ring related data structures.
3506  *
3507  **********************************************************************/
3508 static void
3509 em_free_transmit_buffers(struct tx_ring *txr)
3510 {
3511 	struct adapter		*adapter = txr->adapter;
3512 	struct em_buffer	*txbuf;
3513 
3514 	INIT_DEBUGOUT("free_transmit_ring: begin");
3515 
3516 	if (txr->tx_buffers == NULL)
3517 		return;
3518 
3519 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3520 		txbuf = &txr->tx_buffers[i];
3521 		if (txbuf->m_head != NULL) {
3522 			bus_dmamap_sync(txr->txtag, txbuf->map,
3523 			    BUS_DMASYNC_POSTWRITE);
3524 			bus_dmamap_unload(txr->txtag,
3525 			    txbuf->map);
3526 			m_freem(txbuf->m_head);
3527 			txbuf->m_head = NULL;
3528 			if (txbuf->map != NULL) {
3529 				bus_dmamap_destroy(txr->txtag,
3530 				    txbuf->map);
3531 				txbuf->map = NULL;
3532 			}
3533 		} else if (txbuf->map != NULL) {
3534 			bus_dmamap_unload(txr->txtag,
3535 			    txbuf->map);
3536 			bus_dmamap_destroy(txr->txtag,
3537 			    txbuf->map);
3538 			txbuf->map = NULL;
3539 		}
3540 	}
3541 #if __FreeBSD_version >= 800000
3542 	if (txr->br != NULL)
3543 		buf_ring_free(txr->br, M_DEVBUF);
3544 #endif
3545 	if (txr->tx_buffers != NULL) {
3546 		free(txr->tx_buffers, M_DEVBUF);
3547 		txr->tx_buffers = NULL;
3548 	}
3549 	if (txr->txtag != NULL) {
3550 		bus_dma_tag_destroy(txr->txtag);
3551 		txr->txtag = NULL;
3552 	}
3553 	return;
3554 }
3555 
3556 
3557 /*********************************************************************
3558  *  The offload context is protocol specific (TCP/UDP) and thus
3559  *  only needs to be set when the protocol changes. A context
3560  *  change can be a performance detriment, however, and it may be
3561  *  better to disable offload entirely. The reason lies in the
3562  *  way the controller supports pipelined requests from the
3563  *  Tx data DMA. Up to four requests can be pipelined, and they
3564  *  may belong to the same packet or to multiple packets. However,
3565  *  all requests for one packet are issued before a request is
3566  *  issued for a subsequent packet, and if a request for the next
3567  *  packet requires a context change, that request will be stalled
3568  *  until the previous request completes. This means setting up a
3569  *  new context effectively disables pipelined Tx data DMA, which
3570  *  in turn greatly slows down performance when sending small
3571  *  frames.
3572  **********************************************************************/
3573 static void
3574 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3575     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3576 {
3577 	struct adapter			*adapter = txr->adapter;
3578 	struct e1000_context_desc	*TXD = NULL;
3579 	struct em_buffer		*tx_buffer;
3580 	int				cur, hdr_len;
3581 	u32				cmd = 0;
3582 	u16				offload = 0;
3583 	u8				ipcso, ipcss, tucso, tucss;
3584 
3585 	ipcss = ipcso = tucss = tucso = 0;
3586 	hdr_len = ip_off + (ip->ip_hl << 2);
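	/* ip_hl counts 32-bit words, so << 2 converts it to bytes */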
3587 	cur = txr->next_avail_desc;
3588 
3589 	/* Setup of IP header checksum. */
3590 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3591 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3592 		offload |= CSUM_IP;
3593 		ipcss = ip_off;
3594 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3595 		/*
3596 		 * Start offset for header checksum calculation.
3597 		 * End offset for header checksum calculation.
3598 		 * Offset of place to put the checksum.
3599 		 */
3600 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3601 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3602 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3603 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3604 		cmd |= E1000_TXD_CMD_IP;
3605 	}
3606 
3607 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3608  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3609  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3610  		offload |= CSUM_TCP;
3611  		tucss = hdr_len;
3612  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3613 		/*
3614 		 * Setting up a new checksum offload context for every
3615 		 * frame takes a lot of processing time for the hardware.
3616 		 * This also reduces performance a lot for small frames,
3617 		 * so avoid it if the driver can reuse a previously
3618 		 * configured checksum offload context.
3619 		 */
3620  		if (txr->last_hw_offload == offload) {
3621  			if (offload & CSUM_IP) {
3622  				if (txr->last_hw_ipcss == ipcss &&
3623  				    txr->last_hw_ipcso == ipcso &&
3624  				    txr->last_hw_tucss == tucss &&
3625  				    txr->last_hw_tucso == tucso)
3626  					return;
3627  			} else {
3628  				if (txr->last_hw_tucss == tucss &&
3629  				    txr->last_hw_tucso == tucso)
3630  					return;
3631  			}
3632   		}
3633  		txr->last_hw_offload = offload;
3634  		txr->last_hw_tucss = tucss;
3635  		txr->last_hw_tucso = tucso;
3636  		/*
3637  		 * Start offset for payload checksum calculation.
3638  		 * End offset for payload checksum calculation.
3639  		 * Offset of place to put the checksum.
3640  		 */
3641 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3642  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3643  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3644  		TXD->upper_setup.tcp_fields.tucso = tucso;
3645  		cmd |= E1000_TXD_CMD_TCP;
3646  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3647  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3648  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3649  		tucss = hdr_len;
3650  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3651 		/*
3652 		 * Setting up a new checksum offload context for every
3653 		 * frame takes a lot of processing time for the hardware.
3654 		 * This also reduces performance a lot for small frames,
3655 		 * so avoid it if the driver can reuse a previously
3656 		 * configured checksum offload context.
3657 		 */
3658  		if (txr->last_hw_offload == offload) {
3659  			if (offload & CSUM_IP) {
3660  				if (txr->last_hw_ipcss == ipcss &&
3661  				    txr->last_hw_ipcso == ipcso &&
3662  				    txr->last_hw_tucss == tucss &&
3663  				    txr->last_hw_tucso == tucso)
3664  					return;
3665  			} else {
3666  				if (txr->last_hw_tucss == tucss &&
3667  				    txr->last_hw_tucso == tucso)
3668  					return;
3669  			}
3670  		}
3671  		txr->last_hw_offload = offload;
3672  		txr->last_hw_tucss = tucss;
3673  		txr->last_hw_tucso = tucso;
3674  		/*
3675  		 * Start offset for header checksum calculation.
3676  		 * End offset for header checksum calculation.
3677  		 * Offset of place to put the checksum.
3678  		 */
3679 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3680  		TXD->upper_setup.tcp_fields.tucss = tucss;
3681  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3682  		TXD->upper_setup.tcp_fields.tucso = tucso;
3683   	}
3684 
3685  	if (offload & CSUM_IP) {
3686  		txr->last_hw_ipcss = ipcss;
3687  		txr->last_hw_ipcso = ipcso;
3688   	}
3689 
3690 	TXD->tcp_seg_setup.data = htole32(0);
3691 	TXD->cmd_and_length =
3692 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3693 	tx_buffer = &txr->tx_buffers[cur];
3694 	tx_buffer->m_head = NULL;
3695 	tx_buffer->next_eop = -1;
3696 
3697 	if (++cur == adapter->num_tx_desc)
3698 		cur = 0;
3699 
3700 	txr->tx_avail--;
3701 	txr->next_avail_desc = cur;
3702 }
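/*
 * Example (a sketch, not driver code): two back-to-back packets of the
 * same protocol with identical header offsets consume only one context
 * descriptor; the second call returns early because the cached
 * last_hw_* offsets still match, and does not advance next_avail_desc:
 *
 *	em_transmit_checksum_setup(txr, m1, ip_off, ip, &upper, &lower);
 *	em_transmit_checksum_setup(txr, m2, ip_off, ip, &upper, &lower);
 */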
3703 
3704 
3705 /**********************************************************************
3706  *
3707  *  Setup work for hardware segmentation offload (TSO)
3708  *
3709  **********************************************************************/
3710 static void
3711 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3712     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3713 {
3714 	struct adapter			*adapter = txr->adapter;
3715 	struct e1000_context_desc	*TXD;
3716 	struct em_buffer		*tx_buffer;
3717 	int cur, hdr_len;
3718 
3719 	/*
3720 	 * In theory we could reuse the same TSO context if and only
3721 	 * if the frame is the same type (IP/TCP) and has the same MSS.
3722 	 * However, checking whether a frame has the same IP/TCP header
3723 	 * structure is hard to do, so just ignore that and always
3724 	 * establish a new TSO context.
3725 	 */
3726 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
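	/* both ip_hl and th_off count 32-bit words; << 2 yields bytes */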
3727 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3728 		      E1000_TXD_DTYP_D |	/* Data descr type */
3729 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3730 
3731 	/* IP and/or TCP header checksum calculation and insertion. */
3732 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3733 
3734 	cur = txr->next_avail_desc;
3735 	tx_buffer = &txr->tx_buffers[cur];
3736 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3737 
3738 	/*
3739 	 * Start offset for header checksum calculation.
3740 	 * End offset for header checksum calculation.
3741 	 * Offset of place to put the checksum.
3742 	 */
3743 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3744 	TXD->lower_setup.ip_fields.ipcse =
3745 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3746 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3747 	/*
3748 	 * Start offset for payload checksum calculation.
3749 	 * End offset for payload checksum calculation.
3750 	 * Offset of place to put the checksum.
3751 	 */
3752 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3753 	TXD->upper_setup.tcp_fields.tucse = 0;
3754 	TXD->upper_setup.tcp_fields.tucso =
3755 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3756 	/*
3757 	 * Payload size per packet w/o any headers.
3758 	 * Length of all headers up to payload.
3759 	 */
3760 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3761 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3762 
3763 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3764 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3765 				E1000_TXD_CMD_TSE |	/* TSE context */
3766 				E1000_TXD_CMD_IP |	/* Do IP csum */
3767 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3768 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3769 
3770 	tx_buffer->m_head = NULL;
3771 	tx_buffer->next_eop = -1;
3772 
3773 	if (++cur == adapter->num_tx_desc)
3774 		cur = 0;
3775 
3776 	txr->tx_avail--;
3777 	txr->next_avail_desc = cur;
3778 	txr->tx_tso = TRUE;
3779 }
3780 
3781 
3782 /**********************************************************************
3783  *
3784  *  Examine each tx_buffer in the used queue. If the hardware is done
3785  *  processing the packet then free associated resources. The
3786  *  tx_buffer is put back on the free queue.
3787  *
3788  **********************************************************************/
3789 static void
3790 em_txeof(struct tx_ring *txr)
3791 {
3792 	struct adapter	*adapter = txr->adapter;
3793 	int first, last, done, processed;
3794 	struct em_buffer *tx_buffer;
3795 	struct e1000_tx_desc *tx_desc, *eop_desc;
3796 	if_t ifp = adapter->ifp;
3797 
3798 	EM_TX_LOCK_ASSERT(txr);
3799 #ifdef DEV_NETMAP
3800 	if (netmap_tx_irq(ifp, txr->me))
3801 		return;
3802 #endif /* DEV_NETMAP */
3803 
3804 	/* No work, make sure watchdog is off */
3805 	if (txr->tx_avail == adapter->num_tx_desc) {
3806 		txr->queue_status = EM_QUEUE_IDLE;
3807 		return;
3808 	}
3809 
3810 	processed = 0;
3811 	first = txr->next_to_clean;
3812 	tx_desc = &txr->tx_base[first];
3813 	tx_buffer = &txr->tx_buffers[first];
3814 	last = tx_buffer->next_eop;
3815 	eop_desc = &txr->tx_base[last];
3816 
3817 	/*
3818 	 * What this does is get the index of the
3819 	 * first descriptor AFTER the EOP of the
3820 	 * first packet, that way we can do the
3821 	 * simple comparison on the inner while loop.
3822 	 */
3823 	if (++last == adapter->num_tx_desc)
3824 		last = 0;
3825 	done = last;
3826 
3827 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3828 	    BUS_DMASYNC_POSTREAD);
3829 
3830 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3831 		/* We clean the range of the packet */
3832 		while (first != done) {
3833 			tx_desc->upper.data = 0;
3834 			tx_desc->lower.data = 0;
3835 			tx_desc->buffer_addr = 0;
3836 			++txr->tx_avail;
3837 			++processed;
3838 
3839 			if (tx_buffer->m_head) {
3840 				bus_dmamap_sync(txr->txtag,
3841 				    tx_buffer->map,
3842 				    BUS_DMASYNC_POSTWRITE);
3843 				bus_dmamap_unload(txr->txtag,
3844 				    tx_buffer->map);
3845 				m_freem(tx_buffer->m_head);
3846 				tx_buffer->m_head = NULL;
3847 			}
3848 			tx_buffer->next_eop = -1;
3849 			txr->watchdog_time = ticks;
3850 
3851 			if (++first == adapter->num_tx_desc)
3852 				first = 0;
3853 
3854 			tx_buffer = &txr->tx_buffers[first];
3855 			tx_desc = &txr->tx_base[first];
3856 		}
3857 		if_incopackets(ifp, 1);
3858 		/* See if we can continue to the next packet */
3859 		last = tx_buffer->next_eop;
3860 		if (last != -1) {
3861 			eop_desc = &txr->tx_base[last];
3862 			/* Get new done point */
3863 			if (++last == adapter->num_tx_desc) last = 0;
3864 			done = last;
3865 		} else
3866 			break;
3867 	}
3868 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3869 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3870 
3871 	txr->next_to_clean = first;
3872 
3873 	/*
3874 	** Watchdog calculation: we know there is work
3875 	** outstanding or the first return above would have
3876 	** been taken, so nothing processed for too long
3877 	** indicates a hang. The local timer will examine
3878 	** this and do a reset if needed.
3879 	*/
3880 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3881 		txr->queue_status = EM_QUEUE_HUNG;
3882 
3883 	/*
3884 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
3885 	 * to tell the stack that it is OK to send packets.
3886 	 * Notice that all writes of OACTIVE happen under the
3887 	 * TX lock which, with a single queue, guarantees
3888 	 * sanity.
3889 	 */
3890 	if (txr->tx_avail >= EM_MAX_SCATTER)
3891 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3892 
3893 	/* Disable watchdog if all clean */
3894 	if (txr->tx_avail == adapter->num_tx_desc) {
3895 		txr->queue_status = EM_QUEUE_IDLE;
3896 	}
3897 }
3898 
3899 
3900 /*********************************************************************
3901  *
3902  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3903  *
3904  **********************************************************************/
3905 static void
3906 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3907 {
3908 	struct adapter		*adapter = rxr->adapter;
3909 	struct mbuf		*m;
3910 	bus_dma_segment_t	segs[1];
3911 	struct em_buffer	*rxbuf;
3912 	int			i, j, error, nsegs;
3913 	bool			cleaned = FALSE;
3914 
3915 	i = j = rxr->next_to_refresh;
3916 	/*
3917 	** Get one descriptor beyond
3918 	** our work mark to control
3919 	** the loop.
3920 	*/
3921 	if (++j == adapter->num_rx_desc)
3922 		j = 0;
3923 
3924 	while (j != limit) {
3925 		rxbuf = &rxr->rx_buffers[i];
3926 		if (rxbuf->m_head == NULL) {
3927 			m = m_getjcl(M_NOWAIT, MT_DATA,
3928 			    M_PKTHDR, adapter->rx_mbuf_sz);
3929 			/*
3930 			** If we have a temporary resource shortage
3931 			** that causes a failure, just abort refresh
3932 			** for now, we will return to this point when
3933 			** reinvoked from em_rxeof.
3934 			*/
3935 			if (m == NULL)
3936 				goto update;
3937 		} else
3938 			m = rxbuf->m_head;
3939 
3940 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3941 		m->m_flags |= M_PKTHDR;
3942 		m->m_data = m->m_ext.ext_buf;
3943 
3944 		/* Use bus_dma machinery to setup the memory mapping  */
3945 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3946 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3947 		if (error != 0) {
3948 			printf("Refresh mbufs: hdr dmamap load"
3949 			    " failure - %d\n", error);
3950 			m_free(m);
3951 			rxbuf->m_head = NULL;
3952 			goto update;
3953 		}
3954 		rxbuf->m_head = m;
3955 		bus_dmamap_sync(rxr->rxtag,
3956 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3957 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3958 		cleaned = TRUE;
3959 
3960 		i = j; /* Next is precalculated for us */
3961 		rxr->next_to_refresh = i;
3962 		/* Calculate next controlling index */
3963 		if (++j == adapter->num_rx_desc)
3964 			j = 0;
3965 	}
3966 update:
3967 	/*
3968 	** Update the tail pointer only if we refreshed,
3969 	** and only as far as we have refreshed.
3970 	*/
3971 	if (cleaned)
3972 		E1000_WRITE_REG(&adapter->hw,
3973 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3974 
3975 	return;
3976 }
3977 
3978 
3979 /*********************************************************************
3980  *
3981  *  Allocate memory for rx_buffer structures. Since we use one
3982  *  rx_buffer per received packet, the maximum number of rx_buffer's
3983  *  that we'll need is equal to the number of receive descriptors
3984  *  that we've allocated.
3985  *
3986  **********************************************************************/
3987 static int
3988 em_allocate_receive_buffers(struct rx_ring *rxr)
3989 {
3990 	struct adapter		*adapter = rxr->adapter;
3991 	device_t		dev = adapter->dev;
3992 	struct em_buffer	*rxbuf;
3993 	int			error;
3994 
3995 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3996 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3997 	if (rxr->rx_buffers == NULL) {
3998 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3999 		return (ENOMEM);
4000 	}
4001 
4002 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4003 				1, 0,			/* alignment, bounds */
4004 				BUS_SPACE_MAXADDR,	/* lowaddr */
4005 				BUS_SPACE_MAXADDR,	/* highaddr */
4006 				NULL, NULL,		/* filter, filterarg */
4007 				MJUM9BYTES,		/* maxsize */
4008 				1,			/* nsegments */
4009 				MJUM9BYTES,		/* maxsegsize */
4010 				0,			/* flags */
4011 				NULL,			/* lockfunc */
4012 				NULL,			/* lockarg */
4013 				&rxr->rxtag);
4014 	if (error) {
4015 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4016 		    __func__, error);
4017 		goto fail;
4018 	}
4019 
4020 	/* Create a DMA map for every receive buffer */
4021 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4022 		rxbuf = &rxr->rx_buffers[i];
4023 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4024 		if (error) {
4025 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4026 			    __func__, error);
4027 			goto fail;
4028 		}
4029 	}
4030 
4031 	return (0);
4032 
4033 fail:
4034 	em_free_receive_structures(adapter);
4035 	return (error);
4036 }
4037 
4038 
4039 /*********************************************************************
4040  *
4041  *  Initialize a receive ring and its buffers.
4042  *
4043  **********************************************************************/
4044 static int
4045 em_setup_receive_ring(struct rx_ring *rxr)
4046 {
4047 	struct	adapter 	*adapter = rxr->adapter;
4048 	struct em_buffer	*rxbuf;
4049 	bus_dma_segment_t	seg[1];
4050 	int			rsize, nsegs, error = 0;
4051 #ifdef DEV_NETMAP
4052 	struct netmap_slot *slot;
4053 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4054 #endif
4055 
4056 
4057 	/* Clear the ring contents */
4058 	EM_RX_LOCK(rxr);
4059 	rsize = roundup2(adapter->num_rx_desc *
4060 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4061 	bzero((void *)rxr->rx_base, rsize);
4062 #ifdef DEV_NETMAP
4063 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4064 #endif
4065 
4066 	/*
4067 	** Free current RX buffer structs and their mbufs
4068 	*/
4069 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4070 		rxbuf = &rxr->rx_buffers[i];
4071 		if (rxbuf->m_head != NULL) {
4072 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4073 			    BUS_DMASYNC_POSTREAD);
4074 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4075 			m_freem(rxbuf->m_head);
4076 			rxbuf->m_head = NULL; /* mark as freed */
4077 		}
4078 	}
4079 
4080 	/* Now replenish the mbufs */
4081 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4082 		rxbuf = &rxr->rx_buffers[j];
4083 #ifdef DEV_NETMAP
4084 		if (slot) {
4085 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4086 			uint64_t paddr;
4087 			void *addr;
4088 
4089 			addr = PNMB(slot + si, &paddr);
4090 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4091 			/* Update descriptor */
4092 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4093 			continue;
4094 		}
4095 #endif /* DEV_NETMAP */
4096 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4097 		    M_PKTHDR, adapter->rx_mbuf_sz);
4098 		if (rxbuf->m_head == NULL) {
4099 			error = ENOBUFS;
4100 			goto fail;
4101 		}
4102 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4103 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4104 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4105 
4106 		/* Get the memory mapping */
4107 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4108 		    rxbuf->map, rxbuf->m_head, seg,
4109 		    &nsegs, BUS_DMA_NOWAIT);
4110 		if (error != 0) {
4111 			m_freem(rxbuf->m_head);
4112 			rxbuf->m_head = NULL;
4113 			goto fail;
4114 		}
4115 		bus_dmamap_sync(rxr->rxtag,
4116 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4117 
4118 		/* Update descriptor */
4119 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4120 	}
4121 	rxr->next_to_check = 0;
4122 	rxr->next_to_refresh = 0;
4123 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4124 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4125 
4126 fail:
4127 	EM_RX_UNLOCK(rxr);
4128 	return (error);
4129 }
4130 
4131 /*********************************************************************
4132  *
4133  *  Initialize all receive rings.
4134  *
4135  **********************************************************************/
4136 static int
4137 em_setup_receive_structures(struct adapter *adapter)
4138 {
4139 	struct rx_ring *rxr = adapter->rx_rings;
4140 	int q;
4141 
4142 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4143 		if (em_setup_receive_ring(rxr))
4144 			goto fail;
4145 
4146 	return (0);
4147 fail:
4148 	/*
4149 	 * Free RX buffers allocated so far; we will only handle
4150 	 * the rings that completed, the failing case will have
4151 	 * cleaned up for itself. Ring 'q' failed, so it's the terminus.
4152 	 */
4153 	for (int i = 0; i < q; ++i) {
4154 		rxr = &adapter->rx_rings[i];
4155 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4156 			struct em_buffer *rxbuf;
4157 			rxbuf = &rxr->rx_buffers[n];
4158 			if (rxbuf->m_head != NULL) {
4159 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4160 			  	  BUS_DMASYNC_POSTREAD);
4161 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4162 				m_freem(rxbuf->m_head);
4163 				rxbuf->m_head = NULL;
4164 			}
4165 		}
4166 		rxr->next_to_check = 0;
4167 		rxr->next_to_refresh = 0;
4168 	}
4169 
4170 	return (ENOBUFS);
4171 }
4172 
4173 /*********************************************************************
4174  *
4175  *  Free all receive rings.
4176  *
4177  **********************************************************************/
4178 static void
4179 em_free_receive_structures(struct adapter *adapter)
4180 {
4181 	struct rx_ring *rxr = adapter->rx_rings;
4182 
4183 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4184 		em_free_receive_buffers(rxr);
4185 		/* Free the ring memory as well */
4186 		em_dma_free(adapter, &rxr->rxdma);
4187 		EM_RX_LOCK_DESTROY(rxr);
4188 	}
4189 
4190 	free(adapter->rx_rings, M_DEVBUF);
4191 }
4192 
4193 
4194 /*********************************************************************
4195  *
4196  *  Free receive ring data structures
4197  *
4198  **********************************************************************/
4199 static void
4200 em_free_receive_buffers(struct rx_ring *rxr)
4201 {
4202 	struct adapter		*adapter = rxr->adapter;
4203 	struct em_buffer	*rxbuf = NULL;
4204 
4205 	INIT_DEBUGOUT("free_receive_buffers: begin");
4206 
4207 	if (rxr->rx_buffers != NULL) {
4208 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4209 			rxbuf = &rxr->rx_buffers[i];
4210 			if (rxbuf->map != NULL) {
4211 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4212 				    BUS_DMASYNC_POSTREAD);
4213 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4214 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4215 			}
4216 			if (rxbuf->m_head != NULL) {
4217 				m_freem(rxbuf->m_head);
4218 				rxbuf->m_head = NULL;
4219 			}
4220 		}
4221 		free(rxr->rx_buffers, M_DEVBUF);
4222 		rxr->rx_buffers = NULL;
4223 		rxr->next_to_check = 0;
4224 		rxr->next_to_refresh = 0;
4225 	}
4226 
4227 	if (rxr->rxtag != NULL) {
4228 		bus_dma_tag_destroy(rxr->rxtag);
4229 		rxr->rxtag = NULL;
4230 	}
4231 
4232 	return;
4233 }
4234 
4235 
4236 /*********************************************************************
4237  *
4238  *  Enable receive unit.
4239  *
4240  **********************************************************************/
4241 
4242 static void
4243 em_initialize_receive_unit(struct adapter *adapter)
4244 {
4245 	struct rx_ring	*rxr = adapter->rx_rings;
4246 	if_t ifp = adapter->ifp;
4247 	struct e1000_hw	*hw = &adapter->hw;
4248 	u64	bus_addr;
4249 	u32	rctl, rxcsum;
4250 
4251 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4252 
4253 	/*
4254 	 * Make sure receives are disabled while setting
4255 	 * up the descriptor ring
4256 	 */
4257 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4258 	/* Do not disable if ever enabled on this hardware */
4259 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4260 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4261 
4262 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4263 	    adapter->rx_abs_int_delay.value);
4264 	/*
4265 	 * Set the interrupt throttling rate. Value is calculated
4266 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4267 	 */
4268 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
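	/*
	 * Worked example (assuming the driver's usual MAX_INTS_PER_SEC
	 * of 8000): 1/8000 s is 125000 ns per interrupt, which in the
	 * register's 256 ns units comes out to roughly 488.
	 */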
4269 
4270 	/*
4271 	** When using MSIX interrupts we need to throttle
4272 	** using the EITR register (82574 only)
4273 	*/
4274 	if (hw->mac.type == e1000_82574) {
4275 		for (int i = 0; i < 4; i++)
4276 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4277 			    DEFAULT_ITR);
4278 		/* Disable accelerated acknowledge */
4279 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4280 	}
4281 
4282 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4283 	if (if_getcapenable(ifp) & IFCAP_RXCSUM)
4284 		rxcsum |= E1000_RXCSUM_TUOFL;
4285 	else
4286 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4287 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4288 
4289 	/*
4290 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4291 	** long latencies are observed, like Lenovo X60. This
4292 	** change eliminates the problem, but since having positive
4293 	** values in RDTR is a known source of problems on other
4294 	** platforms another solution is being sought.
4295 	*/
4296 	if (hw->mac.type == e1000_82573)
4297 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4298 
4299 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4300 		/* Setup the Base and Length of the Rx Descriptor Ring */
4301 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4302 
4303 		bus_addr = rxr->rxdma.dma_paddr;
4304 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4305 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4306 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4307 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4308 		/* Setup the Head and Tail Descriptor Pointers */
4309 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4310 #ifdef DEV_NETMAP
4311 		/*
4312 		 * an init() while a netmap client is active must
4313 		 * preserve the rx buffers passed to userspace.
4314 		 */
4315 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4316 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4317 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4318 		}
4319 #endif /* DEV_NETMAP */
4320 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4321 	}
4322 
4323 	/* Set PTHRESH for improved jumbo performance */
4324 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4325 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4326 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4327 	    (if_getmtu(ifp) > ETHERMTU)) {
4328 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4329 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4330 	}
4331 
4332 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4333 		if (if_getmtu(ifp) > ETHERMTU)
4334 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4335 		else
4336 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4337 	}
4338 
4339 	/* Setup the Receive Control Register */
4340 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4341 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4342 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4343 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4344 
4345 	/* Strip the CRC */
4346 	rctl |= E1000_RCTL_SECRC;
4347 
4348 	/* Make sure VLAN Filters are off */
4349 	rctl &= ~E1000_RCTL_VFE;
4350 	rctl &= ~E1000_RCTL_SBP;
4351 
4352 	if (adapter->rx_mbuf_sz == MCLBYTES)
4353 		rctl |= E1000_RCTL_SZ_2048;
4354 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4355 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4356 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4357 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4358 
4359 	if (if_getmtu(ifp) > ETHERMTU)
4360 		rctl |= E1000_RCTL_LPE;
4361 	else
4362 		rctl &= ~E1000_RCTL_LPE;
4363 
4364 	/* Write out the settings */
4365 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4366 
4367 	return;
4368 }
4369 
4370 
4371 /*********************************************************************
4372  *
4373  *  This routine executes in interrupt context. It replenishes
4374  *  the mbufs in the descriptor and sends data which has been
4375  *  dma'ed into host memory to upper layer.
4376  *
4377  *  We loop at most count times if count is > 0, or until done if
4378  *  count < 0.
4379  *
4380  *  For polling we also now return the number of cleaned packets
4381  *********************************************************************/
4382 static bool
4383 em_rxeof(struct rx_ring *rxr, int count, int *done)
4384 {
4385 	struct adapter		*adapter = rxr->adapter;
4386 	if_t ifp = adapter->ifp;
4387 	struct mbuf		*mp, *sendmp;
4388 	u8			status = 0;
4389 	u16 			len;
4390 	int			i, processed, rxdone = 0;
4391 	bool			eop;
4392 	struct e1000_rx_desc	*cur;
4393 
4394 	EM_RX_LOCK(rxr);
4395 
4396 #ifdef DEV_NETMAP
4397 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4398 		EM_RX_UNLOCK(rxr);
4399 		return (FALSE);
4400 	}
4401 #endif /* DEV_NETMAP */
4402 
4403 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4404 
4405 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4406 			break;
4407 
4408 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4409 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4410 
4411 		cur = &rxr->rx_base[i];
4412 		status = cur->status;
4413 		mp = sendmp = NULL;
4414 
4415 		if ((status & E1000_RXD_STAT_DD) == 0)
4416 			break;
4417 
4418 		len = le16toh(cur->length);
4419 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4420 
4421 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4422 		    (rxr->discard == TRUE)) {
4423 			adapter->dropped_pkts++;
4424 			++rxr->rx_discarded;
4425 			if (!eop) /* Catch subsequent segs */
4426 				rxr->discard = TRUE;
4427 			else
4428 				rxr->discard = FALSE;
4429 			em_rx_discard(rxr, i);
4430 			goto next_desc;
4431 		}
4432 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4433 
4434 		/* Assign correct length to the current fragment */
4435 		mp = rxr->rx_buffers[i].m_head;
4436 		mp->m_len = len;
4437 
4438 		/* Trigger for refresh */
4439 		rxr->rx_buffers[i].m_head = NULL;
4440 
4441 		/* First segment? */
4442 		if (rxr->fmp == NULL) {
4443 			mp->m_pkthdr.len = len;
4444 			rxr->fmp = rxr->lmp = mp;
4445 		} else {
4446 			/* Chain mbuf's together */
4447 			mp->m_flags &= ~M_PKTHDR;
4448 			rxr->lmp->m_next = mp;
4449 			rxr->lmp = mp;
4450 			rxr->fmp->m_pkthdr.len += len;
4451 		}
4452 
4453 		if (eop) {
4454 			--count;
4455 			sendmp = rxr->fmp;
4456 			if_setrcvif(sendmp, ifp);
4457 			if_incipackets(ifp, 1);
4458 			em_receive_checksum(cur, sendmp);
4459 #ifndef __NO_STRICT_ALIGNMENT
4460 			if (adapter->hw.mac.max_frame_size >
4461 			    (MCLBYTES - ETHER_ALIGN) &&
4462 			    em_fixup_rx(rxr) != 0)
4463 				goto skip;
4464 #endif
4465 			if (status & E1000_RXD_STAT_VP) {
4466 				if_setvtag(sendmp,
4467 				    le16toh(cur->special));
4468 				sendmp->m_flags |= M_VLANTAG;
4469 			}
4470 #ifndef __NO_STRICT_ALIGNMENT
4471 skip:
4472 #endif
4473 			rxr->fmp = rxr->lmp = NULL;
4474 		}
4475 next_desc:
4476 		/* Zero out the receive descriptors status. */
4477 		cur->status = 0;
4478 		++rxdone;	/* cumulative for POLL */
4479 		++processed;
4480 
4481 		/* Advance our pointers to the next descriptor. */
4482 		if (++i == adapter->num_rx_desc)
4483 			i = 0;
4484 
4485 		/* Send to the stack */
4486 		if (sendmp != NULL) {
4487 			rxr->next_to_check = i;
4488 			EM_RX_UNLOCK(rxr);
4489 			if_input(ifp, sendmp);
4490 			EM_RX_LOCK(rxr);
4491 			i = rxr->next_to_check;
4492 		}
4493 
4494 		/* Only refresh mbufs every 8 descriptors */
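		/* (batching amortizes the RDT tail-register write that
		 *  em_refresh_mbufs does over several descriptors) */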
4495 		if (processed == 8) {
4496 			em_refresh_mbufs(rxr, i);
4497 			processed = 0;
4498 		}
4499 	}
4500 
4501 	/* Catch any remaining refresh work */
4502 	if (e1000_rx_unrefreshed(rxr))
4503 		em_refresh_mbufs(rxr, i);
4504 
4505 	rxr->next_to_check = i;
4506 	if (done != NULL)
4507 		*done = rxdone;
4508 	EM_RX_UNLOCK(rxr);
4509 
4510 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4511 }
4512 
4513 static __inline void
4514 em_rx_discard(struct rx_ring *rxr, int i)
4515 {
4516 	struct em_buffer	*rbuf;
4517 
4518 	rbuf = &rxr->rx_buffers[i];
4519 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4520 
4521 	/* Free any previous pieces */
4522 	if (rxr->fmp != NULL) {
4523 		rxr->fmp->m_flags |= M_PKTHDR;
4524 		m_freem(rxr->fmp);
4525 		rxr->fmp = NULL;
4526 		rxr->lmp = NULL;
4527 	}
4528 	/*
4529 	** Free buffer and allow em_refresh_mbufs()
4530 	** to clean up and recharge buffer.
4531 	*/
4532 	if (rbuf->m_head) {
4533 		m_free(rbuf->m_head);
4534 		rbuf->m_head = NULL;
4535 	}
4536 	return;
4537 }
4538 
4539 #ifndef __NO_STRICT_ALIGNMENT
4540 /*
4541  * When jumbo frames are enabled we should realign the entire payload on
4542  * architectures with strict alignment. This is a serious design mistake
4543  * of the 8254x as it nullifies DMA operations: the chip only allows RX
4544  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4545  * 2048 - ETHER_ALIGN to align the payload. On architectures without
4546  * strict alignment restrictions the 8254x still performs unaligned
4547  * memory accesses, which reduces performance as well. To avoid copying
4548  * over an entire frame to align it, we allocate a new mbuf, copy the
4549  * ethernet header into it, and prepend the new mbuf onto the existing
4550  * mbuf chain.
4551  * Be aware, the best performance of the 8254x is achieved only when
4552  * jumbo frames are not used at all on strict-alignment architectures.
4553  */
4554 static int
4555 em_fixup_rx(struct rx_ring *rxr)
4556 {
4557 	struct adapter *adapter = rxr->adapter;
4558 	struct mbuf *m, *n;
4559 	int error;
4560 
4561 	error = 0;
4562 	m = rxr->fmp;
4563 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4564 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4565 		m->m_data += ETHER_HDR_LEN;
4566 	} else {
4567 		MGETHDR(n, M_NOWAIT, MT_DATA);
4568 		if (n != NULL) {
4569 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4570 			m->m_data += ETHER_HDR_LEN;
4571 			m->m_len -= ETHER_HDR_LEN;
4572 			n->m_len = ETHER_HDR_LEN;
4573 			M_MOVE_PKTHDR(n, m);
4574 			n->m_next = m;
4575 			rxr->fmp = n;
4576 		} else {
4577 			adapter->dropped_pkts++;
4578 			m_freem(rxr->fmp);
4579 			rxr->fmp = NULL;
4580 			error = ENOMEM;
4581 		}
4582 	}
4583 
4584 	return (error);
4585 }
4586 #endif
4587 
4588 /*********************************************************************
4589  *
4590  *  Verify that the hardware indicated that the checksum is valid.
4591  *  Inform the stack about the status of checksum so that stack
4592  *  doesn't spend time verifying the checksum.
4593  *
4594  *********************************************************************/
4595 static void
4596 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4597 {
4598 	mp->m_pkthdr.csum_flags = 0;
4599 
4600 	/* Ignore Checksum bit is set */
4601 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4602 		return;
4603 
4604 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4605 		return;
4606 
4607 	/* IP Checksum Good? */
4608 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4609 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4610 
4611 	/* TCP or UDP checksum */
4612 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4613 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4614 		mp->m_pkthdr.csum_data = htons(0xffff);
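		/*
		 * Convention: with CSUM_PSEUDO_HDR set the stack takes
		 * csum_data as the final checksum (XORed with 0xffff),
		 * so 0xffff means "verified good" and no software
		 * checksum pass is needed.
		 */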
4615 	}
4616 }
4617 
4618 /*
4619  * This routine is run via a vlan
4620  * config EVENT
4621  */
4622 static void
4623 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4624 {
4625 	struct adapter	*adapter = if_getsoftc(ifp);
4626 	u32		index, bit;
4627 
4628 	if ((void *)adapter != arg)	/* Not our event */
4629 		return;
4630 
4631 	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
4632 		return;
4633 
4634 	EM_CORE_LOCK(adapter);
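	/*
	 * The 4096-bit VLAN filter table is kept as 128 32-bit words:
	 * bits 11:5 of the tag select the word, bits 4:0 the bit in it.
	 */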
4635 	index = (vtag >> 5) & 0x7F;
4636 	bit = vtag & 0x1F;
4637 	adapter->shadow_vfta[index] |= (1 << bit);
4638 	++adapter->num_vlans;
4639 	/* Re-init to load the changes */
4640 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4641 		em_init_locked(adapter);
4642 	EM_CORE_UNLOCK(adapter);
4643 }
4644 
4645 /*
4646  * This routine is run via a vlan
4647  * unconfig EVENT
4648  */
4649 static void
4650 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4651 {
4652 	struct adapter	*adapter = if_getsoftc(ifp);
4653 	u32		index, bit;
4654 
4655 	if (adapter != arg)
4656 		return;
4657 
4658 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
4659 		return;
4660 
4661 	EM_CORE_LOCK(adapter);
4662 	index = (vtag >> 5) & 0x7F;
4663 	bit = vtag & 0x1F;
4664 	adapter->shadow_vfta[index] &= ~(1 << bit);
4665 	--adapter->num_vlans;
4666 	/* Re-init to load the changes */
4667 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4668 		em_init_locked(adapter);
4669 	EM_CORE_UNLOCK(adapter);
4670 }
4671 
4672 static void
4673 em_setup_vlan_hw_support(struct adapter *adapter)
4674 {
4675 	struct e1000_hw *hw = &adapter->hw;
4676 	u32             reg;
4677 
4678 	/*
4679 	** We get here through init_locked, meaning
4680 	** a soft reset: it has already cleared
4681 	** the VFTA and other state, so if no
4682 	** vlans have been registered, do nothing.
4683 	*/
4684 	if (adapter->num_vlans == 0)
4685 		return;
4686 
4687 	/*
4688 	** A soft reset zeroes out the VFTA, so
4689 	** we need to repopulate it now.
4690 	*/
4691 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4692 		if (adapter->shadow_vfta[i] != 0)
4693 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4694 			    i, adapter->shadow_vfta[i]);
4695 
4696 	reg = E1000_READ_REG(hw, E1000_CTRL);
4697 	reg |= E1000_CTRL_VME;
4698 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4699 
4700 	/* Enable the Filter Table */
4701 	reg = E1000_READ_REG(hw, E1000_RCTL);
4702 	reg &= ~E1000_RCTL_CFIEN;
4703 	reg |= E1000_RCTL_VFE;
4704 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4705 }
4706 
4707 static void
4708 em_enable_intr(struct adapter *adapter)
4709 {
4710 	struct e1000_hw *hw = &adapter->hw;
4711 	u32 ims_mask = IMS_ENABLE_MASK;
4712 
4713 	if (hw->mac.type == e1000_82574) {
4714 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4715 		ims_mask |= EM_MSIX_MASK;
4716 	}
4717 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4718 }
4719 
4720 static void
4721 em_disable_intr(struct adapter *adapter)
4722 {
4723 	struct e1000_hw *hw = &adapter->hw;
4724 
4725 	if (hw->mac.type == e1000_82574)
4726 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4727 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4728 }
4729 
4730 /*
4731  * Bit of a misnomer, what this really means is
4732  * to enable OS management of the system... aka
4733  * to disable special hardware management features
4734  */
4735 static void
4736 em_init_manageability(struct adapter *adapter)
4737 {
4738 	/* A shared code workaround */
4739 #define E1000_82542_MANC2H E1000_MANC2H
4740 	if (adapter->has_manage) {
4741 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4742 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4743 
4744 		/* disable hardware interception of ARP */
4745 		manc &= ~(E1000_MANC_ARP_EN);
4746 
4747 		/* enable receiving management packets to the host */
4748 		manc |= E1000_MANC_EN_MNG2HOST;
4749 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4750 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4751 		manc2h |= E1000_MNG2HOST_PORT_623;
4752 		manc2h |= E1000_MNG2HOST_PORT_664;
4753 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4754 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4755 	}
4756 }
4757 
4758 /*
4759  * Give control back to hardware management
4760  * controller if there is one.
4761  */
4762 static void
4763 em_release_manageability(struct adapter *adapter)
4764 {
4765 	if (adapter->has_manage) {
4766 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4767 
4768 		/* re-enable hardware interception of ARP */
4769 		manc |= E1000_MANC_ARP_EN;
4770 		manc &= ~E1000_MANC_EN_MNG2HOST;
4771 
4772 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4773 	}
4774 }
4775 
4776 /*
4777  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4778  * For ASF and Pass Through versions of f/w this means
4779  * that the driver is loaded. For AMT version type f/w
4780  * this means that the network i/f is open.
4781  */
4782 static void
4783 em_get_hw_control(struct adapter *adapter)
4784 {
4785 	u32 ctrl_ext, swsm;
4786 
4787 	if (adapter->hw.mac.type == e1000_82573) {
4788 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4789 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4790 		    swsm | E1000_SWSM_DRV_LOAD);
4791 		return;
4792 	}
4793 	/* else */
4794 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4795 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4796 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4797 	return;
4798 }
4799 
4800 /*
4801  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4802  * For ASF and Pass Through versions of f/w this means that
4803  * the driver is no longer loaded. For AMT versions of the
4804  * f/w this means that the network i/f is closed.
4805  */
4806 static void
4807 em_release_hw_control(struct adapter *adapter)
4808 {
4809 	u32 ctrl_ext, swsm;
4810 
4811 	if (!adapter->has_manage)
4812 		return;
4813 
4814 	if (adapter->hw.mac.type == e1000_82573) {
4815 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4816 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4817 		    swsm & ~E1000_SWSM_DRV_LOAD);
4818 		return;
4819 	}
4820 	/* else */
4821 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4822 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4823 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4824 	return;
4825 }
4826 
4827 static int
4828 em_is_valid_ether_addr(u8 *addr)
4829 {
4830 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
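	/*
	 * addr[0] & 1 is the multicast (I/G) bit, which is never valid
	 * in a station address; the all-zero address is rejected too.
	 */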
4831 
4832 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4833 		return (FALSE);
4834 	}
4835 
4836 	return (TRUE);
4837 }
4838 
4839 /*
4840 ** Parse the interface capabilities with regard
4841 ** to both system management and wake-on-lan for
4842 ** later use.
4843 */
4844 static void
4845 em_get_wakeup(device_t dev)
4846 {
4847 	struct adapter	*adapter = device_get_softc(dev);
4848 	u16		eeprom_data = 0, device_id, apme_mask;
4849 
4850 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4851 	apme_mask = EM_EEPROM_APME;
4852 
4853 	switch (adapter->hw.mac.type) {
4854 	case e1000_82573:
4855 	case e1000_82583:
4856 		adapter->has_amt = TRUE;
4857 		/* Falls thru */
4858 	case e1000_82571:
4859 	case e1000_82572:
4860 	case e1000_80003es2lan:
4861 		if (adapter->hw.bus.func == 1) {
4862 			e1000_read_nvm(&adapter->hw,
4863 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4864 			break;
4865 		} else
4866 			e1000_read_nvm(&adapter->hw,
4867 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4868 		break;
4869 	case e1000_ich8lan:
4870 	case e1000_ich9lan:
4871 	case e1000_ich10lan:
4872 	case e1000_pchlan:
4873 	case e1000_pch2lan:
4874 		apme_mask = E1000_WUC_APME;
4875 		adapter->has_amt = TRUE;
4876 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4877 		break;
4878 	default:
4879 		e1000_read_nvm(&adapter->hw,
4880 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4881 		break;
4882 	}
4883 	if (eeprom_data & apme_mask)
4884 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4885 	/*
4886 	 * We have the eeprom settings, now apply the special cases
4887 	 * where the eeprom may be wrong or the board won't support
4888 	 * wake on lan on a particular port.
4889 	 */
4890 	device_id = pci_get_device(dev);
4891 	switch (device_id) {
4892 	case E1000_DEV_ID_82571EB_FIBER:
4893 		/* Wake events only supported on port A for dual fiber
4894 		 * regardless of eeprom setting */
4895 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4896 		    E1000_STATUS_FUNC_1)
4897 			adapter->wol = 0;
4898 		break;
4899 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4900 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4901 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4902 		/* if quad port adapter, disable WoL on all but port A */
4903 		if (global_quad_port_a != 0)
4904 			adapter->wol = 0;
4905 		/* Reset for multiple quad port adapters */
4906 		if (++global_quad_port_a == 4)
4907 			global_quad_port_a = 0;
4908 		break;
4909 	}
4910 	return;
4911 }
4912 
4913 
4914 /*
4915  * Enable PCI Wake On Lan capability
4916  */
4917 static void
4918 em_enable_wakeup(device_t dev)
4919 {
4920 	struct adapter	*adapter = device_get_softc(dev);
4921 	if_t ifp = adapter->ifp;
4922 	u32		pmc, ctrl, ctrl_ext, rctl;
4923 	u16     	status;
4924 
4925 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4926 		return;
4927 
4928 	/* Advertise the wakeup capability */
4929 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4930 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4931 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4932 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4933 
4934 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4935 	    (adapter->hw.mac.type == e1000_pchlan) ||
4936 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4937 	    (adapter->hw.mac.type == e1000_ich10lan))
4938 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4939 
4940 	/* Keep the laser running on Fiber adapters */
4941 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4942 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4943 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4944 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4945 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4946 	}
4947 
4948 	/*
4949 	** Determine type of Wakeup: note that wol
4950 	** is set with all bits on by default.
4951 	*/
4952 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
4953 		adapter->wol &= ~E1000_WUFC_MAG;
4954 
4955 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
4956 		adapter->wol &= ~E1000_WUFC_MC;
4957 	else {
4958 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4959 		rctl |= E1000_RCTL_MPE;
4960 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4961 	}
4962 
4963 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4964 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4965 		if (em_enable_phy_wakeup(adapter))
4966 			return;
4967 	} else {
4968 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4969 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4970 	}
4971 
4972 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4973 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4974 
4975 	/* Request PME */
4976 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4977 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4978 	if (if_getcapenable(ifp) & IFCAP_WOL)
4979 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4980 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4981 
4982 	return;
4983 }
4984 
4985 /*
4986 ** WOL in the newer chipset interfaces (pchlan)
4987 ** requires settings to be copied into the PHY
4988 */
4989 static int
4990 em_enable_phy_wakeup(struct adapter *adapter)
4991 {
4992 	struct e1000_hw *hw = &adapter->hw;
4993 	u32 mreg, ret = 0;
4994 	u16 preg;
4995 
4996 	/* copy MAC RARs to PHY RARs */
4997 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4998 
4999 	/* copy MAC MTA to PHY MTA */
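	/* (each 32-bit MTA entry is written as two 16-bit PHY registers) */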
5000 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5001 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5002 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5003 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5004 		    (u16)((mreg >> 16) & 0xFFFF));
5005 	}
5006 
5007 	/* configure PHY Rx Control register */
5008 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5009 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5010 	if (mreg & E1000_RCTL_UPE)
5011 		preg |= BM_RCTL_UPE;
5012 	if (mreg & E1000_RCTL_MPE)
5013 		preg |= BM_RCTL_MPE;
5014 	preg &= ~(BM_RCTL_MO_MASK);
5015 	if (mreg & E1000_RCTL_MO_3)
5016 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5017 				<< BM_RCTL_MO_SHIFT);
5018 	if (mreg & E1000_RCTL_BAM)
5019 		preg |= BM_RCTL_BAM;
5020 	if (mreg & E1000_RCTL_PMCF)
5021 		preg |= BM_RCTL_PMCF;
5022 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5023 	if (mreg & E1000_CTRL_RFCE)
5024 		preg |= BM_RCTL_RFCE;
5025 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5026 
5027 	/* enable PHY wakeup in MAC register */
5028 	E1000_WRITE_REG(hw, E1000_WUC,
5029 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5030 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5031 
5032 	/* configure and enable PHY wakeup in PHY registers */
5033 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5034 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5035 
5036 	/* activate PHY wakeup */
5037 	ret = hw->phy.ops.acquire(hw);
5038 	if (ret) {
5039 		printf("Could not acquire PHY\n");
5040 		return ret;
5041 	}
5042 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5043 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5044 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5045 	if (ret) {
5046 		printf("Could not read PHY page 769\n");
5047 		goto out;
5048 	}
5049 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5050 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5051 	if (ret)
5052 		printf("Could not set PHY Host Wakeup bit\n");
5053 out:
5054 	hw->phy.ops.release(hw);
5055 
5056 	return ret;
5057 }
5058 
5059 static void
5060 em_led_func(void *arg, int onoff)
5061 {
5062 	struct adapter	*adapter = arg;
5063 
5064 	EM_CORE_LOCK(adapter);
5065 	if (onoff) {
5066 		e1000_setup_led(&adapter->hw);
5067 		e1000_led_on(&adapter->hw);
5068 	} else {
5069 		e1000_led_off(&adapter->hw);
5070 		e1000_cleanup_led(&adapter->hw);
5071 	}
5072 	EM_CORE_UNLOCK(adapter);
5073 }
5074 
5075 /*
5076 ** Disable the L0S and L1 LINK states
5077 */
5078 static void
5079 em_disable_aspm(struct adapter *adapter)
5080 {
5081 	int		base, reg;
5082 	u16		link_cap, link_ctrl;
5083 	device_t	dev = adapter->dev;
5084 
5085 	switch (adapter->hw.mac.type) {
5086 		case e1000_82573:
5087 		case e1000_82574:
5088 		case e1000_82583:
5089 			break;
5090 		default:
5091 			return;
5092 	}
5093 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5094 		return;
5095 	reg = base + PCIER_LINK_CAP;
5096 	link_cap = pci_read_config(dev, reg, 2);
5097 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5098 		return;
5099 	reg = base + PCIER_LINK_CTL;
5100 	link_ctrl = pci_read_config(dev, reg, 2);
5101 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
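	/* clearing the ASPM-control field disables both L0s and L1 entry */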
5102 	pci_write_config(dev, reg, link_ctrl, 2);
5103 	return;
5104 }
5105 
5106 /**********************************************************************
5107  *
5108  *  Update the board statistics counters.
5109  *
5110  **********************************************************************/
5111 static void
5112 em_update_stats_counters(struct adapter *adapter)
5113 {
5114 	if_t ifp;
5115 
5116 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5117 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5118 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5119 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5120 	}
5121 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5122 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5123 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5124 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5125 
5126 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5127 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5128 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5129 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5130 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5131 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5132 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5133 	/*
5134 	** For watchdog management we need to know if we have been
5135 	** paused during the last interval, so capture that here.
5136 	*/
5137 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5138 	adapter->stats.xoffrxc += adapter->pause_frames;
5139 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5140 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5141 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5142 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5143 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5144 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5145 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5146 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5147 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5148 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5149 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5150 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5151 
5152 	/* For the 64-bit byte counters the low dword must be read first; */
5153 	/* both registers clear on the read of the high dword. */
5154 
5155 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5156 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5157 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5158 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5159 
5160 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5161 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5162 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5163 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5164 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5165 
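	/*
	** NB: only the high dword is read for the total-octets counters,
	** which (per the note above) clears the whole 64-bit pair, so the
	** low-order octets are not accumulated here.
	*/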
5166 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5167 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5168 
5169 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5170 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5171 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5172 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5173 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5174 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5175 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5176 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5177 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5178 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5179 
5180 	/* Interrupt Counts */
5181 
5182 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5183 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5184 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5185 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5186 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5187 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5188 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5189 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5190 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5191 
5192 	if (adapter->hw.mac.type >= e1000_82543) {
5193 		adapter->stats.algnerrc +=
5194 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5195 		adapter->stats.rxerrc +=
5196 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5197 		adapter->stats.tncrs +=
5198 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5199 		adapter->stats.cexterr +=
5200 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5201 		adapter->stats.tsctc +=
5202 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5203 		adapter->stats.tsctfc +=
5204 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5205 	}
5206 	ifp = adapter->ifp;
5207 
5208 	if_setcollisions(ifp, adapter->stats.colc);
5209 
5210 	/* Rx Errors */
5211 	if_setierrors(ifp, adapter->dropped_pkts + adapter->stats.rxerrc +
5212 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5213 	    adapter->stats.ruc + adapter->stats.roc +
5214 	    adapter->stats.mpc + adapter->stats.cexterr);
5215 
5216 	/* Tx Errors */
5217 	if_setoerrors(ifp, adapter->stats.ecol + adapter->stats.latecol +
5218 	    adapter->watchdog_events);
5219 }
5220 
5221 /* Export a single 32-bit register via a read-only sysctl. */
5222 static int
5223 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5224 {
5225 	struct adapter *adapter;
5226 	u_int val;
5227 
5228 	adapter = oidp->oid_arg1;
5229 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5230 	return (sysctl_handle_int(oidp, &val, 0, req));
5231 }
5232 
5233 /*
5234  * Add sysctl variables, one per statistic, to the system.
5235  */
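/* The oids land under the device's tree, e.g. dev.em.0.mac_stats.crc_errs. */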
5236 static void
5237 em_add_hw_stats(struct adapter *adapter)
5238 {
5239 	device_t dev = adapter->dev;
5240 
5241 	struct tx_ring *txr = adapter->tx_rings;
5242 	struct rx_ring *rxr = adapter->rx_rings;
5243 
5244 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5245 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5246 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5247 	struct e1000_hw_stats *stats = &adapter->stats;
5248 
5249 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5250 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5251 
5252 #define QUEUE_NAME_LEN 32
5253 	char namebuf[QUEUE_NAME_LEN];
5254 
5255 	/* Driver Statistics */
5256 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5257 			CTLFLAG_RD, &adapter->link_irq,
5258 			"Link MSIX IRQ Handled");
5259 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5260 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5261 			 "Std mbuf failed");
5262 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5263 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5264 			 "Std mbuf cluster failed");
5265 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5266 			CTLFLAG_RD, &adapter->dropped_pkts,
5267 			"Driver dropped packets");
5268 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5269 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5270 			"Driver tx dma failure in xmit");
5271 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5272 			CTLFLAG_RD, &adapter->rx_overruns,
5273 			"RX overruns");
5274 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5275 			CTLFLAG_RD, &adapter->watchdog_events,
5276 			"Watchdog timeouts");
5277 
5278 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5279 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5280 			em_sysctl_reg_handler, "IU",
5281 			"Device Control Register");
5282 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5283 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5284 			em_sysctl_reg_handler, "IU",
5285 			"Receiver Control Register");
5286 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5287 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5288 			"Flow Control High Watermark");
5289 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5290 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5291 			"Flow Control Low Watermark");
5292 
5293 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5294 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5295 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5296 					    CTLFLAG_RD, NULL, "Queue Name");
5297 		queue_list = SYSCTL_CHILDREN(queue_node);
5298 
5299 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5300 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5301 				E1000_TDH(txr->me),
5302 				em_sysctl_reg_handler, "IU",
5303 				"Transmit Descriptor Head");
5304 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5305 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5306 				E1000_TDT(txr->me),
5307 				em_sysctl_reg_handler, "IU",
5308 				"Transmit Descriptor Tail");
5309 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5310 				CTLFLAG_RD, &txr->tx_irq,
5311 				"Queue MSI-X Transmit Interrupts");
5312 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5313 				CTLFLAG_RD, &txr->no_desc_avail,
5314 				"Queue No Descriptor Available");
5315 
5316 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5317 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5318 				E1000_RDH(rxr->me),
5319 				em_sysctl_reg_handler, "IU",
5320 				"Receive Descriptor Head");
5321 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5322 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5323 				E1000_RDT(rxr->me),
5324 				em_sysctl_reg_handler, "IU",
5325 				"Receive Descriptor Tail");
5326 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5327 				CTLFLAG_RD, &rxr->rx_irq,
5328 				"Queue MSI-X Receive Interrupts");
5329 	}
5330 
5331 	/* MAC stats get their own sub node */
5332 
5333 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5334 				    CTLFLAG_RD, NULL, "Statistics");
5335 	stat_list = SYSCTL_CHILDREN(stat_node);
5336 
5337 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5338 			CTLFLAG_RD, &stats->ecol,
5339 			"Excessive collisions");
5340 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5341 			CTLFLAG_RD, &stats->scc,
5342 			"Single collisions");
5343 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5344 			CTLFLAG_RD, &stats->mcc,
5345 			"Multiple collisions");
5346 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5347 			CTLFLAG_RD, &stats->latecol,
5348 			"Late collisions");
5349 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5350 			CTLFLAG_RD, &stats->colc,
5351 			"Collision Count");
5352 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5353 			CTLFLAG_RD, &adapter->stats.symerrs,
5354 			"Symbol Errors");
5355 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5356 			CTLFLAG_RD, &adapter->stats.sec,
5357 			"Sequence Errors");
5358 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5359 			CTLFLAG_RD, &adapter->stats.dc,
5360 			"Defer Count");
5361 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5362 			CTLFLAG_RD, &adapter->stats.mpc,
5363 			"Missed Packets");
5364 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5365 			CTLFLAG_RD, &adapter->stats.rnbc,
5366 			"Receive No Buffers");
5367 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5368 			CTLFLAG_RD, &adapter->stats.ruc,
5369 			"Receive Undersize");
5370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5371 			CTLFLAG_RD, &adapter->stats.rfc,
5372 			"Fragmented Packets Received");
5373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5374 			CTLFLAG_RD, &adapter->stats.roc,
5375 			"Oversized Packets Received");
5376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5377 			CTLFLAG_RD, &adapter->stats.rjc,
5378 			"Received Jabber");
5379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5380 			CTLFLAG_RD, &adapter->stats.rxerrc,
5381 			"Receive Errors");
5382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5383 			CTLFLAG_RD, &adapter->stats.crcerrs,
5384 			"CRC errors");
5385 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5386 			CTLFLAG_RD, &adapter->stats.algnerrc,
5387 			"Alignment Errors");
5388 	/* On 82575 these are collision counts */
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5390 			CTLFLAG_RD, &adapter->stats.cexterr,
5391 			"Collision/Carrier extension errors");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5393 			CTLFLAG_RD, &adapter->stats.xonrxc,
5394 			"XON Received");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5396 			CTLFLAG_RD, &adapter->stats.xontxc,
5397 			"XON Transmitted");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5399 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5400 			"XOFF Received");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5402 			CTLFLAG_RD, &adapter->stats.xofftxc,
5403 			"XOFF Transmitted");
5404 
5405 	/* Packet Reception Stats */
5406 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5407 			CTLFLAG_RD, &adapter->stats.tpr,
5408 			"Total Packets Received");
5409 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5410 			CTLFLAG_RD, &adapter->stats.gprc,
5411 			"Good Packets Received");
5412 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5413 			CTLFLAG_RD, &adapter->stats.bprc,
5414 			"Broadcast Packets Received");
5415 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5416 			CTLFLAG_RD, &adapter->stats.mprc,
5417 			"Multicast Packets Received");
5418 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5419 			CTLFLAG_RD, &adapter->stats.prc64,
5420 			"64 byte frames received");
5421 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5422 			CTLFLAG_RD, &adapter->stats.prc127,
5423 			"65-127 byte frames received");
5424 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5425 			CTLFLAG_RD, &adapter->stats.prc255,
5426 			"128-255 byte frames received");
5427 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5428 			CTLFLAG_RD, &adapter->stats.prc511,
5429 			"256-511 byte frames received");
5430 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5431 			CTLFLAG_RD, &adapter->stats.prc1023,
5432 			"512-1023 byte frames received");
5433 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5434 			CTLFLAG_RD, &adapter->stats.prc1522,
5435 			"1024-1522 byte frames received");
5436 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5437 			CTLFLAG_RD, &adapter->stats.gorc,
5438 			"Good Octets Received");
5439 
5440 	/* Packet Transmission Stats */
5441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5442 			CTLFLAG_RD, &adapter->stats.gotc,
5443 			"Good Octets Transmitted");
5444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5445 			CTLFLAG_RD, &adapter->stats.tpt,
5446 			"Total Packets Transmitted");
5447 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5448 			CTLFLAG_RD, &adapter->stats.gptc,
5449 			"Good Packets Transmitted");
5450 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5451 			CTLFLAG_RD, &adapter->stats.bptc,
5452 			"Broadcast Packets Transmitted");
5453 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5454 			CTLFLAG_RD, &adapter->stats.mptc,
5455 			"Multicast Packets Transmitted");
5456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5457 			CTLFLAG_RD, &adapter->stats.ptc64,
5458 			"64 byte frames transmitted");
5459 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5460 			CTLFLAG_RD, &adapter->stats.ptc127,
5461 			"65-127 byte frames transmitted");
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5463 			CTLFLAG_RD, &adapter->stats.ptc255,
5464 			"128-255 byte frames transmitted");
5465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5466 			CTLFLAG_RD, &adapter->stats.ptc511,
5467 			"256-511 byte frames transmitted");
5468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5469 			CTLFLAG_RD, &adapter->stats.ptc1023,
5470 			"512-1023 byte frames transmitted");
5471 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5472 			CTLFLAG_RD, &adapter->stats.ptc1522,
5473 			"1024-1522 byte frames transmitted");
5474 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5475 			CTLFLAG_RD, &adapter->stats.tsctc,
5476 			"TSO Contexts Transmitted");
5477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5478 			CTLFLAG_RD, &adapter->stats.tsctfc,
5479 			"TSO Contexts Failed");
5480 
5481 
5482 	/* Interrupt Stats */
5483 
5484 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5485 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5486 	int_list = SYSCTL_CHILDREN(int_node);
5487 
5488 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5489 			CTLFLAG_RD, &adapter->stats.iac,
5490 			"Interrupt Assertion Count");
5491 
5492 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5493 			CTLFLAG_RD, &adapter->stats.icrxptc,
5494 			"Interrupt Cause Rx Pkt Timer Expire Count");
5495 
5496 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5497 			CTLFLAG_RD, &adapter->stats.icrxatc,
5498 			"Interrupt Cause Rx Abs Timer Expire Count");
5499 
5500 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5501 			CTLFLAG_RD, &adapter->stats.ictxptc,
5502 			"Interrupt Cause Tx Pkt Timer Expire Count");
5503 
5504 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5505 			CTLFLAG_RD, &adapter->stats.ictxatc,
5506 			"Interrupt Cause Tx Abs Timer Expire Count");
5507 
5508 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5509 			CTLFLAG_RD, &adapter->stats.ictxqec,
5510 			"Interrupt Cause Tx Queue Empty Count");
5511 
5512 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5513 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5514 			"Interrupt Cause Tx Queue Min Thresh Count");
5515 
5516 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5517 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5518 			"Interrupt Cause Rx Desc Min Thresh Count");
5519 
5520 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5521 			CTLFLAG_RD, &adapter->stats.icrxoc,
5522 			"Interrupt Cause Receiver Overrun Count");
5523 }
5524 
5525 /**********************************************************************
5526  *
5527  *  This routine provides a way to dump out the adapter eeprom,
5528  *  often a useful debug/service tool. Only the first 32 words are
5529  *  dumped; the data that matters lives within that range.
5530  *
5531  **********************************************************************/
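/*
 * Typically poked from userland with something like `sysctl dev.em.0.nvm=1`
 * (the oid name is assumed from the attach code elsewhere in this file).
 */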
5532 static int
5533 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5534 {
5535 	struct adapter *adapter = (struct adapter *)arg1;
5536 	int error;
5537 	int result;
5538 
5539 	result = -1;
5540 	error = sysctl_handle_int(oidp, &result, 0, req);
5541 
5542 	if (error || !req->newptr)
5543 		return (error);
5544 
5545 	/*
5546 	 * This value will cause a hex dump of the
5547 	 * first 32 16-bit words of the EEPROM to
5548 	 * the screen.
5549 	 */
5550 	if (result == 1)
5551 		em_print_nvm_info(adapter);
5552 
5553 	return (error);
5554 }
5555 
5556 static void
5557 em_print_nvm_info(struct adapter *adapter)
5558 {
5559 	u16	eeprom_data;
5560 	int	i, j, row = 0;
5561 
5562 	/* It's a bit crude, but it gets the job done */
5563 	printf("\nInterface EEPROM Dump:\n");
5564 	printf("Offset\n0x0000  ");
5565 	for (i = 0, j = 0; i < 32; i++, j++) {
5566 		if (j == 8) { /* Make the offset block */
5567 			j = 0; ++row;
5568 			printf("\n0x00%x0  ", row);
5569 		}
5570 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5571 		printf("%04x ", eeprom_data);
5572 	}
5573 	printf("\n");
5574 }
5575 
5576 static int
5577 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5578 {
5579 	struct em_int_delay_info *info;
5580 	struct adapter *adapter;
5581 	u32 regval;
5582 	int error, usecs, ticks;
5583 
5584 	info = (struct em_int_delay_info *)arg1;
5585 	usecs = info->value;
5586 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5587 	if (error != 0 || req->newptr == NULL)
5588 		return (error);
5589 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5590 		return (EINVAL);
5591 	info->value = usecs;
5592 	ticks = EM_USECS_TO_TICKS(usecs);
5593 	if (info->offset == E1000_ITR)	/* ITR units are 256ns; ticks are 1024ns */
5594 		ticks *= 4;
5595 
5596 	adapter = info->adapter;
5597 
5598 	EM_CORE_LOCK(adapter);
5599 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5600 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5601 	/* Handle a few special cases. */
5602 	switch (info->offset) {
5603 	case E1000_RDTR:
5604 		break;
5605 	case E1000_TIDV:
5606 		if (ticks == 0) {
5607 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5608 			/* Don't write 0 into the TIDV register. */
5609 			regval++;
5610 		} else
5611 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5612 		break;
5613 	}
5614 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5615 	EM_CORE_UNLOCK(adapter);
5616 	return (0);
5617 }
5618 
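/*
** Register one interrupt-delay tunable. A representative call, as made
** from the attach path (names assumed from the rest of this driver):
**
**	em_add_int_delay_sysctl(adapter, "rx_int_delay",
**	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
**	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
*/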
5619 static void
5620 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5621 	const char *description, struct em_int_delay_info *info,
5622 	int offset, int value)
5623 {
5624 	info->adapter = adapter;
5625 	info->offset = offset;
5626 	info->value = value;
5627 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5628 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5629 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5630 	    info, 0, em_sysctl_int_delay, "I", description);
5631 }
5632 
5633 static void
5634 em_set_sysctl_value(struct adapter *adapter, const char *name,
5635 	const char *description, int *limit, int value)
5636 {
5637 	*limit = value;
5638 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5639 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5640 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5641 }
5642 
5643 
5644 /*
5645 ** Set flow control using sysctl:
5646 ** Flow control values:
5647 **      0 - off
5648 **      1 - rx pause
5649 **      2 - tx pause
5650 **      3 - full
5651 */
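/*
** e.g. `sysctl dev.em.0.fc=3` requests full flow control (the oid name
** is assumed from the attach code).
*/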
5652 static int
5653 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5654 {
5655 	struct adapter	*adapter = (struct adapter *) arg1;
5656 	int		error;
5657 	int		input = adapter->fc; /* start from the current mode */
5658 
5659 	error = sysctl_handle_int(oidp, &input, 0, req);
5660 
5661 	if ((error) || (req->newptr == NULL))
5662 		return (error);
5663 
5664 	if (input == adapter->fc) /* no change? */
5665 		return (error);
5666 
5667 	switch (input) {
5668 	case e1000_fc_rx_pause:
5669 	case e1000_fc_tx_pause:
5670 	case e1000_fc_full:
5671 	case e1000_fc_none:
5672 		adapter->hw.fc.requested_mode = input;
5673 		adapter->fc = input;
5674 		break;
5675 	default:
5676 		/* Do nothing */
5677 		return (error);
5678 	}
5679 
5680 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5681 	e1000_force_mac_fc(&adapter->hw);
5682 	return (error);
5683 }
5684 
5685 /*
5686 ** Manage Energy Efficient Ethernet:
5687 ** Control values:
5688 **     0 - EEE enabled, 1 - EEE disabled
5689 */
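/*
** e.g. `sysctl dev.em.0.eee_control=1` disables EEE (the oid name is
** assumed from the attach code).
*/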
5690 static int
5691 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5692 {
5693 	struct adapter	*adapter = (struct adapter *) arg1;
5694 	int		error, value;
5695 
5696 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5697 	error = sysctl_handle_int(oidp, &value, 0, req);
5698 	if (error || req->newptr == NULL)
5699 		return (error);
5700 	EM_CORE_LOCK(adapter);
5701 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5702 	em_init_locked(adapter);
5703 	EM_CORE_UNLOCK(adapter);
5704 	return (0);
5705 }
5706 
5707 static int
5708 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5709 {
5710 	struct adapter *adapter;
5711 	int error;
5712 	int result;
5713 
5714 	result = -1;
5715 	error = sysctl_handle_int(oidp, &result, 0, req);
5716 
5717 	if (error || !req->newptr)
5718 		return (error);
5719 
5720 	if (result == 1) {
5721 		adapter = (struct adapter *)arg1;
5722 		em_print_debug_info(adapter);
5723 	}
5724 
5725 	return (error);
5726 }
5727 
5728 /*
5729 ** This routine is meant to be fluid, add whatever is
5730 ** needed for debugging a problem.  -jfv
5731 */
5732 static void
5733 em_print_debug_info(struct adapter *adapter)
5734 {
5735 	device_t dev = adapter->dev;
5736 	struct tx_ring *txr = adapter->tx_rings;
5737 	struct rx_ring *rxr = adapter->rx_rings;
5738 
5739 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5740 		printf("Interface is RUNNING ");
5741 	else
5742 		printf("Interface is NOT RUNNING ");
5743 
5744 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5745 		printf("and INACTIVE\n");
5746 	else
5747 		printf("and ACTIVE\n");
5748 
5749 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5750 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5751 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5752 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5753 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5754 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5755 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5756 	device_printf(dev, "TX descriptors avail = %d\n",
5757 	    txr->tx_avail);
5758 	device_printf(dev, "TX descriptor avail failures = %lu\n",
5759 	    txr->no_desc_avail);
5760 	device_printf(dev, "RX discarded packets = %lu\n",
5761 	    rxr->rx_discarded);
5762 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5763 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5764 }
5765