xref: /freebsd/sys/dev/e1000/if_em.c (revision 3823d5e198425b4f5e5a80267d195769d1063773)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2014, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #ifdef HAVE_KERNEL_OPTION_HEADERS
39 #include "opt_device_polling.h"
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_var.h>
67 #include <net/if_arp.h>
68 #include <net/if_dl.h>
69 #include <net/if_media.h>
70 
71 #include <net/if_types.h>
72 #include <net/if_vlan_var.h>
73 
74 #include <netinet/in_systm.h>
75 #include <netinet/in.h>
76 #include <netinet/if_ether.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79 #include <netinet/tcp.h>
80 #include <netinet/udp.h>
81 
82 #include <machine/in_cksum.h>
83 #include <dev/led/led.h>
84 #include <dev/pci/pcivar.h>
85 #include <dev/pci/pcireg.h>
86 
87 #include "e1000_api.h"
88 #include "e1000_82571.h"
89 #include "if_em.h"
90 
/*
 * Debug statistics switch: set this to one to display debug statistics.
 */
int	em_display_debug_stats = 0;

/*
 * Driver version string.  em_probe() appends it to the branding string
 * when it builds the device description.
 */
char	em_driver_version[] = "7.4.2";
100 
101 /*********************************************************************
102  *  PCI Device ID Table
103  *
104  *  Used by probe to select devices to load on
105  *  Last field stores an index into e1000_strings
106  *  Last entry must be all 0s
107  *
108  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
109  *********************************************************************/
110 
111 static em_vendor_info_t em_vendor_info_array[] =
112 {
113 	/* Intel(R) PRO/1000 Network Connection */
114 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
115 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
116 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
117 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
118 						PCI_ANY_ID, PCI_ANY_ID, 0},
119 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
120 						PCI_ANY_ID, PCI_ANY_ID, 0},
121 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
122 						PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
124 						PCI_ANY_ID, PCI_ANY_ID, 0},
125 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
126 						PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
128 						PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
131 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
133 
134 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
139 						PCI_ANY_ID, PCI_ANY_ID, 0},
140 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
141 						PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
143 						PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
145 						PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
180 						PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
182 						PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
187 	/* required last entry */
188 	{ 0, 0, 0, 0, 0}
189 };
190 
/*********************************************************************
 *  Branding strings for the supported NICs, selected by the index
 *  field of an em_vendor_info_array entry.
 *********************************************************************/

static char *em_strings[] = {
	[0] = "Intel(R) PRO/1000 Network Connection",
};
198 
199 /*********************************************************************
200  *  Function prototypes
201  *********************************************************************/
202 static int	em_probe(device_t);
203 static int	em_attach(device_t);
204 static int	em_detach(device_t);
205 static int	em_shutdown(device_t);
206 static int	em_suspend(device_t);
207 static int	em_resume(device_t);
208 #ifdef EM_MULTIQUEUE
209 static int	em_mq_start(if_t, struct mbuf *);
210 static int	em_mq_start_locked(if_t,
211 		    struct tx_ring *, struct mbuf *);
212 static void	em_qflush(if_t);
213 #else
214 static void	em_start(if_t);
215 static void	em_start_locked(if_t, struct tx_ring *);
216 #endif
217 static int	em_ioctl(if_t, u_long, caddr_t);
218 static uint64_t	em_get_counter(if_t, ift_counter);
219 static void	em_init(void *);
220 static void	em_init_locked(struct adapter *);
221 static void	em_stop(void *);
222 static void	em_media_status(if_t, struct ifmediareq *);
223 static int	em_media_change(if_t);
224 static void	em_identify_hardware(struct adapter *);
225 static int	em_allocate_pci_resources(struct adapter *);
226 static int	em_allocate_legacy(struct adapter *);
227 static int	em_allocate_msix(struct adapter *);
228 static int	em_allocate_queues(struct adapter *);
229 static int	em_setup_msix(struct adapter *);
230 static void	em_free_pci_resources(struct adapter *);
231 static void	em_local_timer(void *);
232 static void	em_reset(struct adapter *);
233 static int	em_setup_interface(device_t, struct adapter *);
234 
235 static void	em_setup_transmit_structures(struct adapter *);
236 static void	em_initialize_transmit_unit(struct adapter *);
237 static int	em_allocate_transmit_buffers(struct tx_ring *);
238 static void	em_free_transmit_structures(struct adapter *);
239 static void	em_free_transmit_buffers(struct tx_ring *);
240 
241 static int	em_setup_receive_structures(struct adapter *);
242 static int	em_allocate_receive_buffers(struct rx_ring *);
243 static void	em_initialize_receive_unit(struct adapter *);
244 static void	em_free_receive_structures(struct adapter *);
245 static void	em_free_receive_buffers(struct rx_ring *);
246 
247 static void	em_enable_intr(struct adapter *);
248 static void	em_disable_intr(struct adapter *);
249 static void	em_update_stats_counters(struct adapter *);
250 static void	em_add_hw_stats(struct adapter *adapter);
251 static void	em_txeof(struct tx_ring *);
252 static bool	em_rxeof(struct rx_ring *, int, int *);
253 #ifndef __NO_STRICT_ALIGNMENT
254 static int	em_fixup_rx(struct rx_ring *);
255 #endif
256 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
257 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
258 		    struct ip *, u32 *, u32 *);
259 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
260 		    struct tcphdr *, u32 *, u32 *);
261 static void	em_set_promisc(struct adapter *);
262 static void	em_disable_promisc(struct adapter *);
263 static void	em_set_multi(struct adapter *);
264 static void	em_update_link_status(struct adapter *);
265 static void	em_refresh_mbufs(struct rx_ring *, int);
266 static void	em_register_vlan(void *, if_t, u16);
267 static void	em_unregister_vlan(void *, if_t, u16);
268 static void	em_setup_vlan_hw_support(struct adapter *);
269 static int	em_xmit(struct tx_ring *, struct mbuf **);
270 static int	em_dma_malloc(struct adapter *, bus_size_t,
271 		    struct em_dma_alloc *, int);
272 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
273 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
274 static void	em_print_nvm_info(struct adapter *);
275 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
276 static void	em_print_debug_info(struct adapter *);
277 static int 	em_is_valid_ether_addr(u8 *);
278 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
279 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
280 		    const char *, struct em_int_delay_info *, int, int);
281 /* Management and WOL Support */
282 static void	em_init_manageability(struct adapter *);
283 static void	em_release_manageability(struct adapter *);
284 static void     em_get_hw_control(struct adapter *);
285 static void     em_release_hw_control(struct adapter *);
286 static void	em_get_wakeup(device_t);
287 static void     em_enable_wakeup(device_t);
288 static int	em_enable_phy_wakeup(struct adapter *);
289 static void	em_led_func(void *, int);
290 static void	em_disable_aspm(struct adapter *);
291 
292 static int	em_irq_fast(void *);
293 
294 /* MSIX handlers */
295 static void	em_msix_tx(void *);
296 static void	em_msix_rx(void *);
297 static void	em_msix_link(void *);
298 static void	em_handle_tx(void *context, int pending);
299 static void	em_handle_rx(void *context, int pending);
300 static void	em_handle_link(void *context, int pending);
301 
302 static void	em_set_sysctl_value(struct adapter *, const char *,
303 		    const char *, int *, int);
304 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
305 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
306 
307 static __inline void em_rx_discard(struct rx_ring *, int);
308 
309 #ifdef DEVICE_POLLING
310 static poll_handler_t em_poll;
311 #endif /* POLLING */
312 
313 /*********************************************************************
314  *  FreeBSD Device Interface Entry Points
315  *********************************************************************/
316 
317 static device_method_t em_methods[] = {
318 	/* Device interface */
319 	DEVMETHOD(device_probe, em_probe),
320 	DEVMETHOD(device_attach, em_attach),
321 	DEVMETHOD(device_detach, em_detach),
322 	DEVMETHOD(device_shutdown, em_shutdown),
323 	DEVMETHOD(device_suspend, em_suspend),
324 	DEVMETHOD(device_resume, em_resume),
325 	DEVMETHOD_END
326 };
327 
328 static driver_t em_driver = {
329 	"em", em_methods, sizeof(struct adapter),
330 };
331 
332 devclass_t em_devclass;
333 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
334 MODULE_DEPEND(em, pci, 1, 1, 1);
335 MODULE_DEPEND(em, ether, 1, 1, 1);
336 
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/*
 * Conversions between the two interrupt-delay units used below; one
 * tick is treated as 1.024 usecs and results are rounded to nearest.
 */
#define EM_TICKS_TO_USECS(t)	(((t) * 1024 + 500) / 1000)
#define EM_USECS_TO_TICKS(us)	(((us) * 1000 + 512) / 1024)
#define M_TSO_LEN		66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#if !defined(CSUM_TSO)
#define CSUM_TSO		0
#endif
352 
353 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
354 
355 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
356 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
357 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
358     0, "Default transmit interrupt delay in usecs");
359 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
360     0, "Default receive interrupt delay in usecs");
361 
362 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
363 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
364 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
365     &em_tx_abs_int_delay_dflt, 0,
366     "Default transmit interrupt delay limit in usecs");
367 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
368     &em_rx_abs_int_delay_dflt, 0,
369     "Default receive interrupt delay limit in usecs");
370 
371 static int em_rxd = EM_DEFAULT_RXD;
372 static int em_txd = EM_DEFAULT_TXD;
373 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
374     "Number of receive descriptors per queue");
375 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
376     "Number of transmit descriptors per queue");
377 
378 static int em_smart_pwr_down = FALSE;
379 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
380     0, "Set to true to leave smart power down enabled on newer adapters");
381 
382 /* Controls whether promiscuous also shows bad packets */
383 static int em_debug_sbp = FALSE;
384 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
385     "Show bad packets in promiscuous mode");
386 
387 static int em_enable_msix = TRUE;
388 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
389     "Enable MSI-X interrupts");
390 
391 /* How many packets rxeof tries to clean at a time */
392 static int em_rx_process_limit = 100;
393 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
394     &em_rx_process_limit, 0,
395     "Maximum number of received packets to process "
396     "at a time, -1 means unlimited");
397 
398 /* Energy efficient ethernet - default to OFF */
399 static int eee_setting = 1;
400 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
401     "Enable Energy Efficient Ethernet");
402 
403 /* Global used in WOL setup with multiport cards */
404 static int global_quad_port_a = 0;
405 
406 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
407 #include <dev/netmap/if_em_netmap.h>
408 #endif /* DEV_NETMAP */
409 
410 /*********************************************************************
411  *  Device identification routine
412  *
413  *  em_probe determines if the driver should be loaded on
414  *  adapter based on PCI vendor/device id of the adapter.
415  *
416  *  return BUS_PROBE_DEFAULT on success, positive on failure
417  *********************************************************************/
418 
419 static int
420 em_probe(device_t dev)
421 {
422 	char		adapter_name[60];
423 	u16		pci_vendor_id = 0;
424 	u16		pci_device_id = 0;
425 	u16		pci_subvendor_id = 0;
426 	u16		pci_subdevice_id = 0;
427 	em_vendor_info_t *ent;
428 
429 	INIT_DEBUGOUT("em_probe: begin");
430 
431 	pci_vendor_id = pci_get_vendor(dev);
432 	if (pci_vendor_id != EM_VENDOR_ID)
433 		return (ENXIO);
434 
435 	pci_device_id = pci_get_device(dev);
436 	pci_subvendor_id = pci_get_subvendor(dev);
437 	pci_subdevice_id = pci_get_subdevice(dev);
438 
439 	ent = em_vendor_info_array;
440 	while (ent->vendor_id != 0) {
441 		if ((pci_vendor_id == ent->vendor_id) &&
442 		    (pci_device_id == ent->device_id) &&
443 
444 		    ((pci_subvendor_id == ent->subvendor_id) ||
445 		    (ent->subvendor_id == PCI_ANY_ID)) &&
446 
447 		    ((pci_subdevice_id == ent->subdevice_id) ||
448 		    (ent->subdevice_id == PCI_ANY_ID))) {
449 			sprintf(adapter_name, "%s %s",
450 				em_strings[ent->index],
451 				em_driver_version);
452 			device_set_desc_copy(dev, adapter_name);
453 			return (BUS_PROBE_DEFAULT);
454 		}
455 		ent++;
456 	}
457 
458 	return (ENXIO);
459 }
460 
461 /*********************************************************************
462  *  Device initialization routine
463  *
464  *  The attach entry point is called when the driver is being loaded.
465  *  This routine identifies the type of hardware, allocates all resources
466  *  and initializes the hardware.
467  *
468  *  return 0 on success, positive on failure
469  *********************************************************************/
470 
471 static int
472 em_attach(device_t dev)
473 {
474 	struct adapter	*adapter;
475 	struct e1000_hw	*hw;
476 	int		error = 0;
477 
478 	INIT_DEBUGOUT("em_attach: begin");
479 
480 	if (resource_disabled("em", device_get_unit(dev))) {
481 		device_printf(dev, "Disabled by device hint\n");
482 		return (ENXIO);
483 	}
484 
485 	adapter = device_get_softc(dev);
486 	adapter->dev = adapter->osdep.dev = dev;
487 	hw = &adapter->hw;
488 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
489 
490 	/* SYSCTL stuff */
491 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
492 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
493 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
494 	    em_sysctl_nvm_info, "I", "NVM Information");
495 
496 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
497 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
498 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
499 	    em_sysctl_debug_info, "I", "Debug Information");
500 
501 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
502 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
503 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
504 	    em_set_flowcntl, "I", "Flow Control");
505 
506 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
507 
508 	/* Determine hardware and mac info */
509 	em_identify_hardware(adapter);
510 
511 	/* Setup PCI resources */
512 	if (em_allocate_pci_resources(adapter)) {
513 		device_printf(dev, "Allocation of PCI resources failed\n");
514 		error = ENXIO;
515 		goto err_pci;
516 	}
517 
518 	/*
519 	** For ICH8 and family we need to
520 	** map the flash memory, and this
521 	** must happen after the MAC is
522 	** identified
523 	*/
524 	if ((hw->mac.type == e1000_ich8lan) ||
525 	    (hw->mac.type == e1000_ich9lan) ||
526 	    (hw->mac.type == e1000_ich10lan) ||
527 	    (hw->mac.type == e1000_pchlan) ||
528 	    (hw->mac.type == e1000_pch2lan) ||
529 	    (hw->mac.type == e1000_pch_lpt)) {
530 		int rid = EM_BAR_TYPE_FLASH;
531 		adapter->flash = bus_alloc_resource_any(dev,
532 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
533 		if (adapter->flash == NULL) {
534 			device_printf(dev, "Mapping of Flash failed\n");
535 			error = ENXIO;
536 			goto err_pci;
537 		}
538 		/* This is used in the shared code */
539 		hw->flash_address = (u8 *)adapter->flash;
540 		adapter->osdep.flash_bus_space_tag =
541 		    rman_get_bustag(adapter->flash);
542 		adapter->osdep.flash_bus_space_handle =
543 		    rman_get_bushandle(adapter->flash);
544 	}
545 
546 	/* Do Shared Code initialization */
547 	if (e1000_setup_init_funcs(hw, TRUE)) {
548 		device_printf(dev, "Setup of Shared code failed\n");
549 		error = ENXIO;
550 		goto err_pci;
551 	}
552 
553 	e1000_get_bus_info(hw);
554 
555 	/* Set up some sysctls for the tunable interrupt delays */
556 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
557 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
558 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
559 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
560 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
561 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
562 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
563 	    "receive interrupt delay limit in usecs",
564 	    &adapter->rx_abs_int_delay,
565 	    E1000_REGISTER(hw, E1000_RADV),
566 	    em_rx_abs_int_delay_dflt);
567 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
568 	    "transmit interrupt delay limit in usecs",
569 	    &adapter->tx_abs_int_delay,
570 	    E1000_REGISTER(hw, E1000_TADV),
571 	    em_tx_abs_int_delay_dflt);
572 	em_add_int_delay_sysctl(adapter, "itr",
573 	    "interrupt delay limit in usecs/4",
574 	    &adapter->tx_itr,
575 	    E1000_REGISTER(hw, E1000_ITR),
576 	    DEFAULT_ITR);
577 
578 	/* Sysctl for limiting the amount of work done in the taskqueue */
579 	em_set_sysctl_value(adapter, "rx_processing_limit",
580 	    "max number of rx packets to process", &adapter->rx_process_limit,
581 	    em_rx_process_limit);
582 
583 	/*
584 	 * Validate number of transmit and receive descriptors. It
585 	 * must not exceed hardware maximum, and must be multiple
586 	 * of E1000_DBA_ALIGN.
587 	 */
588 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
589 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
590 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
591 		    EM_DEFAULT_TXD, em_txd);
592 		adapter->num_tx_desc = EM_DEFAULT_TXD;
593 	} else
594 		adapter->num_tx_desc = em_txd;
595 
596 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
597 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
598 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
599 		    EM_DEFAULT_RXD, em_rxd);
600 		adapter->num_rx_desc = EM_DEFAULT_RXD;
601 	} else
602 		adapter->num_rx_desc = em_rxd;
603 
604 	hw->mac.autoneg = DO_AUTO_NEG;
605 	hw->phy.autoneg_wait_to_complete = FALSE;
606 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
607 
608 	/* Copper options */
609 	if (hw->phy.media_type == e1000_media_type_copper) {
610 		hw->phy.mdix = AUTO_ALL_MODES;
611 		hw->phy.disable_polarity_correction = FALSE;
612 		hw->phy.ms_type = EM_MASTER_SLAVE;
613 	}
614 
615 	/*
616 	 * Set the frame limits assuming
617 	 * standard ethernet sized frames.
618 	 */
619 	adapter->hw.mac.max_frame_size =
620 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
621 
622 	/*
623 	 * This controls when hardware reports transmit completion
624 	 * status.
625 	 */
626 	hw->mac.report_tx_early = 1;
627 
628 	/*
629 	** Get queue/ring memory
630 	*/
631 	if (em_allocate_queues(adapter)) {
632 		error = ENOMEM;
633 		goto err_pci;
634 	}
635 
636 	/* Allocate multicast array memory. */
637 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
638 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
639 	if (adapter->mta == NULL) {
640 		device_printf(dev, "Can not allocate multicast setup array\n");
641 		error = ENOMEM;
642 		goto err_late;
643 	}
644 
645 	/* Check SOL/IDER usage */
646 	if (e1000_check_reset_block(hw))
647 		device_printf(dev, "PHY reset is blocked"
648 		    " due to SOL/IDER session.\n");
649 
650 	/* Sysctl for setting Energy Efficient Ethernet */
651 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
652 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
653 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
654 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
655 	    adapter, 0, em_sysctl_eee, "I",
656 	    "Disable Energy Efficient Ethernet");
657 
658 	/*
659 	** Start from a known state, this is
660 	** important in reading the nvm and
661 	** mac from that.
662 	*/
663 	e1000_reset_hw(hw);
664 
665 
666 	/* Make sure we have a good EEPROM before we read from it */
667 	if (e1000_validate_nvm_checksum(hw) < 0) {
668 		/*
669 		** Some PCI-E parts fail the first check due to
670 		** the link being in sleep state, call it again,
671 		** if it fails a second time its a real issue.
672 		*/
673 		if (e1000_validate_nvm_checksum(hw) < 0) {
674 			device_printf(dev,
675 			    "The EEPROM Checksum Is Not Valid\n");
676 			error = EIO;
677 			goto err_late;
678 		}
679 	}
680 
681 	/* Copy the permanent MAC address out of the EEPROM */
682 	if (e1000_read_mac_addr(hw) < 0) {
683 		device_printf(dev, "EEPROM read error while reading MAC"
684 		    " address\n");
685 		error = EIO;
686 		goto err_late;
687 	}
688 
689 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
690 		device_printf(dev, "Invalid MAC address\n");
691 		error = EIO;
692 		goto err_late;
693 	}
694 
695 	/* Disable ULP support */
696 	e1000_disable_ulp_lpt_lp(hw, TRUE);
697 
698 	/*
699 	**  Do interrupt configuration
700 	*/
701 	if (adapter->msix > 1) /* Do MSIX */
702 		error = em_allocate_msix(adapter);
703 	else  /* MSI or Legacy */
704 		error = em_allocate_legacy(adapter);
705 	if (error)
706 		goto err_late;
707 
708 	/*
709 	 * Get Wake-on-Lan and Management info for later use
710 	 */
711 	em_get_wakeup(dev);
712 
713 	/* Setup OS specific network interface */
714 	if (em_setup_interface(dev, adapter) != 0)
715 		goto err_late;
716 
717 	em_reset(adapter);
718 
719 	/* Initialize statistics */
720 	em_update_stats_counters(adapter);
721 
722 	hw->mac.get_link_status = 1;
723 	em_update_link_status(adapter);
724 
725 	/* Register for VLAN events */
726 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
727 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
728 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
729 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
730 
731 	em_add_hw_stats(adapter);
732 
733 	/* Non-AMT based hardware can now take control from firmware */
734 	if (adapter->has_manage && !adapter->has_amt)
735 		em_get_hw_control(adapter);
736 
737 	/* Tell the stack that the interface is not active */
738 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
739 
740 	adapter->led_dev = led_create(em_led_func, adapter,
741 	    device_get_nameunit(dev));
742 #ifdef DEV_NETMAP
743 	em_netmap_attach(adapter);
744 #endif /* DEV_NETMAP */
745 
746 	INIT_DEBUGOUT("em_attach: end");
747 
748 	return (0);
749 
750 err_late:
751 	em_free_transmit_structures(adapter);
752 	em_free_receive_structures(adapter);
753 	em_release_hw_control(adapter);
754 	if (adapter->ifp != (void *)NULL)
755 		if_free(adapter->ifp);
756 err_pci:
757 	em_free_pci_resources(adapter);
758 	free(adapter->mta, M_DEVBUF);
759 	EM_CORE_LOCK_DESTROY(adapter);
760 
761 	return (error);
762 }
763 
764 /*********************************************************************
765  *  Device removal routine
766  *
767  *  The detach entry point is called when the driver is being removed.
768  *  This routine stops the adapter and deallocates all the resources
769  *  that were allocated for driver operation.
770  *
771  *  return 0 on success, positive on failure
772  *********************************************************************/
773 
774 static int
775 em_detach(device_t dev)
776 {
777 	struct adapter	*adapter = device_get_softc(dev);
778 	if_t ifp = adapter->ifp;
779 
780 	INIT_DEBUGOUT("em_detach: begin");
781 
782 	/* Make sure VLANS are not using driver */
783 	if (if_vlantrunkinuse(ifp)) {
784 		device_printf(dev,"Vlan in use, detach first\n");
785 		return (EBUSY);
786 	}
787 
788 #ifdef DEVICE_POLLING
789 	if (if_getcapenable(ifp) & IFCAP_POLLING)
790 		ether_poll_deregister(ifp);
791 #endif
792 
793 	if (adapter->led_dev != NULL)
794 		led_destroy(adapter->led_dev);
795 
796 	EM_CORE_LOCK(adapter);
797 	adapter->in_detach = 1;
798 	em_stop(adapter);
799 	EM_CORE_UNLOCK(adapter);
800 	EM_CORE_LOCK_DESTROY(adapter);
801 
802 	e1000_phy_hw_reset(&adapter->hw);
803 
804 	em_release_manageability(adapter);
805 	em_release_hw_control(adapter);
806 
807 	/* Unregister VLAN events */
808 	if (adapter->vlan_attach != NULL)
809 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
810 	if (adapter->vlan_detach != NULL)
811 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
812 
813 	ether_ifdetach(adapter->ifp);
814 	callout_drain(&adapter->timer);
815 
816 #ifdef DEV_NETMAP
817 	netmap_detach(ifp);
818 #endif /* DEV_NETMAP */
819 
820 	em_free_pci_resources(adapter);
821 	bus_generic_detach(dev);
822 	if_free(ifp);
823 
824 	em_free_transmit_structures(adapter);
825 	em_free_receive_structures(adapter);
826 
827 	em_release_hw_control(adapter);
828 	free(adapter->mta, M_DEVBUF);
829 
830 	return (0);
831 }
832 
833 /*********************************************************************
834  *
835  *  Shutdown entry point
836  *
837  **********************************************************************/
838 
839 static int
840 em_shutdown(device_t dev)
841 {
842 	return em_suspend(dev);
843 }
844 
845 /*
846  * Suspend/resume device methods.
847  */
848 static int
849 em_suspend(device_t dev)
850 {
851 	struct adapter *adapter = device_get_softc(dev);
852 
853 	EM_CORE_LOCK(adapter);
854 
855         em_release_manageability(adapter);
856 	em_release_hw_control(adapter);
857 	em_enable_wakeup(dev);
858 
859 	EM_CORE_UNLOCK(adapter);
860 
861 	return bus_generic_suspend(dev);
862 }
863 
864 static int
865 em_resume(device_t dev)
866 {
867 	struct adapter *adapter = device_get_softc(dev);
868 	struct tx_ring	*txr = adapter->tx_rings;
869 	if_t ifp = adapter->ifp;
870 
871 	EM_CORE_LOCK(adapter);
872 	if (adapter->hw.mac.type == e1000_pch2lan)
873 		e1000_resume_workarounds_pchlan(&adapter->hw);
874 	em_init_locked(adapter);
875 	em_init_manageability(adapter);
876 
877 	if ((if_getflags(ifp) & IFF_UP) &&
878 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
879 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
880 			EM_TX_LOCK(txr);
881 #ifdef EM_MULTIQUEUE
882 			if (!drbr_empty(ifp, txr->br))
883 				em_mq_start_locked(ifp, txr, NULL);
884 #else
885 			if (!if_sendq_empty(ifp))
886 				em_start_locked(ifp, txr);
887 #endif
888 			EM_TX_UNLOCK(txr);
889 		}
890 	}
891 	EM_CORE_UNLOCK(adapter);
892 
893 	return bus_generic_resume(dev);
894 }
895 
896 
897 #ifdef EM_MULTIQUEUE
898 /*********************************************************************
899  *  Multiqueue Transmit routines
900  *
901  *  em_mq_start is called by the stack to initiate a transmit.
902  *  however, if busy the driver can queue the request rather
903  *  than do an immediate send. It is this that is an advantage
904  *  in this driver, rather than also having multiple tx queues.
905  **********************************************************************/
906 static int
907 em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
908 {
909 	struct adapter  *adapter = txr->adapter;
910         struct mbuf     *next;
911         int             err = 0, enq = 0;
912 
913 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
914 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
915 		if (m != NULL)
916 			err = drbr_enqueue(ifp, txr->br, m);
917 		return (err);
918 	}
919 
920 	enq = 0;
921 	if (m != NULL) {
922 		err = drbr_enqueue(ifp, txr->br, m);
923 		if (err)
924 			return (err);
925 	}
926 
927 	/* Process the queue */
928 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
929 		if ((err = em_xmit(txr, &next)) != 0) {
930 			if (next == NULL)
931 				drbr_advance(ifp, txr->br);
932 			else
933 				drbr_putback(ifp, txr->br, next);
934 			break;
935 		}
936 		drbr_advance(ifp, txr->br);
937 		enq++;
938 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
939 		if (next->m_flags & M_MCAST)
940 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
941 		if_etherbpfmtap(ifp, next);
942 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
943                         break;
944 	}
945 
946 	if (enq > 0) {
947                 /* Set the watchdog */
948                 txr->queue_status = EM_QUEUE_WORKING;
949 		txr->watchdog_time = ticks;
950 	}
951 
952 	if (txr->tx_avail < EM_MAX_SCATTER)
953 		em_txeof(txr);
954 	if (txr->tx_avail < EM_MAX_SCATTER)
955 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
956 	return (err);
957 }
958 
959 /*
960 ** Multiqueue capable stack interface
961 */
962 static int
963 em_mq_start(if_t ifp, struct mbuf *m)
964 {
965 	struct adapter	*adapter = if_getsoftc(ifp);
966 	struct tx_ring	*txr = adapter->tx_rings;
967 	int 		error;
968 
969 	if (EM_TX_TRYLOCK(txr)) {
970 		error = em_mq_start_locked(ifp, txr, m);
971 		EM_TX_UNLOCK(txr);
972 	} else
973 		error = drbr_enqueue(ifp, txr->br, m);
974 
975 	return (error);
976 }
977 
978 /*
979 ** Flush all ring buffers
980 */
981 static void
982 em_qflush(if_t ifp)
983 {
984 	struct adapter  *adapter = if_getsoftc(ifp);
985 	struct tx_ring  *txr = adapter->tx_rings;
986 	struct mbuf     *m;
987 
988 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
989 		EM_TX_LOCK(txr);
990 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
991 			m_freem(m);
992 		EM_TX_UNLOCK(txr);
993 	}
994 	if_qflush(ifp);
995 }
996 #else  /* !EM_MULTIQUEUE */
997 
998 static void
999 em_start_locked(if_t ifp, struct tx_ring *txr)
1000 {
1001 	struct adapter	*adapter = if_getsoftc(ifp);
1002 	struct mbuf	*m_head;
1003 
1004 	EM_TX_LOCK_ASSERT(txr);
1005 
1006 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1007 	    IFF_DRV_RUNNING)
1008 		return;
1009 
1010 	if (!adapter->link_active)
1011 		return;
1012 
1013 	while (!if_sendq_empty(ifp)) {
1014         	/* Call cleanup if number of TX descriptors low */
1015 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1016 			em_txeof(txr);
1017 		if (txr->tx_avail < EM_MAX_SCATTER) {
1018 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
1019 			break;
1020 		}
1021 		m_head = if_dequeue(ifp);
1022 		if (m_head == NULL)
1023 			break;
1024 		/*
1025 		 *  Encapsulation can modify our pointer, and or make it
1026 		 *  NULL on failure.  In that event, we can't requeue.
1027 		 */
1028 		if (em_xmit(txr, &m_head)) {
1029 			if (m_head == NULL)
1030 				break;
1031 			if_sendq_prepend(ifp, m_head);
1032 			break;
1033 		}
1034 
1035 		/* Send a copy of the frame to the BPF listener */
1036 		if_etherbpfmtap(ifp, m_head);
1037 
1038 		/* Set timeout in case hardware has problems transmitting. */
1039 		txr->watchdog_time = ticks;
1040                 txr->queue_status = EM_QUEUE_WORKING;
1041 	}
1042 
1043 	return;
1044 }
1045 
1046 static void
1047 em_start(if_t ifp)
1048 {
1049 	struct adapter	*adapter = if_getsoftc(ifp);
1050 	struct tx_ring	*txr = adapter->tx_rings;
1051 
1052 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1053 		EM_TX_LOCK(txr);
1054 		em_start_locked(ifp, txr);
1055 		EM_TX_UNLOCK(txr);
1056 	}
1057 	return;
1058 }
1059 #endif /* EM_MULTIQUEUE */
1060 
1061 /*********************************************************************
1062  *  Ioctl entry point
1063  *
1064  *  em_ioctl is called when the user wants to configure the
1065  *  interface.
1066  *
1067  *  return 0 on success, positive on failure
1068  **********************************************************************/
1069 
1070 static int
1071 em_ioctl(if_t ifp, u_long command, caddr_t data)
1072 {
1073 	struct adapter	*adapter = if_getsoftc(ifp);
1074 	struct ifreq	*ifr = (struct ifreq *)data;
1075 #if defined(INET) || defined(INET6)
1076 	struct ifaddr	*ifa = (struct ifaddr *)data;
1077 #endif
1078 	bool		avoid_reset = FALSE;
1079 	int		error = 0;
1080 
1081 	if (adapter->in_detach)
1082 		return (error);
1083 
1084 	switch (command) {
1085 	case SIOCSIFADDR:
1086 #ifdef INET
1087 		if (ifa->ifa_addr->sa_family == AF_INET)
1088 			avoid_reset = TRUE;
1089 #endif
1090 #ifdef INET6
1091 		if (ifa->ifa_addr->sa_family == AF_INET6)
1092 			avoid_reset = TRUE;
1093 #endif
1094 		/*
1095 		** Calling init results in link renegotiation,
1096 		** so we avoid doing it when possible.
1097 		*/
1098 		if (avoid_reset) {
1099 			if_setflagbits(ifp,IFF_UP,0);
1100 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1101 				em_init(adapter);
1102 #ifdef INET
1103 			if (!(if_getflags(ifp) & IFF_NOARP))
1104 				arp_ifinit(ifp, ifa);
1105 #endif
1106 		} else
1107 			error = ether_ioctl(ifp, command, data);
1108 		break;
1109 	case SIOCSIFMTU:
1110 	    {
1111 		int max_frame_size;
1112 
1113 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1114 
1115 		EM_CORE_LOCK(adapter);
1116 		switch (adapter->hw.mac.type) {
1117 		case e1000_82571:
1118 		case e1000_82572:
1119 		case e1000_ich9lan:
1120 		case e1000_ich10lan:
1121 		case e1000_pch2lan:
1122 		case e1000_pch_lpt:
1123 		case e1000_82574:
1124 		case e1000_82583:
1125 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1126 			max_frame_size = 9234;
1127 			break;
1128 		case e1000_pchlan:
1129 			max_frame_size = 4096;
1130 			break;
1131 			/* Adapters that do not support jumbo frames */
1132 		case e1000_ich8lan:
1133 			max_frame_size = ETHER_MAX_LEN;
1134 			break;
1135 		default:
1136 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1137 		}
1138 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1139 		    ETHER_CRC_LEN) {
1140 			EM_CORE_UNLOCK(adapter);
1141 			error = EINVAL;
1142 			break;
1143 		}
1144 
1145 		if_setmtu(ifp, ifr->ifr_mtu);
1146 		adapter->hw.mac.max_frame_size =
1147 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1148 		em_init_locked(adapter);
1149 		EM_CORE_UNLOCK(adapter);
1150 		break;
1151 	    }
1152 	case SIOCSIFFLAGS:
1153 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1154 		    SIOCSIFFLAGS (Set Interface Flags)");
1155 		EM_CORE_LOCK(adapter);
1156 		if (if_getflags(ifp) & IFF_UP) {
1157 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1158 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1159 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1160 					em_disable_promisc(adapter);
1161 					em_set_promisc(adapter);
1162 				}
1163 			} else
1164 				em_init_locked(adapter);
1165 		} else
1166 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1167 				em_stop(adapter);
1168 		adapter->if_flags = if_getflags(ifp);
1169 		EM_CORE_UNLOCK(adapter);
1170 		break;
1171 	case SIOCADDMULTI:
1172 	case SIOCDELMULTI:
1173 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1174 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1175 			EM_CORE_LOCK(adapter);
1176 			em_disable_intr(adapter);
1177 			em_set_multi(adapter);
1178 #ifdef DEVICE_POLLING
1179 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1180 #endif
1181 				em_enable_intr(adapter);
1182 			EM_CORE_UNLOCK(adapter);
1183 		}
1184 		break;
1185 	case SIOCSIFMEDIA:
1186 		/* Check SOL/IDER usage */
1187 		EM_CORE_LOCK(adapter);
1188 		if (e1000_check_reset_block(&adapter->hw)) {
1189 			EM_CORE_UNLOCK(adapter);
1190 			device_printf(adapter->dev, "Media change is"
1191 			    " blocked due to SOL/IDER session.\n");
1192 			break;
1193 		}
1194 		EM_CORE_UNLOCK(adapter);
1195 		/* falls thru */
1196 	case SIOCGIFMEDIA:
1197 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1198 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1199 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1200 		break;
1201 	case SIOCSIFCAP:
1202 	    {
1203 		int mask, reinit;
1204 
1205 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1206 		reinit = 0;
1207 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1208 #ifdef DEVICE_POLLING
1209 		if (mask & IFCAP_POLLING) {
1210 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1211 				error = ether_poll_register(em_poll, ifp);
1212 				if (error)
1213 					return (error);
1214 				EM_CORE_LOCK(adapter);
1215 				em_disable_intr(adapter);
1216 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1217 				EM_CORE_UNLOCK(adapter);
1218 			} else {
1219 				error = ether_poll_deregister(ifp);
1220 				/* Enable interrupt even in error case */
1221 				EM_CORE_LOCK(adapter);
1222 				em_enable_intr(adapter);
1223 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1224 				EM_CORE_UNLOCK(adapter);
1225 			}
1226 		}
1227 #endif
1228 		if (mask & IFCAP_HWCSUM) {
1229 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1230 			reinit = 1;
1231 		}
1232 		if (mask & IFCAP_TSO4) {
1233 			if_togglecapenable(ifp,IFCAP_TSO4);
1234 			reinit = 1;
1235 		}
1236 		if (mask & IFCAP_VLAN_HWTAGGING) {
1237 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1238 			reinit = 1;
1239 		}
1240 		if (mask & IFCAP_VLAN_HWFILTER) {
1241 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1242 			reinit = 1;
1243 		}
1244 		if (mask & IFCAP_VLAN_HWTSO) {
1245 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1246 			reinit = 1;
1247 		}
1248 		if ((mask & IFCAP_WOL) &&
1249 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1250 			if (mask & IFCAP_WOL_MCAST)
1251 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1252 			if (mask & IFCAP_WOL_MAGIC)
1253 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1254 		}
1255 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1256 			em_init(adapter);
1257 		if_vlancap(ifp);
1258 		break;
1259 	    }
1260 
1261 	default:
1262 		error = ether_ioctl(ifp, command, data);
1263 		break;
1264 	}
1265 
1266 	return (error);
1267 }
1268 
1269 
1270 /*********************************************************************
1271  *  Init entry point
1272  *
1273  *  This routine is used in two ways. It is used by the stack as
1274  *  init entry point in network interface structure. It is also used
1275  *  by the driver as a hw/sw initialization routine to get to a
1276  *  consistent state.
1277  *
1278  *  return 0 on success, positive on failure
1279  **********************************************************************/
1280 
1281 static void
1282 em_init_locked(struct adapter *adapter)
1283 {
1284 	if_t ifp = adapter->ifp;
1285 	device_t	dev = adapter->dev;
1286 
1287 	INIT_DEBUGOUT("em_init: begin");
1288 
1289 	EM_CORE_LOCK_ASSERT(adapter);
1290 
1291 	em_disable_intr(adapter);
1292 	callout_stop(&adapter->timer);
1293 
1294 	/* Get the latest mac address, User can use a LAA */
1295         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1296               ETHER_ADDR_LEN);
1297 
1298 	/* Put the address into the Receive Address Array */
1299 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1300 
1301 	/*
1302 	 * With the 82571 adapter, RAR[0] may be overwritten
1303 	 * when the other port is reset, we make a duplicate
1304 	 * in RAR[14] for that eventuality, this assures
1305 	 * the interface continues to function.
1306 	 */
1307 	if (adapter->hw.mac.type == e1000_82571) {
1308 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1309 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1310 		    E1000_RAR_ENTRIES - 1);
1311 	}
1312 
1313 	/* Initialize the hardware */
1314 	em_reset(adapter);
1315 	em_update_link_status(adapter);
1316 
1317 	/* Setup VLAN support, basic and offload if available */
1318 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1319 
1320 	/* Set hardware offload abilities */
1321 	if_clearhwassist(ifp);
1322 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1323 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1324 	if (if_getcapenable(ifp) & IFCAP_TSO4)
1325 		if_sethwassistbits(ifp, CSUM_TSO, 0);
1326 
1327 	/* Configure for OS presence */
1328 	em_init_manageability(adapter);
1329 
1330 	/* Prepare transmit descriptors and buffers */
1331 	em_setup_transmit_structures(adapter);
1332 	em_initialize_transmit_unit(adapter);
1333 
1334 	/* Setup Multicast table */
1335 	em_set_multi(adapter);
1336 
1337 	/*
1338 	** Figure out the desired mbuf
1339 	** pool for doing jumbos
1340 	*/
1341 	if (adapter->hw.mac.max_frame_size <= 2048)
1342 		adapter->rx_mbuf_sz = MCLBYTES;
1343 	else if (adapter->hw.mac.max_frame_size <= 4096)
1344 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1345 	else
1346 		adapter->rx_mbuf_sz = MJUM9BYTES;
1347 
1348 	/* Prepare receive descriptors and buffers */
1349 	if (em_setup_receive_structures(adapter)) {
1350 		device_printf(dev, "Could not setup receive structures\n");
1351 		em_stop(adapter);
1352 		return;
1353 	}
1354 	em_initialize_receive_unit(adapter);
1355 
1356 	/* Use real VLAN Filter support? */
1357 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1358 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1359 			/* Use real VLAN Filter support */
1360 			em_setup_vlan_hw_support(adapter);
1361 		else {
1362 			u32 ctrl;
1363 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1364 			ctrl |= E1000_CTRL_VME;
1365 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1366 		}
1367 	}
1368 
1369 	/* Don't lose promiscuous settings */
1370 	em_set_promisc(adapter);
1371 
1372 	/* Set the interface as ACTIVE */
1373 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1374 
1375 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1376 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1377 
1378 	/* MSI/X configuration for 82574 */
1379 	if (adapter->hw.mac.type == e1000_82574) {
1380 		int tmp;
1381 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1382 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1383 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1384 		/* Set the IVAR - interrupt vector routing. */
1385 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1386 	}
1387 
1388 #ifdef DEVICE_POLLING
1389 	/*
1390 	 * Only enable interrupts if we are not polling, make sure
1391 	 * they are off otherwise.
1392 	 */
1393 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1394 		em_disable_intr(adapter);
1395 	else
1396 #endif /* DEVICE_POLLING */
1397 		em_enable_intr(adapter);
1398 
1399 	/* AMT based hardware can now take control from firmware */
1400 	if (adapter->has_manage && adapter->has_amt)
1401 		em_get_hw_control(adapter);
1402 }
1403 
/*
 * Unlocked init wrapper: 'arg' is the adapter softc.  Takes the core
 * lock around em_init_locked().
 */
static void
em_init(void *arg)
{
	struct adapter	*adapter;

	adapter = (struct adapter *)arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}
1413 
1414 
1415 #ifdef DEVICE_POLLING
1416 /*********************************************************************
1417  *
1418  *  Legacy polling routine: note this only works with single queue
1419  *
1420  *********************************************************************/
1421 static int
1422 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1423 {
1424 	struct adapter *adapter = if_getsoftc(ifp);
1425 	struct tx_ring	*txr = adapter->tx_rings;
1426 	struct rx_ring	*rxr = adapter->rx_rings;
1427 	u32		reg_icr;
1428 	int		rx_done;
1429 
1430 	EM_CORE_LOCK(adapter);
1431 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1432 		EM_CORE_UNLOCK(adapter);
1433 		return (0);
1434 	}
1435 
1436 	if (cmd == POLL_AND_CHECK_STATUS) {
1437 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1438 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1439 			callout_stop(&adapter->timer);
1440 			adapter->hw.mac.get_link_status = 1;
1441 			em_update_link_status(adapter);
1442 			callout_reset(&adapter->timer, hz,
1443 			    em_local_timer, adapter);
1444 		}
1445 	}
1446 	EM_CORE_UNLOCK(adapter);
1447 
1448 	em_rxeof(rxr, count, &rx_done);
1449 
1450 	EM_TX_LOCK(txr);
1451 	em_txeof(txr);
1452 #ifdef EM_MULTIQUEUE
1453 	if (!drbr_empty(ifp, txr->br))
1454 		em_mq_start_locked(ifp, txr, NULL);
1455 #else
1456 	if (!if_sendq_empty(ifp))
1457 		em_start_locked(ifp, txr);
1458 #endif
1459 	EM_TX_UNLOCK(txr);
1460 
1461 	return (rx_done);
1462 }
1463 #endif /* DEVICE_POLLING */
1464 
1465 
1466 /*********************************************************************
1467  *
1468  *  Fast Legacy/MSI Combined Interrupt Service routine
1469  *
1470  *********************************************************************/
1471 static int
1472 em_irq_fast(void *arg)
1473 {
1474 	struct adapter	*adapter = arg;
1475 	if_t ifp;
1476 	u32		reg_icr;
1477 
1478 	ifp = adapter->ifp;
1479 
1480 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1481 
1482 	/* Hot eject?  */
1483 	if (reg_icr == 0xffffffff)
1484 		return FILTER_STRAY;
1485 
1486 	/* Definitely not our interrupt.  */
1487 	if (reg_icr == 0x0)
1488 		return FILTER_STRAY;
1489 
1490 	/*
1491 	 * Starting with the 82571 chip, bit 31 should be used to
1492 	 * determine whether the interrupt belongs to us.
1493 	 */
1494 	if (adapter->hw.mac.type >= e1000_82571 &&
1495 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1496 		return FILTER_STRAY;
1497 
1498 	em_disable_intr(adapter);
1499 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1500 
1501 	/* Link status change */
1502 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1503 		adapter->hw.mac.get_link_status = 1;
1504 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1505 	}
1506 
1507 	if (reg_icr & E1000_ICR_RXO)
1508 		adapter->rx_overruns++;
1509 	return FILTER_HANDLED;
1510 }
1511 
1512 /* Combined RX/TX handler, used by Legacy and MSI */
1513 static void
1514 em_handle_que(void *context, int pending)
1515 {
1516 	struct adapter	*adapter = context;
1517 	if_t ifp = adapter->ifp;
1518 	struct tx_ring	*txr = adapter->tx_rings;
1519 	struct rx_ring	*rxr = adapter->rx_rings;
1520 
1521 
1522 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1523 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1524 		EM_TX_LOCK(txr);
1525 		em_txeof(txr);
1526 #ifdef EM_MULTIQUEUE
1527 		if (!drbr_empty(ifp, txr->br))
1528 			em_mq_start_locked(ifp, txr, NULL);
1529 #else
1530 		if (!if_sendq_empty(ifp))
1531 			em_start_locked(ifp, txr);
1532 #endif
1533 		EM_TX_UNLOCK(txr);
1534 		if (more) {
1535 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1536 			return;
1537 		}
1538 	}
1539 
1540 	em_enable_intr(adapter);
1541 	return;
1542 }
1543 
1544 
1545 /*********************************************************************
1546  *
1547  *  MSIX Interrupt Service Routines
1548  *
1549  **********************************************************************/
1550 static void
1551 em_msix_tx(void *arg)
1552 {
1553 	struct tx_ring *txr = arg;
1554 	struct adapter *adapter = txr->adapter;
1555 	if_t ifp = adapter->ifp;
1556 
1557 	++txr->tx_irq;
1558 	EM_TX_LOCK(txr);
1559 	em_txeof(txr);
1560 #ifdef EM_MULTIQUEUE
1561 	if (!drbr_empty(ifp, txr->br))
1562 		em_mq_start_locked(ifp, txr, NULL);
1563 #else
1564 	if (!if_sendq_empty(ifp))
1565 		em_start_locked(ifp, txr);
1566 #endif
1567 	/* Reenable this interrupt */
1568 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1569 	EM_TX_UNLOCK(txr);
1570 	return;
1571 }
1572 
1573 /*********************************************************************
1574  *
1575  *  MSIX RX Interrupt Service routine
1576  *
1577  **********************************************************************/
1578 
1579 static void
1580 em_msix_rx(void *arg)
1581 {
1582 	struct rx_ring	*rxr = arg;
1583 	struct adapter	*adapter = rxr->adapter;
1584 	bool		more;
1585 
1586 	++rxr->rx_irq;
1587 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1588 		return;
1589 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1590 	if (more)
1591 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1592 	else
1593 		/* Reenable this interrupt */
1594 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1595 	return;
1596 }
1597 
1598 /*********************************************************************
1599  *
1600  *  MSIX Link Fast Interrupt Service routine
1601  *
1602  **********************************************************************/
1603 static void
1604 em_msix_link(void *arg)
1605 {
1606 	struct adapter	*adapter = arg;
1607 	u32		reg_icr;
1608 
1609 	++adapter->link_irq;
1610 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1611 
1612 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1613 		adapter->hw.mac.get_link_status = 1;
1614 		em_handle_link(adapter, 0);
1615 	} else
1616 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1617 		    EM_MSIX_LINK | E1000_IMS_LSC);
1618 	return;
1619 }
1620 
1621 static void
1622 em_handle_rx(void *context, int pending)
1623 {
1624 	struct rx_ring	*rxr = context;
1625 	struct adapter	*adapter = rxr->adapter;
1626         bool            more;
1627 
1628 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1629 	if (more)
1630 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1631 	else
1632 		/* Reenable this interrupt */
1633 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1634 }
1635 
1636 static void
1637 em_handle_tx(void *context, int pending)
1638 {
1639 	struct tx_ring	*txr = context;
1640 	struct adapter	*adapter = txr->adapter;
1641 	if_t ifp = adapter->ifp;
1642 
1643 	EM_TX_LOCK(txr);
1644 	em_txeof(txr);
1645 #ifdef EM_MULTIQUEUE
1646 	if (!drbr_empty(ifp, txr->br))
1647 		em_mq_start_locked(ifp, txr, NULL);
1648 #else
1649 	if (!if_sendq_empty(ifp))
1650 		em_start_locked(ifp, txr);
1651 #endif
1652 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1653 	EM_TX_UNLOCK(txr);
1654 }
1655 
1656 static void
1657 em_handle_link(void *context, int pending)
1658 {
1659 	struct adapter	*adapter = context;
1660 	struct tx_ring	*txr = adapter->tx_rings;
1661 	if_t ifp = adapter->ifp;
1662 
1663 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1664 		return;
1665 
1666 	EM_CORE_LOCK(adapter);
1667 	callout_stop(&adapter->timer);
1668 	em_update_link_status(adapter);
1669 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1670 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1671 	    EM_MSIX_LINK | E1000_IMS_LSC);
1672 	if (adapter->link_active) {
1673 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1674 			EM_TX_LOCK(txr);
1675 #ifdef EM_MULTIQUEUE
1676 			if (!drbr_empty(ifp, txr->br))
1677 				em_mq_start_locked(ifp, txr, NULL);
1678 #else
1679 			if (if_sendq_empty(ifp))
1680 				em_start_locked(ifp, txr);
1681 #endif
1682 			EM_TX_UNLOCK(txr);
1683 		}
1684 	}
1685 	EM_CORE_UNLOCK(adapter);
1686 }
1687 
1688 
1689 /*********************************************************************
1690  *
1691  *  Media Ioctl callback
1692  *
1693  *  This routine is called whenever the user queries the status of
1694  *  the interface using ifconfig.
1695  *
1696  **********************************************************************/
1697 static void
1698 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1699 {
1700 	struct adapter *adapter = if_getsoftc(ifp);
1701 	u_char fiber_type = IFM_1000_SX;
1702 
1703 	INIT_DEBUGOUT("em_media_status: begin");
1704 
1705 	EM_CORE_LOCK(adapter);
1706 	em_update_link_status(adapter);
1707 
1708 	ifmr->ifm_status = IFM_AVALID;
1709 	ifmr->ifm_active = IFM_ETHER;
1710 
1711 	if (!adapter->link_active) {
1712 		EM_CORE_UNLOCK(adapter);
1713 		return;
1714 	}
1715 
1716 	ifmr->ifm_status |= IFM_ACTIVE;
1717 
1718 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1719 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1720 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1721 	} else {
1722 		switch (adapter->link_speed) {
1723 		case 10:
1724 			ifmr->ifm_active |= IFM_10_T;
1725 			break;
1726 		case 100:
1727 			ifmr->ifm_active |= IFM_100_TX;
1728 			break;
1729 		case 1000:
1730 			ifmr->ifm_active |= IFM_1000_T;
1731 			break;
1732 		}
1733 		if (adapter->link_duplex == FULL_DUPLEX)
1734 			ifmr->ifm_active |= IFM_FDX;
1735 		else
1736 			ifmr->ifm_active |= IFM_HDX;
1737 	}
1738 	EM_CORE_UNLOCK(adapter);
1739 }
1740 
1741 /*********************************************************************
1742  *
1743  *  Media Ioctl callback
1744  *
1745  *  This routine is called when the user changes speed/duplex using
1746  *  media/mediopt option with ifconfig.
1747  *
1748  **********************************************************************/
1749 static int
1750 em_media_change(if_t ifp)
1751 {
1752 	struct adapter *adapter = if_getsoftc(ifp);
1753 	struct ifmedia  *ifm = &adapter->media;
1754 
1755 	INIT_DEBUGOUT("em_media_change: begin");
1756 
1757 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1758 		return (EINVAL);
1759 
1760 	EM_CORE_LOCK(adapter);
1761 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1762 	case IFM_AUTO:
1763 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1764 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1765 		break;
1766 	case IFM_1000_LX:
1767 	case IFM_1000_SX:
1768 	case IFM_1000_T:
1769 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1770 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1771 		break;
1772 	case IFM_100_TX:
1773 		adapter->hw.mac.autoneg = FALSE;
1774 		adapter->hw.phy.autoneg_advertised = 0;
1775 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1776 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1777 		else
1778 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1779 		break;
1780 	case IFM_10_T:
1781 		adapter->hw.mac.autoneg = FALSE;
1782 		adapter->hw.phy.autoneg_advertised = 0;
1783 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1784 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1785 		else
1786 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1787 		break;
1788 	default:
1789 		device_printf(adapter->dev, "Unsupported media type\n");
1790 	}
1791 
1792 	em_init_locked(adapter);
1793 	EM_CORE_UNLOCK(adapter);
1794 
1795 	return (0);
1796 }
1797 
1798 /*********************************************************************
1799  *
1800  *  This routine maps the mbufs to tx descriptors.
1801  *
1802  *  return 0 on success, positive on failure
1803  **********************************************************************/
1804 
1805 static int
1806 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1807 {
1808 	struct adapter		*adapter = txr->adapter;
1809 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1810 	bus_dmamap_t		map;
1811 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1812 	struct e1000_tx_desc	*ctxd = NULL;
1813 	struct mbuf		*m_head;
1814 	struct ether_header	*eh;
1815 	struct ip		*ip = NULL;
1816 	struct tcphdr		*tp = NULL;
1817 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1818 	int			ip_off, poff;
1819 	int			nsegs, i, j, first, last = 0;
1820 	int			error, do_tso, tso_desc = 0, remap = 1;
1821 
1822 	m_head = *m_headp;
1823 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1824 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1825 	ip_off = poff = 0;
1826 
1827 	/*
1828 	 * Intel recommends entire IP/TCP header length reside in a single
1829 	 * buffer. If multiple descriptors are used to describe the IP and
1830 	 * TCP header, each descriptor should describe one or more
1831 	 * complete headers; descriptors referencing only parts of headers
1832 	 * are not supported. If all layer headers are not coalesced into
1833 	 * a single buffer, each buffer should not cross a 4KB boundary,
1834 	 * or be larger than the maximum read request size.
1835 	 * Controller also requires modifing IP/TCP header to make TSO work
1836 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1837 	 * IP/TCP header into a single buffer to meet the requirement of
1838 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1839 	 * which also has similiar restrictions.
1840 	 */
1841 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1842 		if (do_tso || (m_head->m_next != NULL &&
1843 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1844 			if (M_WRITABLE(*m_headp) == 0) {
1845 				m_head = m_dup(*m_headp, M_NOWAIT);
1846 				m_freem(*m_headp);
1847 				if (m_head == NULL) {
1848 					*m_headp = NULL;
1849 					return (ENOBUFS);
1850 				}
1851 				*m_headp = m_head;
1852 			}
1853 		}
1854 		/*
1855 		 * XXX
1856 		 * Assume IPv4, we don't have TSO/checksum offload support
1857 		 * for IPv6 yet.
1858 		 */
1859 		ip_off = sizeof(struct ether_header);
1860 		m_head = m_pullup(m_head, ip_off);
1861 		if (m_head == NULL) {
1862 			*m_headp = NULL;
1863 			return (ENOBUFS);
1864 		}
1865 		eh = mtod(m_head, struct ether_header *);
1866 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1867 			ip_off = sizeof(struct ether_vlan_header);
1868 			m_head = m_pullup(m_head, ip_off);
1869 			if (m_head == NULL) {
1870 				*m_headp = NULL;
1871 				return (ENOBUFS);
1872 			}
1873 		}
1874 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1875 		if (m_head == NULL) {
1876 			*m_headp = NULL;
1877 			return (ENOBUFS);
1878 		}
1879 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1880 		poff = ip_off + (ip->ip_hl << 2);
1881 		if (do_tso) {
1882 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1883 			if (m_head == NULL) {
1884 				*m_headp = NULL;
1885 				return (ENOBUFS);
1886 			}
1887 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1888 			/*
1889 			 * TSO workaround:
1890 			 *   pull 4 more bytes of data into it.
1891 			 */
1892 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1893 			if (m_head == NULL) {
1894 				*m_headp = NULL;
1895 				return (ENOBUFS);
1896 			}
1897 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1898 			ip->ip_len = 0;
1899 			ip->ip_sum = 0;
1900 			/*
1901 			 * The pseudo TCP checksum does not include TCP payload
1902 			 * length so driver should recompute the checksum here
1903 			 * what hardware expect to see. This is adherence of
1904 			 * Microsoft's Large Send specification.
1905 			 */
1906 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1907 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1908 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1909 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1910 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1911 			if (m_head == NULL) {
1912 				*m_headp = NULL;
1913 				return (ENOBUFS);
1914 			}
1915 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1916 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1917 			if (m_head == NULL) {
1918 				*m_headp = NULL;
1919 				return (ENOBUFS);
1920 			}
1921 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1922 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1923 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1924 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1925 			if (m_head == NULL) {
1926 				*m_headp = NULL;
1927 				return (ENOBUFS);
1928 			}
1929 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1930 		}
1931 		*m_headp = m_head;
1932 	}
1933 
1934 	/*
1935 	 * Map the packet for DMA
1936 	 *
1937 	 * Capture the first descriptor index,
1938 	 * this descriptor will have the index
1939 	 * of the EOP which is the only one that
1940 	 * now gets a DONE bit writeback.
1941 	 */
1942 	first = txr->next_avail_desc;
1943 	tx_buffer = &txr->tx_buffers[first];
1944 	tx_buffer_mapped = tx_buffer;
1945 	map = tx_buffer->map;
1946 
1947 retry:
1948 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1949 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1950 
1951 	/*
1952 	 * There are two types of errors we can (try) to handle:
1953 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1954 	 *   out of segments.  Defragment the mbuf chain and try again.
1955 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1956 	 *   at this point in time.  Defer sending and try again later.
1957 	 * All other errors, in particular EINVAL, are fatal and prevent the
1958 	 * mbuf chain from ever going through.  Drop it and report error.
1959 	 */
1960 	if (error == EFBIG && remap) {
1961 		struct mbuf *m;
1962 
1963 		m = m_defrag(*m_headp, M_NOWAIT);
1964 		if (m == NULL) {
1965 			adapter->mbuf_alloc_failed++;
1966 			m_freem(*m_headp);
1967 			*m_headp = NULL;
1968 			return (ENOBUFS);
1969 		}
1970 		*m_headp = m;
1971 
1972 		/* Try it again, but only once */
1973 		remap = 0;
1974 		goto retry;
1975 	} else if (error == ENOMEM) {
1976 		adapter->no_tx_dma_setup++;
1977 		return (error);
1978 	} else if (error != 0) {
1979 		adapter->no_tx_dma_setup++;
1980 		m_freem(*m_headp);
1981 		*m_headp = NULL;
1982 		return (error);
1983 	}
1984 
1985 	/*
1986 	 * TSO Hardware workaround, if this packet is not
1987 	 * TSO, and is only a single descriptor long, and
1988 	 * it follows a TSO burst, then we need to add a
1989 	 * sentinel descriptor to prevent premature writeback.
1990 	 */
1991 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1992 		if (nsegs == 1)
1993 			tso_desc = TRUE;
1994 		txr->tx_tso = FALSE;
1995 	}
1996 
1997         if (nsegs > (txr->tx_avail - 2)) {
1998                 txr->no_desc_avail++;
1999 		bus_dmamap_unload(txr->txtag, map);
2000 		return (ENOBUFS);
2001         }
2002 	m_head = *m_headp;
2003 
2004 	/* Do hardware assists */
2005 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2006 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2007 		    &txd_upper, &txd_lower);
2008 		/* we need to make a final sentinel transmit desc */
2009 		tso_desc = TRUE;
2010 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2011 		em_transmit_checksum_setup(txr, m_head,
2012 		    ip_off, ip, &txd_upper, &txd_lower);
2013 
2014 	if (m_head->m_flags & M_VLANTAG) {
2015 		/* Set the vlan id. */
2016 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2017                 /* Tell hardware to add tag */
2018                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2019         }
2020 
2021 	i = txr->next_avail_desc;
2022 
2023 	/* Set up our transmit descriptors */
2024 	for (j = 0; j < nsegs; j++) {
2025 		bus_size_t seg_len;
2026 		bus_addr_t seg_addr;
2027 
2028 		tx_buffer = &txr->tx_buffers[i];
2029 		ctxd = &txr->tx_base[i];
2030 		seg_addr = segs[j].ds_addr;
2031 		seg_len  = segs[j].ds_len;
2032 		/*
2033 		** TSO Workaround:
2034 		** If this is the last descriptor, we want to
2035 		** split it so we have a small final sentinel
2036 		*/
2037 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2038 			seg_len -= 4;
2039 			ctxd->buffer_addr = htole64(seg_addr);
2040 			ctxd->lower.data = htole32(
2041 			adapter->txd_cmd | txd_lower | seg_len);
2042 			ctxd->upper.data =
2043 			    htole32(txd_upper);
2044 			if (++i == adapter->num_tx_desc)
2045 				i = 0;
2046 			/* Now make the sentinel */
2047 			++txd_used; /* using an extra txd */
2048 			ctxd = &txr->tx_base[i];
2049 			tx_buffer = &txr->tx_buffers[i];
2050 			ctxd->buffer_addr =
2051 			    htole64(seg_addr + seg_len);
2052 			ctxd->lower.data = htole32(
2053 			adapter->txd_cmd | txd_lower | 4);
2054 			ctxd->upper.data =
2055 			    htole32(txd_upper);
2056 			last = i;
2057 			if (++i == adapter->num_tx_desc)
2058 				i = 0;
2059 		} else {
2060 			ctxd->buffer_addr = htole64(seg_addr);
2061 			ctxd->lower.data = htole32(
2062 			adapter->txd_cmd | txd_lower | seg_len);
2063 			ctxd->upper.data =
2064 			    htole32(txd_upper);
2065 			last = i;
2066 			if (++i == adapter->num_tx_desc)
2067 				i = 0;
2068 		}
2069 		tx_buffer->m_head = NULL;
2070 		tx_buffer->next_eop = -1;
2071 	}
2072 
2073 	txr->next_avail_desc = i;
2074 	txr->tx_avail -= nsegs;
2075 	if (tso_desc) /* TSO used an extra for sentinel */
2076 		txr->tx_avail -= txd_used;
2077 
2078         tx_buffer->m_head = m_head;
2079 	/*
2080 	** Here we swap the map so the last descriptor,
2081 	** which gets the completion interrupt has the
2082 	** real map, and the first descriptor gets the
2083 	** unused map from this descriptor.
2084 	*/
2085 	tx_buffer_mapped->map = tx_buffer->map;
2086 	tx_buffer->map = map;
2087         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2088 
2089         /*
2090          * Last Descriptor of Packet
2091 	 * needs End Of Packet (EOP)
2092 	 * and Report Status (RS)
2093          */
2094         ctxd->lower.data |=
2095 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2096 	/*
2097 	 * Keep track in the first buffer which
2098 	 * descriptor will be written back
2099 	 */
2100 	tx_buffer = &txr->tx_buffers[first];
2101 	tx_buffer->next_eop = last;
2102 	/* Update the watchdog time early and often */
2103 	txr->watchdog_time = ticks;
2104 
2105 	/*
2106 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2107 	 * that this frame is available to transmit.
2108 	 */
2109 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2110 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2111 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2112 
2113 	return (0);
2114 }
2115 
2116 static void
2117 em_set_promisc(struct adapter *adapter)
2118 {
2119 	if_t ifp = adapter->ifp;
2120 	u32		reg_rctl;
2121 
2122 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2123 
2124 	if (if_getflags(ifp) & IFF_PROMISC) {
2125 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2126 		/* Turn this on if you want to see bad packets */
2127 		if (em_debug_sbp)
2128 			reg_rctl |= E1000_RCTL_SBP;
2129 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2130 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2131 		reg_rctl |= E1000_RCTL_MPE;
2132 		reg_rctl &= ~E1000_RCTL_UPE;
2133 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2134 	}
2135 }
2136 
2137 static void
2138 em_disable_promisc(struct adapter *adapter)
2139 {
2140 	if_t		ifp = adapter->ifp;
2141 	u32		reg_rctl;
2142 	int		mcnt = 0;
2143 
2144 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2145 	reg_rctl &=  (~E1000_RCTL_UPE);
2146 	if (if_getflags(ifp) & IFF_ALLMULTI)
2147 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2148 	else
2149 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2150 	/* Don't disable if in MAX groups */
2151 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2152 		reg_rctl &=  (~E1000_RCTL_MPE);
2153 	reg_rctl &=  (~E1000_RCTL_SBP);
2154 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2155 }
2156 
2157 
2158 /*********************************************************************
2159  *  Multicast Update
2160  *
2161  *  This routine is called whenever multicast address list is updated.
2162  *
2163  **********************************************************************/
2164 
2165 static void
2166 em_set_multi(struct adapter *adapter)
2167 {
2168 	if_t ifp = adapter->ifp;
2169 	u32 reg_rctl = 0;
2170 	u8  *mta; /* Multicast array memory */
2171 	int mcnt = 0;
2172 
2173 	IOCTL_DEBUGOUT("em_set_multi: begin");
2174 
2175 	mta = adapter->mta;
2176 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2177 
2178 	if (adapter->hw.mac.type == e1000_82542 &&
2179 	    adapter->hw.revision_id == E1000_REVISION_2) {
2180 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2181 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2182 			e1000_pci_clear_mwi(&adapter->hw);
2183 		reg_rctl |= E1000_RCTL_RST;
2184 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2185 		msec_delay(5);
2186 	}
2187 
2188 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2189 
2190 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2191 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2192 		reg_rctl |= E1000_RCTL_MPE;
2193 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2194 	} else
2195 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2196 
2197 	if (adapter->hw.mac.type == e1000_82542 &&
2198 	    adapter->hw.revision_id == E1000_REVISION_2) {
2199 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2200 		reg_rctl &= ~E1000_RCTL_RST;
2201 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2202 		msec_delay(5);
2203 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2204 			e1000_pci_set_mwi(&adapter->hw);
2205 	}
2206 }
2207 
2208 
2209 /*********************************************************************
2210  *  Timer routine
2211  *
2212  *  This routine checks for link status and updates statistics.
2213  *
2214  **********************************************************************/
2215 
2216 static void
2217 em_local_timer(void *arg)
2218 {
2219 	struct adapter	*adapter = arg;
2220 	if_t ifp = adapter->ifp;
2221 	struct tx_ring	*txr = adapter->tx_rings;
2222 	struct rx_ring	*rxr = adapter->rx_rings;
2223 	u32		trigger;
2224 
2225 	EM_CORE_LOCK_ASSERT(adapter);
2226 
2227 	em_update_link_status(adapter);
2228 	em_update_stats_counters(adapter);
2229 
2230 	/* Reset LAA into RAR[0] on 82571 */
2231 	if ((adapter->hw.mac.type == e1000_82571) &&
2232 	    e1000_get_laa_state_82571(&adapter->hw))
2233 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2234 
2235 	/* Mask to use in the irq trigger */
2236 	if (adapter->msix_mem)
2237 		trigger = rxr->ims;
2238 	else
2239 		trigger = E1000_ICS_RXDMT0;
2240 
2241 	/*
2242 	** Check on the state of the TX queue(s), this
2243 	** can be done without the lock because its RO
2244 	** and the HUNG state will be static if set.
2245 	*/
2246 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2247 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2248 		    (adapter->pause_frames == 0))
2249 			goto hung;
2250 		/* Schedule a TX tasklet if needed */
2251 		if (txr->tx_avail <= EM_MAX_SCATTER)
2252 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2253 	}
2254 
2255 	adapter->pause_frames = 0;
2256 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2257 #ifndef DEVICE_POLLING
2258 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2259 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2260 #endif
2261 	return;
2262 hung:
2263 	/* Looks like we're hung */
2264 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2265 	device_printf(adapter->dev,
2266 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2267 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2268 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2269 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2270 	    "Next TX to Clean = %d\n",
2271 	    txr->me, txr->tx_avail, txr->next_to_clean);
2272 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2273 	adapter->watchdog_events++;
2274 	adapter->pause_frames = 0;
2275 	em_init_locked(adapter);
2276 }
2277 
2278 
2279 static void
2280 em_update_link_status(struct adapter *adapter)
2281 {
2282 	struct e1000_hw *hw = &adapter->hw;
2283 	if_t ifp = adapter->ifp;
2284 	device_t dev = adapter->dev;
2285 	struct tx_ring *txr = adapter->tx_rings;
2286 	u32 link_check = 0;
2287 
2288 	/* Get the cached link value or read phy for real */
2289 	switch (hw->phy.media_type) {
2290 	case e1000_media_type_copper:
2291 		if (hw->mac.get_link_status) {
2292 			/* Do the work to read phy */
2293 			e1000_check_for_link(hw);
2294 			link_check = !hw->mac.get_link_status;
2295 			if (link_check) /* ESB2 fix */
2296 				e1000_cfg_on_link_up(hw);
2297 		} else
2298 			link_check = TRUE;
2299 		break;
2300 	case e1000_media_type_fiber:
2301 		e1000_check_for_link(hw);
2302 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2303                                  E1000_STATUS_LU);
2304 		break;
2305 	case e1000_media_type_internal_serdes:
2306 		e1000_check_for_link(hw);
2307 		link_check = adapter->hw.mac.serdes_has_link;
2308 		break;
2309 	default:
2310 	case e1000_media_type_unknown:
2311 		break;
2312 	}
2313 
2314 	/* Now check for a transition */
2315 	if (link_check && (adapter->link_active == 0)) {
2316 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2317 		    &adapter->link_duplex);
2318 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2319 		if ((adapter->link_speed != SPEED_1000) &&
2320 		    ((hw->mac.type == e1000_82571) ||
2321 		    (hw->mac.type == e1000_82572))) {
2322 			int tarc0;
2323 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2324 			tarc0 &= ~SPEED_MODE_BIT;
2325 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2326 		}
2327 		if (bootverbose)
2328 			device_printf(dev, "Link is up %d Mbps %s\n",
2329 			    adapter->link_speed,
2330 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2331 			    "Full Duplex" : "Half Duplex"));
2332 		adapter->link_active = 1;
2333 		adapter->smartspeed = 0;
2334 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2335 		if_link_state_change(ifp, LINK_STATE_UP);
2336 	} else if (!link_check && (adapter->link_active == 1)) {
2337 		if_setbaudrate(ifp, 0);
2338 		adapter->link_speed = 0;
2339 		adapter->link_duplex = 0;
2340 		if (bootverbose)
2341 			device_printf(dev, "Link is Down\n");
2342 		adapter->link_active = 0;
2343 		/* Link down, disable watchdog */
2344 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2345 			txr->queue_status = EM_QUEUE_IDLE;
2346 		if_link_state_change(ifp, LINK_STATE_DOWN);
2347 	}
2348 }
2349 
2350 /*********************************************************************
2351  *
2352  *  This routine disables all traffic on the adapter by issuing a
2353  *  global reset on the MAC and deallocates TX/RX buffers.
2354  *
2355  *  This routine should always be called with BOTH the CORE
2356  *  and TX locks.
2357  **********************************************************************/
2358 
2359 static void
2360 em_stop(void *arg)
2361 {
2362 	struct adapter	*adapter = arg;
2363 	if_t ifp = adapter->ifp;
2364 	struct tx_ring	*txr = adapter->tx_rings;
2365 
2366 	EM_CORE_LOCK_ASSERT(adapter);
2367 
2368 	INIT_DEBUGOUT("em_stop: begin");
2369 
2370 	em_disable_intr(adapter);
2371 	callout_stop(&adapter->timer);
2372 
2373 	/* Tell the stack that the interface is no longer active */
2374 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2375 
2376         /* Unarm watchdog timer. */
2377 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2378 		EM_TX_LOCK(txr);
2379 		txr->queue_status = EM_QUEUE_IDLE;
2380 		EM_TX_UNLOCK(txr);
2381 	}
2382 
2383 	e1000_reset_hw(&adapter->hw);
2384 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2385 
2386 	e1000_led_off(&adapter->hw);
2387 	e1000_cleanup_led(&adapter->hw);
2388 }
2389 
2390 
2391 /*********************************************************************
2392  *
2393  *  Determine hardware revision.
2394  *
2395  **********************************************************************/
2396 static void
2397 em_identify_hardware(struct adapter *adapter)
2398 {
2399 	device_t dev = adapter->dev;
2400 
2401 	/* Make sure our PCI config space has the necessary stuff set */
2402 	pci_enable_busmaster(dev);
2403 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2404 
2405 	/* Save off the information about this board */
2406 	adapter->hw.vendor_id = pci_get_vendor(dev);
2407 	adapter->hw.device_id = pci_get_device(dev);
2408 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2409 	adapter->hw.subsystem_vendor_id =
2410 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2411 	adapter->hw.subsystem_device_id =
2412 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2413 
2414 	/* Do Shared Code Init and Setup */
2415 	if (e1000_set_mac_type(&adapter->hw)) {
2416 		device_printf(dev, "Setup init failure\n");
2417 		return;
2418 	}
2419 }
2420 
2421 static int
2422 em_allocate_pci_resources(struct adapter *adapter)
2423 {
2424 	device_t	dev = adapter->dev;
2425 	int		rid;
2426 
2427 	rid = PCIR_BAR(0);
2428 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2429 	    &rid, RF_ACTIVE);
2430 	if (adapter->memory == NULL) {
2431 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2432 		return (ENXIO);
2433 	}
2434 	adapter->osdep.mem_bus_space_tag =
2435 	    rman_get_bustag(adapter->memory);
2436 	adapter->osdep.mem_bus_space_handle =
2437 	    rman_get_bushandle(adapter->memory);
2438 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2439 
2440 	/* Default to a single queue */
2441 	adapter->num_queues = 1;
2442 
2443 	/*
2444 	 * Setup MSI/X or MSI if PCI Express
2445 	 */
2446 	adapter->msix = em_setup_msix(adapter);
2447 
2448 	adapter->hw.back = &adapter->osdep;
2449 
2450 	return (0);
2451 }
2452 
2453 /*********************************************************************
2454  *
2455  *  Setup the Legacy or MSI Interrupt handler
2456  *
2457  **********************************************************************/
2458 int
2459 em_allocate_legacy(struct adapter *adapter)
2460 {
2461 	device_t dev = adapter->dev;
2462 	struct tx_ring	*txr = adapter->tx_rings;
2463 	int error, rid = 0;
2464 
2465 	/* Manually turn off all interrupts */
2466 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2467 
2468 	if (adapter->msix == 1) /* using MSI */
2469 		rid = 1;
2470 	/* We allocate a single interrupt resource */
2471 	adapter->res = bus_alloc_resource_any(dev,
2472 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2473 	if (adapter->res == NULL) {
2474 		device_printf(dev, "Unable to allocate bus resource: "
2475 		    "interrupt\n");
2476 		return (ENXIO);
2477 	}
2478 
2479 	/*
2480 	 * Allocate a fast interrupt and the associated
2481 	 * deferred processing contexts.
2482 	 */
2483 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2484 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2485 	    taskqueue_thread_enqueue, &adapter->tq);
2486 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2487 	    device_get_nameunit(adapter->dev));
2488 	/* Use a TX only tasklet for local timer */
2489 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2490 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2491 	    taskqueue_thread_enqueue, &txr->tq);
2492 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2493 	    device_get_nameunit(adapter->dev));
2494 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2495 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2496 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2497 		device_printf(dev, "Failed to register fast interrupt "
2498 			    "handler: %d\n", error);
2499 		taskqueue_free(adapter->tq);
2500 		adapter->tq = NULL;
2501 		return (error);
2502 	}
2503 
2504 	return (0);
2505 }
2506 
2507 /*********************************************************************
2508  *
2509  *  Setup the MSIX Interrupt handlers
2510  *   This is not really Multiqueue, rather
2511  *   its just seperate interrupt vectors
2512  *   for TX, RX, and Link.
2513  *
2514  **********************************************************************/
2515 int
2516 em_allocate_msix(struct adapter *adapter)
2517 {
2518 	device_t	dev = adapter->dev;
2519 	struct		tx_ring *txr = adapter->tx_rings;
2520 	struct		rx_ring *rxr = adapter->rx_rings;
2521 	int		error, rid, vector = 0;
2522 
2523 
2524 	/* Make sure all interrupts are disabled */
2525 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2526 
2527 	/* First set up ring resources */
2528 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2529 
2530 		/* RX ring */
2531 		rid = vector + 1;
2532 
2533 		rxr->res = bus_alloc_resource_any(dev,
2534 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2535 		if (rxr->res == NULL) {
2536 			device_printf(dev,
2537 			    "Unable to allocate bus resource: "
2538 			    "RX MSIX Interrupt %d\n", i);
2539 			return (ENXIO);
2540 		}
2541 		if ((error = bus_setup_intr(dev, rxr->res,
2542 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2543 		    rxr, &rxr->tag)) != 0) {
2544 			device_printf(dev, "Failed to register RX handler");
2545 			return (error);
2546 		}
2547 #if __FreeBSD_version >= 800504
2548 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2549 #endif
2550 		rxr->msix = vector++; /* NOTE increment vector for TX */
2551 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2552 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2553 		    taskqueue_thread_enqueue, &rxr->tq);
2554 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2555 		    device_get_nameunit(adapter->dev));
2556 		/*
2557 		** Set the bit to enable interrupt
2558 		** in E1000_IMS -- bits 20 and 21
2559 		** are for RX0 and RX1, note this has
2560 		** NOTHING to do with the MSIX vector
2561 		*/
2562 		rxr->ims = 1 << (20 + i);
2563 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2564 
2565 		/* TX ring */
2566 		rid = vector + 1;
2567 		txr->res = bus_alloc_resource_any(dev,
2568 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2569 		if (txr->res == NULL) {
2570 			device_printf(dev,
2571 			    "Unable to allocate bus resource: "
2572 			    "TX MSIX Interrupt %d\n", i);
2573 			return (ENXIO);
2574 		}
2575 		if ((error = bus_setup_intr(dev, txr->res,
2576 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2577 		    txr, &txr->tag)) != 0) {
2578 			device_printf(dev, "Failed to register TX handler");
2579 			return (error);
2580 		}
2581 #if __FreeBSD_version >= 800504
2582 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2583 #endif
2584 		txr->msix = vector++; /* Increment vector for next pass */
2585 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2586 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2587 		    taskqueue_thread_enqueue, &txr->tq);
2588 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2589 		    device_get_nameunit(adapter->dev));
2590 		/*
2591 		** Set the bit to enable interrupt
2592 		** in E1000_IMS -- bits 22 and 23
2593 		** are for TX0 and TX1, note this has
2594 		** NOTHING to do with the MSIX vector
2595 		*/
2596 		txr->ims = 1 << (22 + i);
2597 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2598 	}
2599 
2600 	/* Link interrupt */
2601 	++rid;
2602 	adapter->res = bus_alloc_resource_any(dev,
2603 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2604 	if (!adapter->res) {
2605 		device_printf(dev,"Unable to allocate "
2606 		    "bus resource: Link interrupt [%d]\n", rid);
2607 		return (ENXIO);
2608         }
2609 	/* Set the link handler function */
2610 	error = bus_setup_intr(dev, adapter->res,
2611 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2612 	    em_msix_link, adapter, &adapter->tag);
2613 	if (error) {
2614 		adapter->res = NULL;
2615 		device_printf(dev, "Failed to register LINK handler");
2616 		return (error);
2617 	}
2618 #if __FreeBSD_version >= 800504
2619 		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2620 #endif
2621 	adapter->linkvec = vector;
2622 	adapter->ivars |=  (8 | vector) << 16;
2623 	adapter->ivars |= 0x80000000;
2624 
2625 	return (0);
2626 }
2627 
2628 
2629 static void
2630 em_free_pci_resources(struct adapter *adapter)
2631 {
2632 	device_t	dev = adapter->dev;
2633 	struct tx_ring	*txr;
2634 	struct rx_ring	*rxr;
2635 	int		rid;
2636 
2637 
2638 	/*
2639 	** Release all the queue interrupt resources:
2640 	*/
2641 	for (int i = 0; i < adapter->num_queues; i++) {
2642 		txr = &adapter->tx_rings[i];
2643 		rxr = &adapter->rx_rings[i];
2644 		/* an early abort? */
2645 		if ((txr == NULL) || (rxr == NULL))
2646 			break;
2647 		rid = txr->msix +1;
2648 		if (txr->tag != NULL) {
2649 			bus_teardown_intr(dev, txr->res, txr->tag);
2650 			txr->tag = NULL;
2651 		}
2652 		if (txr->res != NULL)
2653 			bus_release_resource(dev, SYS_RES_IRQ,
2654 			    rid, txr->res);
2655 		rid = rxr->msix +1;
2656 		if (rxr->tag != NULL) {
2657 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2658 			rxr->tag = NULL;
2659 		}
2660 		if (rxr->res != NULL)
2661 			bus_release_resource(dev, SYS_RES_IRQ,
2662 			    rid, rxr->res);
2663 	}
2664 
2665         if (adapter->linkvec) /* we are doing MSIX */
2666                 rid = adapter->linkvec + 1;
2667         else
2668                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2669 
2670 	if (adapter->tag != NULL) {
2671 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2672 		adapter->tag = NULL;
2673 	}
2674 
2675 	if (adapter->res != NULL)
2676 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2677 
2678 
2679 	if (adapter->msix)
2680 		pci_release_msi(dev);
2681 
2682 	if (adapter->msix_mem != NULL)
2683 		bus_release_resource(dev, SYS_RES_MEMORY,
2684 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2685 
2686 	if (adapter->memory != NULL)
2687 		bus_release_resource(dev, SYS_RES_MEMORY,
2688 		    PCIR_BAR(0), adapter->memory);
2689 
2690 	if (adapter->flash != NULL)
2691 		bus_release_resource(dev, SYS_RES_MEMORY,
2692 		    EM_FLASH, adapter->flash);
2693 }
2694 
2695 /*
2696  * Setup MSI or MSI/X
2697  */
2698 static int
2699 em_setup_msix(struct adapter *adapter)
2700 {
2701 	device_t dev = adapter->dev;
2702 	int val;
2703 
2704 	/*
2705 	** Setup MSI/X for Hartwell: tests have shown
2706 	** use of two queues to be unstable, and to
2707 	** provide no great gain anyway, so we simply
2708 	** seperate the interrupts and use a single queue.
2709 	*/
2710 	if ((adapter->hw.mac.type == e1000_82574) &&
2711 	    (em_enable_msix == TRUE)) {
2712 		/* Map the MSIX BAR */
2713 		int rid = PCIR_BAR(EM_MSIX_BAR);
2714 		adapter->msix_mem = bus_alloc_resource_any(dev,
2715 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2716        		if (adapter->msix_mem == NULL) {
2717 			/* May not be enabled */
2718                		device_printf(adapter->dev,
2719 			    "Unable to map MSIX table \n");
2720 			goto msi;
2721        		}
2722 		val = pci_msix_count(dev);
2723 		/* We only need/want 3 vectors */
2724 		if (val >= 3)
2725 			val = 3;
2726 		else {
2727                		device_printf(adapter->dev,
2728 			    "MSIX: insufficient vectors, using MSI\n");
2729 			goto msi;
2730 		}
2731 
2732 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2733 			device_printf(adapter->dev,
2734 			    "Using MSIX interrupts "
2735 			    "with %d vectors\n", val);
2736 			return (val);
2737 		}
2738 
2739 		/*
2740 		** If MSIX alloc failed or provided us with
2741 		** less than needed, free and fall through to MSI
2742 		*/
2743 		pci_release_msi(dev);
2744 	}
2745 msi:
2746 	if (adapter->msix_mem != NULL) {
2747 		bus_release_resource(dev, SYS_RES_MEMORY,
2748 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2749 		adapter->msix_mem = NULL;
2750 	}
2751        	val = 1;
2752        	if (pci_alloc_msi(dev, &val) == 0) {
2753                	device_printf(adapter->dev,"Using an MSI interrupt\n");
2754 		return (val);
2755 	}
2756 	/* Should only happen due to manual configuration */
2757 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2758 	return (0);
2759 }
2760 
2761 
2762 /*********************************************************************
2763  *
2764  *  Initialize the hardware to a configuration
2765  *  as specified by the adapter structure.
2766  *
2767  **********************************************************************/
2768 static void
2769 em_reset(struct adapter *adapter)
2770 {
2771 	device_t	dev = adapter->dev;
2772 	if_t ifp = adapter->ifp;
2773 	struct e1000_hw	*hw = &adapter->hw;
2774 	u16		rx_buffer_size;
2775 	u32		pba;
2776 
2777 	INIT_DEBUGOUT("em_reset: begin");
2778 
2779 	/* Set up smart power down as default off on newer adapters. */
2780 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2781 	    hw->mac.type == e1000_82572)) {
2782 		u16 phy_tmp = 0;
2783 
2784 		/* Speed up time to link by disabling smart power down. */
2785 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2786 		phy_tmp &= ~IGP02E1000_PM_SPD;
2787 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2788 	}
2789 
2790 	/*
2791 	 * Packet Buffer Allocation (PBA)
2792 	 * Writing PBA sets the receive portion of the buffer
2793 	 * the remainder is used for the transmit buffer.
2794 	 */
2795 	switch (hw->mac.type) {
2796 	/* Total Packet Buffer on these is 48K */
2797 	case e1000_82571:
2798 	case e1000_82572:
2799 	case e1000_80003es2lan:
2800 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2801 		break;
2802 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2803 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2804 		break;
2805 	case e1000_82574:
2806 	case e1000_82583:
2807 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2808 		break;
2809 	case e1000_ich8lan:
2810 		pba = E1000_PBA_8K;
2811 		break;
2812 	case e1000_ich9lan:
2813 	case e1000_ich10lan:
2814 		/* Boost Receive side for jumbo frames */
2815 		if (adapter->hw.mac.max_frame_size > 4096)
2816 			pba = E1000_PBA_14K;
2817 		else
2818 			pba = E1000_PBA_10K;
2819 		break;
2820 	case e1000_pchlan:
2821 	case e1000_pch2lan:
2822 	case e1000_pch_lpt:
2823 		pba = E1000_PBA_26K;
2824 		break;
2825 	default:
2826 		if (adapter->hw.mac.max_frame_size > 8192)
2827 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2828 		else
2829 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2830 	}
2831 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2832 
2833 	/*
2834 	 * These parameters control the automatic generation (Tx) and
2835 	 * response (Rx) to Ethernet PAUSE frames.
2836 	 * - High water mark should allow for at least two frames to be
2837 	 *   received after sending an XOFF.
2838 	 * - Low water mark works best when it is very near the high water mark.
2839 	 *   This allows the receiver to restart by sending XON when it has
2840 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2841 	 *   restart after one full frame is pulled from the buffer. There
2842 	 *   could be several smaller frames in the buffer and if so they will
2843 	 *   not trigger the XON until their total number reduces the buffer
2844 	 *   by 1500.
2845 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2846 	 */
2847 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2848 	hw->fc.high_water = rx_buffer_size -
2849 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2850 	hw->fc.low_water = hw->fc.high_water - 1500;
2851 
2852 	if (adapter->fc) /* locally set flow control value? */
2853 		hw->fc.requested_mode = adapter->fc;
2854 	else
2855 		hw->fc.requested_mode = e1000_fc_full;
2856 
2857 	if (hw->mac.type == e1000_80003es2lan)
2858 		hw->fc.pause_time = 0xFFFF;
2859 	else
2860 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2861 
2862 	hw->fc.send_xon = TRUE;
2863 
2864 	/* Device specific overrides/settings */
2865 	switch (hw->mac.type) {
2866 	case e1000_pchlan:
2867 		/* Workaround: no TX flow ctrl for PCH */
2868                 hw->fc.requested_mode = e1000_fc_rx_pause;
2869 		hw->fc.pause_time = 0xFFFF; /* override */
2870 		if (if_getmtu(ifp) > ETHERMTU) {
2871 			hw->fc.high_water = 0x3500;
2872 			hw->fc.low_water = 0x1500;
2873 		} else {
2874 			hw->fc.high_water = 0x5000;
2875 			hw->fc.low_water = 0x3000;
2876 		}
2877 		hw->fc.refresh_time = 0x1000;
2878 		break;
2879 	case e1000_pch2lan:
2880 	case e1000_pch_lpt:
2881 		hw->fc.high_water = 0x5C20;
2882 		hw->fc.low_water = 0x5048;
2883 		hw->fc.pause_time = 0x0650;
2884 		hw->fc.refresh_time = 0x0400;
2885 		/* Jumbos need adjusted PBA */
2886 		if (if_getmtu(ifp) > ETHERMTU)
2887 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2888 		else
2889 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2890 		break;
2891         case e1000_ich9lan:
2892         case e1000_ich10lan:
2893 		if (if_getmtu(ifp) > ETHERMTU) {
2894 			hw->fc.high_water = 0x2800;
2895 			hw->fc.low_water = hw->fc.high_water - 8;
2896 			break;
2897 		}
2898 		/* else fall thru */
2899 	default:
2900 		if (hw->mac.type == e1000_80003es2lan)
2901 			hw->fc.pause_time = 0xFFFF;
2902 		break;
2903 	}
2904 
2905 	/* Issue a global reset */
2906 	e1000_reset_hw(hw);
2907 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2908 	em_disable_aspm(adapter);
2909 	/* and a re-init */
2910 	if (e1000_init_hw(hw) < 0) {
2911 		device_printf(dev, "Hardware Initialization Failed\n");
2912 		return;
2913 	}
2914 
2915 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2916 	e1000_get_phy_info(hw);
2917 	e1000_check_for_link(hw);
2918 	return;
2919 }
2920 
2921 /*********************************************************************
2922  *
2923  *  Setup networking device structure and register an interface.
2924  *
2925  **********************************************************************/
2926 static int
2927 em_setup_interface(device_t dev, struct adapter *adapter)
2928 {
2929 	if_t ifp;
2930 
2931 	INIT_DEBUGOUT("em_setup_interface: begin");
2932 
2933 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2934 	if (ifp == 0) {
2935 		device_printf(dev, "can not allocate ifnet structure\n");
2936 		return (-1);
2937 	}
2938 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2939 	if_setdev(ifp, dev);
2940 	if_setinitfn(ifp, em_init);
2941 	if_setsoftc(ifp, adapter);
2942 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2943 	if_setioctlfn(ifp, em_ioctl);
2944 	if_setgetcounterfn(ifp, em_get_counter);
2945 #ifdef EM_MULTIQUEUE
2946 	/* Multiqueue stack interface */
2947 	if_settransmitfn(ifp, em_mq_start);
2948 	if_setqflushfn(ifp, em_qflush);
2949 #else
2950 	if_setstartfn(ifp, em_start);
2951 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
2952 	if_setsendqready(ifp);
2953 #endif
2954 
2955 	ether_ifattach(ifp, adapter->hw.mac.addr);
2956 
2957 	if_setcapabilities(ifp, 0);
2958 	if_setcapenable(ifp, 0);
2959 
2960 
2961 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
2962 	    IFCAP_TSO4, 0);
2963 	/*
2964 	 * Tell the upper layer(s) we
2965 	 * support full VLAN capability
2966 	 */
2967 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2968 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
2969 	    IFCAP_VLAN_MTU, 0);
2970 	if_setcapenable(ifp, if_getcapabilities(ifp));
2971 
2972 	/*
2973 	** Don't turn this on by default, if vlans are
2974 	** created on another pseudo device (eg. lagg)
2975 	** then vlan events are not passed thru, breaking
2976 	** operation, but with HW FILTER off it works. If
2977 	** using vlans directly on the em driver you can
2978 	** enable this and get full hardware tag filtering.
2979 	*/
2980 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
2981 
2982 #ifdef DEVICE_POLLING
2983 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
2984 #endif
2985 
2986 	/* Enable only WOL MAGIC by default */
2987 	if (adapter->wol) {
2988 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
2989 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
2990 	}
2991 
2992 	/*
2993 	 * Specify the media types supported by this adapter and register
2994 	 * callbacks to update media and link information
2995 	 */
2996 	ifmedia_init(&adapter->media, IFM_IMASK,
2997 	    em_media_change, em_media_status);
2998 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2999 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3000 		u_char fiber_type = IFM_1000_SX;	/* default type */
3001 
3002 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3003 			    0, NULL);
3004 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3005 	} else {
3006 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3007 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3008 			    0, NULL);
3009 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3010 			    0, NULL);
3011 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3012 			    0, NULL);
3013 		if (adapter->hw.phy.type != e1000_phy_ife) {
3014 			ifmedia_add(&adapter->media,
3015 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3016 			ifmedia_add(&adapter->media,
3017 				IFM_ETHER | IFM_1000_T, 0, NULL);
3018 		}
3019 	}
3020 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3021 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3022 	return (0);
3023 }
3024 
3025 
3026 /*
3027  * Manage DMA'able memory.
3028  */
3029 static void
3030 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3031 {
3032 	if (error)
3033 		return;
3034 	*(bus_addr_t *) arg = segs[0].ds_addr;
3035 }
3036 
3037 static int
3038 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3039         struct em_dma_alloc *dma, int mapflags)
3040 {
3041 	int error;
3042 
3043 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3044 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3045 				BUS_SPACE_MAXADDR,	/* lowaddr */
3046 				BUS_SPACE_MAXADDR,	/* highaddr */
3047 				NULL, NULL,		/* filter, filterarg */
3048 				size,			/* maxsize */
3049 				1,			/* nsegments */
3050 				size,			/* maxsegsize */
3051 				0,			/* flags */
3052 				NULL,			/* lockfunc */
3053 				NULL,			/* lockarg */
3054 				&dma->dma_tag);
3055 	if (error) {
3056 		device_printf(adapter->dev,
3057 		    "%s: bus_dma_tag_create failed: %d\n",
3058 		    __func__, error);
3059 		goto fail_0;
3060 	}
3061 
3062 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3063 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3064 	if (error) {
3065 		device_printf(adapter->dev,
3066 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3067 		    __func__, (uintmax_t)size, error);
3068 		goto fail_2;
3069 	}
3070 
3071 	dma->dma_paddr = 0;
3072 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3073 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3074 	if (error || dma->dma_paddr == 0) {
3075 		device_printf(adapter->dev,
3076 		    "%s: bus_dmamap_load failed: %d\n",
3077 		    __func__, error);
3078 		goto fail_3;
3079 	}
3080 
3081 	return (0);
3082 
3083 fail_3:
3084 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3085 fail_2:
3086 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3087 	bus_dma_tag_destroy(dma->dma_tag);
3088 fail_0:
3089 	dma->dma_tag = NULL;
3090 
3091 	return (error);
3092 }
3093 
3094 static void
3095 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3096 {
3097 	if (dma->dma_tag == NULL)
3098 		return;
3099 	if (dma->dma_paddr != 0) {
3100 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3101 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3102 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3103 		dma->dma_paddr = 0;
3104 	}
3105 	if (dma->dma_vaddr != NULL) {
3106 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3107 		dma->dma_vaddr = NULL;
3108 	}
3109 	bus_dma_tag_destroy(dma->dma_tag);
3110 	dma->dma_tag = NULL;
3111 }
3112 
3113 
3114 /*********************************************************************
3115  *
3116  *  Allocate memory for the transmit and receive rings, and then
3117  *  the descriptors associated with each, called only once at attach.
3118  *
3119  **********************************************************************/
3120 static int
3121 em_allocate_queues(struct adapter *adapter)
3122 {
3123 	device_t		dev = adapter->dev;
3124 	struct tx_ring		*txr = NULL;
3125 	struct rx_ring		*rxr = NULL;
3126 	int rsize, tsize, error = E1000_SUCCESS;
3127 	int txconf = 0, rxconf = 0;
3128 
3129 
3130 	/* Allocate the TX ring struct memory */
3131 	if (!(adapter->tx_rings =
3132 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3133 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134 		device_printf(dev, "Unable to allocate TX ring memory\n");
3135 		error = ENOMEM;
3136 		goto fail;
3137 	}
3138 
3139 	/* Now allocate the RX */
3140 	if (!(adapter->rx_rings =
3141 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3142 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3143 		device_printf(dev, "Unable to allocate RX ring memory\n");
3144 		error = ENOMEM;
3145 		goto rx_fail;
3146 	}
3147 
3148 	tsize = roundup2(adapter->num_tx_desc *
3149 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3150 	/*
3151 	 * Now set up the TX queues, txconf is needed to handle the
3152 	 * possibility that things fail midcourse and we need to
3153 	 * undo memory gracefully
3154 	 */
3155 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3156 		/* Set up some basics */
3157 		txr = &adapter->tx_rings[i];
3158 		txr->adapter = adapter;
3159 		txr->me = i;
3160 
3161 		/* Initialize the TX lock */
3162 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3163 		    device_get_nameunit(dev), txr->me);
3164 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3165 
3166 		if (em_dma_malloc(adapter, tsize,
3167 			&txr->txdma, BUS_DMA_NOWAIT)) {
3168 			device_printf(dev,
3169 			    "Unable to allocate TX Descriptor memory\n");
3170 			error = ENOMEM;
3171 			goto err_tx_desc;
3172 		}
3173 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3174 		bzero((void *)txr->tx_base, tsize);
3175 
3176         	if (em_allocate_transmit_buffers(txr)) {
3177 			device_printf(dev,
3178 			    "Critical Failure setting up transmit buffers\n");
3179 			error = ENOMEM;
3180 			goto err_tx_desc;
3181         	}
3182 #if __FreeBSD_version >= 800000
3183 		/* Allocate a buf ring */
3184 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3185 		    M_WAITOK, &txr->tx_mtx);
3186 #endif
3187 	}
3188 
3189 	/*
3190 	 * Next the RX queues...
3191 	 */
3192 	rsize = roundup2(adapter->num_rx_desc *
3193 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3194 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3195 		rxr = &adapter->rx_rings[i];
3196 		rxr->adapter = adapter;
3197 		rxr->me = i;
3198 
3199 		/* Initialize the RX lock */
3200 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3201 		    device_get_nameunit(dev), txr->me);
3202 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3203 
3204 		if (em_dma_malloc(adapter, rsize,
3205 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3206 			device_printf(dev,
3207 			    "Unable to allocate RxDescriptor memory\n");
3208 			error = ENOMEM;
3209 			goto err_rx_desc;
3210 		}
3211 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3212 		bzero((void *)rxr->rx_base, rsize);
3213 
3214         	/* Allocate receive buffers for the ring*/
3215 		if (em_allocate_receive_buffers(rxr)) {
3216 			device_printf(dev,
3217 			    "Critical Failure setting up receive buffers\n");
3218 			error = ENOMEM;
3219 			goto err_rx_desc;
3220 		}
3221 	}
3222 
3223 	return (0);
3224 
3225 err_rx_desc:
3226 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3227 		em_dma_free(adapter, &rxr->rxdma);
3228 err_tx_desc:
3229 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3230 		em_dma_free(adapter, &txr->txdma);
3231 	free(adapter->rx_rings, M_DEVBUF);
3232 rx_fail:
3233 #if __FreeBSD_version >= 800000
3234 	buf_ring_free(txr->br, M_DEVBUF);
3235 #endif
3236 	free(adapter->tx_rings, M_DEVBUF);
3237 fail:
3238 	return (error);
3239 }
3240 
3241 
3242 /*********************************************************************
3243  *
3244  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3245  *  the information needed to transmit a packet on the wire. This is
3246  *  called only once at attach, setup is done every reset.
3247  *
3248  **********************************************************************/
3249 static int
3250 em_allocate_transmit_buffers(struct tx_ring *txr)
3251 {
3252 	struct adapter *adapter = txr->adapter;
3253 	device_t dev = adapter->dev;
3254 	struct em_buffer *txbuf;
3255 	int error, i;
3256 
3257 	/*
3258 	 * Setup DMA descriptor areas.
3259 	 */
3260 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3261 			       1, 0,			/* alignment, bounds */
3262 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3263 			       BUS_SPACE_MAXADDR,	/* highaddr */
3264 			       NULL, NULL,		/* filter, filterarg */
3265 			       EM_TSO_SIZE,		/* maxsize */
3266 			       EM_MAX_SCATTER,		/* nsegments */
3267 			       PAGE_SIZE,		/* maxsegsize */
3268 			       0,			/* flags */
3269 			       NULL,			/* lockfunc */
3270 			       NULL,			/* lockfuncarg */
3271 			       &txr->txtag))) {
3272 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3273 		goto fail;
3274 	}
3275 
3276 	if (!(txr->tx_buffers =
3277 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3278 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3279 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3280 		error = ENOMEM;
3281 		goto fail;
3282 	}
3283 
3284         /* Create the descriptor buffer dma maps */
3285 	txbuf = txr->tx_buffers;
3286 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3287 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3288 		if (error != 0) {
3289 			device_printf(dev, "Unable to create TX DMA map\n");
3290 			goto fail;
3291 		}
3292 	}
3293 
3294 	return 0;
3295 fail:
3296 	/* We free all, it handles case where we are in the middle */
3297 	em_free_transmit_structures(adapter);
3298 	return (error);
3299 }
3300 
3301 /*********************************************************************
3302  *
3303  *  Initialize a transmit ring.
3304  *
3305  **********************************************************************/
3306 static void
3307 em_setup_transmit_ring(struct tx_ring *txr)
3308 {
3309 	struct adapter *adapter = txr->adapter;
3310 	struct em_buffer *txbuf;
3311 	int i;
3312 #ifdef DEV_NETMAP
3313 	struct netmap_slot *slot;
3314 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3315 #endif /* DEV_NETMAP */
3316 
3317 	/* Clear the old descriptor contents */
3318 	EM_TX_LOCK(txr);
3319 #ifdef DEV_NETMAP
3320 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3321 #endif /* DEV_NETMAP */
3322 
3323 	bzero((void *)txr->tx_base,
3324 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3325 	/* Reset indices */
3326 	txr->next_avail_desc = 0;
3327 	txr->next_to_clean = 0;
3328 
3329 	/* Free any existing tx buffers. */
3330         txbuf = txr->tx_buffers;
3331 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3332 		if (txbuf->m_head != NULL) {
3333 			bus_dmamap_sync(txr->txtag, txbuf->map,
3334 			    BUS_DMASYNC_POSTWRITE);
3335 			bus_dmamap_unload(txr->txtag, txbuf->map);
3336 			m_freem(txbuf->m_head);
3337 			txbuf->m_head = NULL;
3338 		}
3339 #ifdef DEV_NETMAP
3340 		if (slot) {
3341 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3342 			uint64_t paddr;
3343 			void *addr;
3344 
3345 			addr = PNMB(na, slot + si, &paddr);
3346 			txr->tx_base[i].buffer_addr = htole64(paddr);
3347 			/* reload the map for netmap mode */
3348 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3349 		}
3350 #endif /* DEV_NETMAP */
3351 
3352 		/* clear the watch index */
3353 		txbuf->next_eop = -1;
3354         }
3355 
3356 	/* Set number of descriptors available */
3357 	txr->tx_avail = adapter->num_tx_desc;
3358 	txr->queue_status = EM_QUEUE_IDLE;
3359 
3360 	/* Clear checksum offload context. */
3361 	txr->last_hw_offload = 0;
3362 	txr->last_hw_ipcss = 0;
3363 	txr->last_hw_ipcso = 0;
3364 	txr->last_hw_tucss = 0;
3365 	txr->last_hw_tucso = 0;
3366 
3367 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3368 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3369 	EM_TX_UNLOCK(txr);
3370 }
3371 
3372 /*********************************************************************
3373  *
3374  *  Initialize all transmit rings.
3375  *
3376  **********************************************************************/
3377 static void
3378 em_setup_transmit_structures(struct adapter *adapter)
3379 {
3380 	struct tx_ring *txr = adapter->tx_rings;
3381 
3382 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3383 		em_setup_transmit_ring(txr);
3384 
3385 	return;
3386 }
3387 
3388 /*********************************************************************
3389  *
3390  *  Enable transmit unit.
3391  *
3392  **********************************************************************/
3393 static void
3394 em_initialize_transmit_unit(struct adapter *adapter)
3395 {
3396 	struct tx_ring	*txr = adapter->tx_rings;
3397 	struct e1000_hw	*hw = &adapter->hw;
3398 	u32	tctl, tarc, tipg = 0;
3399 
3400 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3401 
3402 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3403 		u64 bus_addr = txr->txdma.dma_paddr;
3404 		/* Base and Len of TX Ring */
3405 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3406 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3407 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3408 	    	    (u32)(bus_addr >> 32));
3409 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3410 	    	    (u32)bus_addr);
3411 		/* Init the HEAD/TAIL indices */
3412 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3413 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3414 
3415 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3416 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3417 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3418 
3419 		txr->queue_status = EM_QUEUE_IDLE;
3420 	}
3421 
3422 	/* Set the default values for the Tx Inter Packet Gap timer */
3423 	switch (adapter->hw.mac.type) {
3424 	case e1000_80003es2lan:
3425 		tipg = DEFAULT_82543_TIPG_IPGR1;
3426 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3427 		    E1000_TIPG_IPGR2_SHIFT;
3428 		break;
3429 	default:
3430 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3431 		    (adapter->hw.phy.media_type ==
3432 		    e1000_media_type_internal_serdes))
3433 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3434 		else
3435 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3436 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3437 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3438 	}
3439 
3440 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3441 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3442 
3443 	if(adapter->hw.mac.type >= e1000_82540)
3444 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3445 		    adapter->tx_abs_int_delay.value);
3446 
3447 	if ((adapter->hw.mac.type == e1000_82571) ||
3448 	    (adapter->hw.mac.type == e1000_82572)) {
3449 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3450 		tarc |= SPEED_MODE_BIT;
3451 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3452 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3453 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3454 		tarc |= 1;
3455 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3456 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3457 		tarc |= 1;
3458 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3459 	}
3460 
3461 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3462 	if (adapter->tx_int_delay.value > 0)
3463 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3464 
3465 	/* Program the Transmit Control Register */
3466 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3467 	tctl &= ~E1000_TCTL_CT;
3468 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3469 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3470 
3471 	if (adapter->hw.mac.type >= e1000_82571)
3472 		tctl |= E1000_TCTL_MULR;
3473 
3474 	/* This write will effectively turn on the transmit unit. */
3475 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3476 
3477 }
3478 
3479 
3480 /*********************************************************************
3481  *
3482  *  Free all transmit rings.
3483  *
3484  **********************************************************************/
3485 static void
3486 em_free_transmit_structures(struct adapter *adapter)
3487 {
3488 	struct tx_ring *txr = adapter->tx_rings;
3489 
3490 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3491 		EM_TX_LOCK(txr);
3492 		em_free_transmit_buffers(txr);
3493 		em_dma_free(adapter, &txr->txdma);
3494 		EM_TX_UNLOCK(txr);
3495 		EM_TX_LOCK_DESTROY(txr);
3496 	}
3497 
3498 	free(adapter->tx_rings, M_DEVBUF);
3499 }
3500 
3501 /*********************************************************************
3502  *
3503  *  Free transmit ring related data structures.
3504  *
3505  **********************************************************************/
3506 static void
3507 em_free_transmit_buffers(struct tx_ring *txr)
3508 {
3509 	struct adapter		*adapter = txr->adapter;
3510 	struct em_buffer	*txbuf;
3511 
3512 	INIT_DEBUGOUT("free_transmit_ring: begin");
3513 
3514 	if (txr->tx_buffers == NULL)
3515 		return;
3516 
3517 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3518 		txbuf = &txr->tx_buffers[i];
3519 		if (txbuf->m_head != NULL) {
3520 			bus_dmamap_sync(txr->txtag, txbuf->map,
3521 			    BUS_DMASYNC_POSTWRITE);
3522 			bus_dmamap_unload(txr->txtag,
3523 			    txbuf->map);
3524 			m_freem(txbuf->m_head);
3525 			txbuf->m_head = NULL;
3526 			if (txbuf->map != NULL) {
3527 				bus_dmamap_destroy(txr->txtag,
3528 				    txbuf->map);
3529 				txbuf->map = NULL;
3530 			}
3531 		} else if (txbuf->map != NULL) {
3532 			bus_dmamap_unload(txr->txtag,
3533 			    txbuf->map);
3534 			bus_dmamap_destroy(txr->txtag,
3535 			    txbuf->map);
3536 			txbuf->map = NULL;
3537 		}
3538 	}
3539 #if __FreeBSD_version >= 800000
3540 	if (txr->br != NULL)
3541 		buf_ring_free(txr->br, M_DEVBUF);
3542 #endif
3543 	if (txr->tx_buffers != NULL) {
3544 		free(txr->tx_buffers, M_DEVBUF);
3545 		txr->tx_buffers = NULL;
3546 	}
3547 	if (txr->txtag != NULL) {
3548 		bus_dma_tag_destroy(txr->txtag);
3549 		txr->txtag = NULL;
3550 	}
3551 	return;
3552 }
3553 
3554 
3555 /*********************************************************************
3556  *  The offload context is protocol specific (TCP/UDP) and thus
3557  *  only needs to be set when the protocol changes. The occasion
3558  *  of a context change can be a performance detriment, and
3559  *  might be better just disabled. The reason arises in the way
3560  *  in which the controller supports pipelined requests from the
3561  *  Tx data DMA. Up to four requests can be pipelined, and they may
3562  *  belong to the same packet or to multiple packets. However all
3563  *  requests for one packet are issued before a request is issued
3564  *  for a subsequent packet and if a request for the next packet
3565  *  requires a context change, that request will be stalled
3566  *  until the previous request completes. This means setting up
3567  *  a new context effectively disables pipelined Tx data DMA which
3568  *  in turn greatly slow down performance to send small sized
3569  *  frames.
3570  **********************************************************************/
3571 static void
3572 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3573     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3574 {
3575 	struct adapter			*adapter = txr->adapter;
3576 	struct e1000_context_desc	*TXD = NULL;
3577 	struct em_buffer		*tx_buffer;
3578 	int				cur, hdr_len;
3579 	u32				cmd = 0;
3580 	u16				offload = 0;
3581 	u8				ipcso, ipcss, tucso, tucss;
3582 
3583 	ipcss = ipcso = tucss = tucso = 0;
3584 	hdr_len = ip_off + (ip->ip_hl << 2);
3585 	cur = txr->next_avail_desc;
3586 
3587 	/* Setup of IP header checksum. */
3588 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3589 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3590 		offload |= CSUM_IP;
3591 		ipcss = ip_off;
3592 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3593 		/*
3594 		 * Start offset for header checksum calculation.
3595 		 * End offset for header checksum calculation.
3596 		 * Offset of place to put the checksum.
3597 		 */
3598 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3599 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3600 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3601 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3602 		cmd |= E1000_TXD_CMD_IP;
3603 	}
3604 
3605 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3606  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3607  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3608  		offload |= CSUM_TCP;
3609  		tucss = hdr_len;
3610  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3611  		/*
3612  		 * Setting up new checksum offload context for every frames
3613  		 * takes a lot of processing time for hardware. This also
3614  		 * reduces performance a lot for small sized frames so avoid
3615  		 * it if driver can use previously configured checksum
3616  		 * offload context.
3617  		 */
3618  		if (txr->last_hw_offload == offload) {
3619  			if (offload & CSUM_IP) {
3620  				if (txr->last_hw_ipcss == ipcss &&
3621  				    txr->last_hw_ipcso == ipcso &&
3622  				    txr->last_hw_tucss == tucss &&
3623  				    txr->last_hw_tucso == tucso)
3624  					return;
3625  			} else {
3626  				if (txr->last_hw_tucss == tucss &&
3627  				    txr->last_hw_tucso == tucso)
3628  					return;
3629  			}
3630   		}
3631  		txr->last_hw_offload = offload;
3632  		txr->last_hw_tucss = tucss;
3633  		txr->last_hw_tucso = tucso;
3634  		/*
3635  		 * Start offset for payload checksum calculation.
3636  		 * End offset for payload checksum calculation.
3637  		 * Offset of place to put the checksum.
3638  		 */
3639 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3641  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3642  		TXD->upper_setup.tcp_fields.tucso = tucso;
3643  		cmd |= E1000_TXD_CMD_TCP;
3644  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3645  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3646  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3647  		tucss = hdr_len;
3648  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3649  		/*
3650  		 * Setting up new checksum offload context for every frames
3651  		 * takes a lot of processing time for hardware. This also
3652  		 * reduces performance a lot for small sized frames so avoid
3653  		 * it if driver can use previously configured checksum
3654  		 * offload context.
3655  		 */
3656  		if (txr->last_hw_offload == offload) {
3657  			if (offload & CSUM_IP) {
3658  				if (txr->last_hw_ipcss == ipcss &&
3659  				    txr->last_hw_ipcso == ipcso &&
3660  				    txr->last_hw_tucss == tucss &&
3661  				    txr->last_hw_tucso == tucso)
3662  					return;
3663  			} else {
3664  				if (txr->last_hw_tucss == tucss &&
3665  				    txr->last_hw_tucso == tucso)
3666  					return;
3667  			}
3668  		}
3669  		txr->last_hw_offload = offload;
3670  		txr->last_hw_tucss = tucss;
3671  		txr->last_hw_tucso = tucso;
3672  		/*
3673  		 * Start offset for header checksum calculation.
3674  		 * End offset for header checksum calculation.
3675  		 * Offset of place to put the checksum.
3676  		 */
3677 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3678  		TXD->upper_setup.tcp_fields.tucss = tucss;
3679  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3680  		TXD->upper_setup.tcp_fields.tucso = tucso;
3681   	}
3682 
3683  	if (offload & CSUM_IP) {
3684  		txr->last_hw_ipcss = ipcss;
3685  		txr->last_hw_ipcso = ipcso;
3686   	}
3687 
3688 	TXD->tcp_seg_setup.data = htole32(0);
3689 	TXD->cmd_and_length =
3690 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3691 	tx_buffer = &txr->tx_buffers[cur];
3692 	tx_buffer->m_head = NULL;
3693 	tx_buffer->next_eop = -1;
3694 
3695 	if (++cur == adapter->num_tx_desc)
3696 		cur = 0;
3697 
3698 	txr->tx_avail--;
3699 	txr->next_avail_desc = cur;
3700 }
3701 
3702 
3703 /**********************************************************************
3704  *
3705  *  Setup work for hardware segmentation offload (TSO)
3706  *
3707  **********************************************************************/
3708 static void
3709 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3710     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3711 {
3712 	struct adapter			*adapter = txr->adapter;
3713 	struct e1000_context_desc	*TXD;
3714 	struct em_buffer		*tx_buffer;
3715 	int cur, hdr_len;
3716 
3717 	/*
3718 	 * In theory we can use the same TSO context if and only if
3719 	 * frame is the same type(IP/TCP) and the same MSS. However
3720 	 * checking whether a frame has the same IP/TCP structure is
3721 	 * hard thing so just ignore that and always restablish a
3722 	 * new TSO context.
3723 	 */
3724 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3725 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3726 		      E1000_TXD_DTYP_D |	/* Data descr type */
3727 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3728 
3729 	/* IP and/or TCP header checksum calculation and insertion. */
3730 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3731 
3732 	cur = txr->next_avail_desc;
3733 	tx_buffer = &txr->tx_buffers[cur];
3734 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3735 
3736 	/*
3737 	 * Start offset for header checksum calculation.
3738 	 * End offset for header checksum calculation.
3739 	 * Offset of place put the checksum.
3740 	 */
3741 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3742 	TXD->lower_setup.ip_fields.ipcse =
3743 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3744 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3745 	/*
3746 	 * Start offset for payload checksum calculation.
3747 	 * End offset for payload checksum calculation.
3748 	 * Offset of place to put the checksum.
3749 	 */
3750 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3751 	TXD->upper_setup.tcp_fields.tucse = 0;
3752 	TXD->upper_setup.tcp_fields.tucso =
3753 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3754 	/*
3755 	 * Payload size per packet w/o any headers.
3756 	 * Length of all headers up to payload.
3757 	 */
3758 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3759 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3760 
3761 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3762 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3763 				E1000_TXD_CMD_TSE |	/* TSE context */
3764 				E1000_TXD_CMD_IP |	/* Do IP csum */
3765 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3766 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3767 
3768 	tx_buffer->m_head = NULL;
3769 	tx_buffer->next_eop = -1;
3770 
3771 	if (++cur == adapter->num_tx_desc)
3772 		cur = 0;
3773 
3774 	txr->tx_avail--;
3775 	txr->next_avail_desc = cur;
3776 	txr->tx_tso = TRUE;
3777 }
3778 
3779 
3780 /**********************************************************************
3781  *
3782  *  Examine each tx_buffer in the used queue. If the hardware is done
3783  *  processing the packet then free associated resources. The
3784  *  tx_buffer is put back on the free queue.
3785  *
3786  **********************************************************************/
3787 static void
3788 em_txeof(struct tx_ring *txr)
3789 {
3790 	struct adapter	*adapter = txr->adapter;
3791         int first, last, done, processed;
3792         struct em_buffer *tx_buffer;
3793         struct e1000_tx_desc   *tx_desc, *eop_desc;
3794 	if_t ifp = adapter->ifp;
3795 
3796 	EM_TX_LOCK_ASSERT(txr);
3797 #ifdef DEV_NETMAP
3798 	if (netmap_tx_irq(ifp, txr->me))
3799 		return;
3800 #endif /* DEV_NETMAP */
3801 
3802 	/* No work, make sure watchdog is off */
3803         if (txr->tx_avail == adapter->num_tx_desc) {
3804 		txr->queue_status = EM_QUEUE_IDLE;
3805                 return;
3806 	}
3807 
3808 	processed = 0;
3809         first = txr->next_to_clean;
3810         tx_desc = &txr->tx_base[first];
3811         tx_buffer = &txr->tx_buffers[first];
3812 	last = tx_buffer->next_eop;
3813         eop_desc = &txr->tx_base[last];
3814 
3815 	/*
3816 	 * What this does is get the index of the
3817 	 * first descriptor AFTER the EOP of the
3818 	 * first packet, that way we can do the
3819 	 * simple comparison on the inner while loop.
3820 	 */
3821 	if (++last == adapter->num_tx_desc)
3822  		last = 0;
3823 	done = last;
3824 
3825         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3826             BUS_DMASYNC_POSTREAD);
3827 
3828         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3829 		/* We clean the range of the packet */
3830 		while (first != done) {
3831                 	tx_desc->upper.data = 0;
3832                 	tx_desc->lower.data = 0;
3833                 	tx_desc->buffer_addr = 0;
3834                 	++txr->tx_avail;
3835 			++processed;
3836 
3837 			if (tx_buffer->m_head) {
3838 				bus_dmamap_sync(txr->txtag,
3839 				    tx_buffer->map,
3840 				    BUS_DMASYNC_POSTWRITE);
3841 				bus_dmamap_unload(txr->txtag,
3842 				    tx_buffer->map);
3843                         	m_freem(tx_buffer->m_head);
3844                         	tx_buffer->m_head = NULL;
3845                 	}
3846 			tx_buffer->next_eop = -1;
3847 			txr->watchdog_time = ticks;
3848 
3849 	                if (++first == adapter->num_tx_desc)
3850 				first = 0;
3851 
3852 	                tx_buffer = &txr->tx_buffers[first];
3853 			tx_desc = &txr->tx_base[first];
3854 		}
3855 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3856 		/* See if we can continue to the next packet */
3857 		last = tx_buffer->next_eop;
3858 		if (last != -1) {
3859         		eop_desc = &txr->tx_base[last];
3860 			/* Get new done point */
3861 			if (++last == adapter->num_tx_desc) last = 0;
3862 			done = last;
3863 		} else
3864 			break;
3865         }
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868 
3869         txr->next_to_clean = first;
3870 
3871 	/*
3872 	** Watchdog calculation, we know there's
3873 	** work outstanding or the first return
3874 	** would have been taken, so none processed
3875 	** for too long indicates a hang. local timer
3876 	** will examine this and do a reset if needed.
3877 	*/
3878 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3879 		txr->queue_status = EM_QUEUE_HUNG;
3880 
3881         /*
3882          * If we have a minimum free, clear IFF_DRV_OACTIVE
3883          * to tell the stack that it is OK to send packets.
3884 	 * Notice that all writes of OACTIVE happen under the
3885 	 * TX lock which, with a single queue, guarantees
3886 	 * sanity.
3887          */
3888         if (txr->tx_avail >= EM_MAX_SCATTER)
3889 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3890 
3891 	/* Disable watchdog if all clean */
3892 	if (txr->tx_avail == adapter->num_tx_desc) {
3893 		txr->queue_status = EM_QUEUE_IDLE;
3894 	}
3895 }
3896 
3897 
3898 /*********************************************************************
3899  *
3900  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3901  *
3902  **********************************************************************/
3903 static void
3904 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3905 {
3906 	struct adapter		*adapter = rxr->adapter;
3907 	struct mbuf		*m;
3908 	bus_dma_segment_t	segs[1];
3909 	struct em_buffer	*rxbuf;
3910 	int			i, j, error, nsegs;
3911 	bool			cleaned = FALSE;
3912 
3913 	i = j = rxr->next_to_refresh;
3914 	/*
3915 	** Get one descriptor beyond
3916 	** our work mark to control
3917 	** the loop.
3918 	*/
3919 	if (++j == adapter->num_rx_desc)
3920 		j = 0;
3921 
3922 	while (j != limit) {
3923 		rxbuf = &rxr->rx_buffers[i];
3924 		if (rxbuf->m_head == NULL) {
3925 			m = m_getjcl(M_NOWAIT, MT_DATA,
3926 			    M_PKTHDR, adapter->rx_mbuf_sz);
3927 			/*
3928 			** If we have a temporary resource shortage
3929 			** that causes a failure, just abort refresh
3930 			** for now, we will return to this point when
3931 			** reinvoked from em_rxeof.
3932 			*/
3933 			if (m == NULL)
3934 				goto update;
3935 		} else
3936 			m = rxbuf->m_head;
3937 
3938 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3939 		m->m_flags |= M_PKTHDR;
3940 		m->m_data = m->m_ext.ext_buf;
3941 
3942 		/* Use bus_dma machinery to setup the memory mapping  */
3943 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3944 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3945 		if (error != 0) {
3946 			printf("Refresh mbufs: hdr dmamap load"
3947 			    " failure - %d\n", error);
3948 			m_free(m);
3949 			rxbuf->m_head = NULL;
3950 			goto update;
3951 		}
3952 		rxbuf->m_head = m;
3953 		bus_dmamap_sync(rxr->rxtag,
3954 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3955 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3956 		cleaned = TRUE;
3957 
3958 		i = j; /* Next is precalulated for us */
3959 		rxr->next_to_refresh = i;
3960 		/* Calculate next controlling index */
3961 		if (++j == adapter->num_rx_desc)
3962 			j = 0;
3963 	}
3964 update:
3965 	/*
3966 	** Update the tail pointer only if,
3967 	** and as far as we have refreshed.
3968 	*/
3969 	if (cleaned)
3970 		E1000_WRITE_REG(&adapter->hw,
3971 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3972 
3973 	return;
3974 }
3975 
3976 
3977 /*********************************************************************
3978  *
3979  *  Allocate memory for rx_buffer structures. Since we use one
3980  *  rx_buffer per received packet, the maximum number of rx_buffer's
3981  *  that we'll need is equal to the number of receive descriptors
3982  *  that we've allocated.
3983  *
3984  **********************************************************************/
3985 static int
3986 em_allocate_receive_buffers(struct rx_ring *rxr)
3987 {
3988 	struct adapter		*adapter = rxr->adapter;
3989 	device_t		dev = adapter->dev;
3990 	struct em_buffer	*rxbuf;
3991 	int			error;
3992 
3993 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3994 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3995 	if (rxr->rx_buffers == NULL) {
3996 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3997 		return (ENOMEM);
3998 	}
3999 
4000 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4001 				1, 0,			/* alignment, bounds */
4002 				BUS_SPACE_MAXADDR,	/* lowaddr */
4003 				BUS_SPACE_MAXADDR,	/* highaddr */
4004 				NULL, NULL,		/* filter, filterarg */
4005 				MJUM9BYTES,		/* maxsize */
4006 				1,			/* nsegments */
4007 				MJUM9BYTES,		/* maxsegsize */
4008 				0,			/* flags */
4009 				NULL,			/* lockfunc */
4010 				NULL,			/* lockarg */
4011 				&rxr->rxtag);
4012 	if (error) {
4013 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4014 		    __func__, error);
4015 		goto fail;
4016 	}
4017 
4018 	rxbuf = rxr->rx_buffers;
4019 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4020 		rxbuf = &rxr->rx_buffers[i];
4021 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4022 		if (error) {
4023 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4024 			    __func__, error);
4025 			goto fail;
4026 		}
4027 	}
4028 
4029 	return (0);
4030 
4031 fail:
4032 	em_free_receive_structures(adapter);
4033 	return (error);
4034 }
4035 
4036 
4037 /*********************************************************************
4038  *
4039  *  Initialize a receive ring and its buffers.
4040  *
4041  **********************************************************************/
4042 static int
4043 em_setup_receive_ring(struct rx_ring *rxr)
4044 {
4045 	struct	adapter 	*adapter = rxr->adapter;
4046 	struct em_buffer	*rxbuf;
4047 	bus_dma_segment_t	seg[1];
4048 	int			rsize, nsegs, error = 0;
4049 #ifdef DEV_NETMAP
4050 	struct netmap_slot *slot;
4051 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4052 #endif
4053 
4054 
4055 	/* Clear the ring contents */
4056 	EM_RX_LOCK(rxr);
4057 	rsize = roundup2(adapter->num_rx_desc *
4058 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4059 	bzero((void *)rxr->rx_base, rsize);
4060 #ifdef DEV_NETMAP
4061 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4062 #endif
4063 
4064 	/*
4065 	** Free current RX buffer structs and their mbufs
4066 	*/
4067 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4068 		rxbuf = &rxr->rx_buffers[i];
4069 		if (rxbuf->m_head != NULL) {
4070 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4071 			    BUS_DMASYNC_POSTREAD);
4072 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4073 			m_freem(rxbuf->m_head);
4074 			rxbuf->m_head = NULL; /* mark as freed */
4075 		}
4076 	}
4077 
4078 	/* Now replenish the mbufs */
4079         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4080 		rxbuf = &rxr->rx_buffers[j];
4081 #ifdef DEV_NETMAP
4082 		if (slot) {
4083 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4084 			uint64_t paddr;
4085 			void *addr;
4086 
4087 			addr = PNMB(na, slot + si, &paddr);
4088 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4089 			/* Update descriptor */
4090 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4091 			continue;
4092 		}
4093 #endif /* DEV_NETMAP */
4094 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4095 		    M_PKTHDR, adapter->rx_mbuf_sz);
4096 		if (rxbuf->m_head == NULL) {
4097 			error = ENOBUFS;
4098 			goto fail;
4099 		}
4100 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4101 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4102 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4103 
4104 		/* Get the memory mapping */
4105 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4106 		    rxbuf->map, rxbuf->m_head, seg,
4107 		    &nsegs, BUS_DMA_NOWAIT);
4108 		if (error != 0) {
4109 			m_freem(rxbuf->m_head);
4110 			rxbuf->m_head = NULL;
4111 			goto fail;
4112 		}
4113 		bus_dmamap_sync(rxr->rxtag,
4114 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4115 
4116 		/* Update descriptor */
4117 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4118 	}
4119 	rxr->next_to_check = 0;
4120 	rxr->next_to_refresh = 0;
4121 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4122 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4123 
4124 fail:
4125 	EM_RX_UNLOCK(rxr);
4126 	return (error);
4127 }
4128 
4129 /*********************************************************************
4130  *
4131  *  Initialize all receive rings.
4132  *
4133  **********************************************************************/
4134 static int
4135 em_setup_receive_structures(struct adapter *adapter)
4136 {
4137 	struct rx_ring *rxr = adapter->rx_rings;
4138 	int q;
4139 
4140 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4141 		if (em_setup_receive_ring(rxr))
4142 			goto fail;
4143 
4144 	return (0);
4145 fail:
4146 	/*
4147 	 * Free RX buffers allocated so far, we will only handle
4148 	 * the rings that completed, the failing case will have
4149 	 * cleaned up for itself. 'q' failed, so its the terminus.
4150 	 */
4151 	for (int i = 0; i < q; ++i) {
4152 		rxr = &adapter->rx_rings[i];
4153 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4154 			struct em_buffer *rxbuf;
4155 			rxbuf = &rxr->rx_buffers[n];
4156 			if (rxbuf->m_head != NULL) {
4157 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4158 			  	  BUS_DMASYNC_POSTREAD);
4159 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4160 				m_freem(rxbuf->m_head);
4161 				rxbuf->m_head = NULL;
4162 			}
4163 		}
4164 		rxr->next_to_check = 0;
4165 		rxr->next_to_refresh = 0;
4166 	}
4167 
4168 	return (ENOBUFS);
4169 }
4170 
4171 /*********************************************************************
4172  *
4173  *  Free all receive rings.
4174  *
4175  **********************************************************************/
4176 static void
4177 em_free_receive_structures(struct adapter *adapter)
4178 {
4179 	struct rx_ring *rxr = adapter->rx_rings;
4180 
4181 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4182 		em_free_receive_buffers(rxr);
4183 		/* Free the ring memory as well */
4184 		em_dma_free(adapter, &rxr->rxdma);
4185 		EM_RX_LOCK_DESTROY(rxr);
4186 	}
4187 
4188 	free(adapter->rx_rings, M_DEVBUF);
4189 }
4190 
4191 
4192 /*********************************************************************
4193  *
4194  *  Free receive ring data structures
4195  *
4196  **********************************************************************/
4197 static void
4198 em_free_receive_buffers(struct rx_ring *rxr)
4199 {
4200 	struct adapter		*adapter = rxr->adapter;
4201 	struct em_buffer	*rxbuf = NULL;
4202 
4203 	INIT_DEBUGOUT("free_receive_buffers: begin");
4204 
4205 	if (rxr->rx_buffers != NULL) {
4206 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4207 			rxbuf = &rxr->rx_buffers[i];
4208 			if (rxbuf->map != NULL) {
4209 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4210 				    BUS_DMASYNC_POSTREAD);
4211 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4212 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4213 			}
4214 			if (rxbuf->m_head != NULL) {
4215 				m_freem(rxbuf->m_head);
4216 				rxbuf->m_head = NULL;
4217 			}
4218 		}
4219 		free(rxr->rx_buffers, M_DEVBUF);
4220 		rxr->rx_buffers = NULL;
4221 		rxr->next_to_check = 0;
4222 		rxr->next_to_refresh = 0;
4223 	}
4224 
4225 	if (rxr->rxtag != NULL) {
4226 		bus_dma_tag_destroy(rxr->rxtag);
4227 		rxr->rxtag = NULL;
4228 	}
4229 
4230 	return;
4231 }
4232 
4233 
4234 /*********************************************************************
4235  *
4236  *  Enable receive unit.
4237  *
4238  **********************************************************************/
4239 
4240 static void
4241 em_initialize_receive_unit(struct adapter *adapter)
4242 {
4243 	struct rx_ring	*rxr = adapter->rx_rings;
4244 	if_t ifp = adapter->ifp;
4245 	struct e1000_hw	*hw = &adapter->hw;
4246 	u64	bus_addr;
4247 	u32	rctl, rxcsum;
4248 
4249 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4250 
4251 	/*
4252 	 * Make sure receives are disabled while setting
4253 	 * up the descriptor ring
4254 	 */
4255 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4256 	/* Do not disable if ever enabled on this hardware */
4257 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4258 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4259 
4260 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4261 	    adapter->rx_abs_int_delay.value);
4262 	/*
4263 	 * Set the interrupt throttling rate. Value is calculated
4264 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4265 	 */
4266 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4267 
4268 	/*
4269 	** When using MSIX interrupts we need to throttle
4270 	** using the EITR register (82574 only)
4271 	*/
4272 	if (hw->mac.type == e1000_82574) {
4273 		for (int i = 0; i < 4; i++)
4274 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4275 			    DEFAULT_ITR);
4276 		/* Disable accelerated acknowledge */
4277 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4278 	}
4279 
4280 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4281 	if (if_getcapenable(ifp) & IFCAP_RXCSUM)
4282 		rxcsum |= E1000_RXCSUM_TUOFL;
4283 	else
4284 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4285 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4286 
4287 	/*
4288 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4289 	** long latencies are observed, like Lenovo X60. This
4290 	** change eliminates the problem, but since having positive
4291 	** values in RDTR is a known source of problems on other
4292 	** platforms another solution is being sought.
4293 	*/
4294 	if (hw->mac.type == e1000_82573)
4295 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4296 
4297 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4298 		/* Setup the Base and Length of the Rx Descriptor Ring */
4299 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4300 
4301 		bus_addr = rxr->rxdma.dma_paddr;
4302 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4303 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4304 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4305 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4306 		/* Setup the Head and Tail Descriptor Pointers */
4307 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4308 #ifdef DEV_NETMAP
4309 		/*
4310 		 * an init() while a netmap client is active must
4311 		 * preserve the rx buffers passed to userspace.
4312 		 */
4313 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4314 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4315 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4316 		}
4317 #endif /* DEV_NETMAP */
4318 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4319 	}
4320 
4321 	/* Set PTHRESH for improved jumbo performance */
4322 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4323 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4324 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4325 	    (if_getmtu(ifp) > ETHERMTU)) {
4326 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4327 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4328 	}
4329 
4330 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4331 		if (if_getmtu(ifp) > ETHERMTU)
4332 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4333 		else
4334 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4335 	}
4336 
4337 	/* Setup the Receive Control Register */
4338 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4339 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4340 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4341 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4342 
4343         /* Strip the CRC */
4344         rctl |= E1000_RCTL_SECRC;
4345 
4346         /* Make sure VLAN Filters are off */
4347         rctl &= ~E1000_RCTL_VFE;
4348 	rctl &= ~E1000_RCTL_SBP;
4349 
4350 	if (adapter->rx_mbuf_sz == MCLBYTES)
4351 		rctl |= E1000_RCTL_SZ_2048;
4352 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4353 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4354 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4355 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4356 
4357 	if (if_getmtu(ifp) > ETHERMTU)
4358 		rctl |= E1000_RCTL_LPE;
4359 	else
4360 		rctl &= ~E1000_RCTL_LPE;
4361 
4362 	/* Write out the settings */
4363 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4364 
4365 	return;
4366 }
4367 
4368 
4369 /*********************************************************************
4370  *
4371  *  This routine executes in interrupt context. It replenishes
4372  *  the mbufs in the descriptor and sends data which has been
4373  *  dma'ed into host memory to upper layer.
4374  *
4375  *  We loop at most count times if count is > 0, or until done if
4376  *  count < 0.
4377  *
4378  *  For polling we also now return the number of cleaned packets
4379  *********************************************************************/
4380 static bool
4381 em_rxeof(struct rx_ring *rxr, int count, int *done)
4382 {
4383 	struct adapter		*adapter = rxr->adapter;
4384 	if_t ifp = adapter->ifp;
4385 	struct mbuf		*mp, *sendmp;
4386 	u8			status = 0;
4387 	u16 			len;
4388 	int			i, processed, rxdone = 0;
4389 	bool			eop;
4390 	struct e1000_rx_desc	*cur;
4391 
4392 	EM_RX_LOCK(rxr);
4393 
4394 #ifdef DEV_NETMAP
4395 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4396 		EM_RX_UNLOCK(rxr);
4397 		return (FALSE);
4398 	}
4399 #endif /* DEV_NETMAP */
4400 
4401 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4402 
4403 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4404 			break;
4405 
4406 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4407 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4408 
4409 		cur = &rxr->rx_base[i];
4410 		status = cur->status;
4411 		mp = sendmp = NULL;
4412 
4413 		if ((status & E1000_RXD_STAT_DD) == 0)
4414 			break;
4415 
4416 		len = le16toh(cur->length);
4417 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4418 
4419 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4420 		    (rxr->discard == TRUE)) {
4421 			adapter->dropped_pkts++;
4422 			++rxr->rx_discarded;
4423 			if (!eop) /* Catch subsequent segs */
4424 				rxr->discard = TRUE;
4425 			else
4426 				rxr->discard = FALSE;
4427 			em_rx_discard(rxr, i);
4428 			goto next_desc;
4429 		}
4430 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4431 
4432 		/* Assign correct length to the current fragment */
4433 		mp = rxr->rx_buffers[i].m_head;
4434 		mp->m_len = len;
4435 
4436 		/* Trigger for refresh */
4437 		rxr->rx_buffers[i].m_head = NULL;
4438 
4439 		/* First segment? */
4440 		if (rxr->fmp == NULL) {
4441 			mp->m_pkthdr.len = len;
4442 			rxr->fmp = rxr->lmp = mp;
4443 		} else {
4444 			/* Chain mbuf's together */
4445 			mp->m_flags &= ~M_PKTHDR;
4446 			rxr->lmp->m_next = mp;
4447 			rxr->lmp = mp;
4448 			rxr->fmp->m_pkthdr.len += len;
4449 		}
4450 
4451 		if (eop) {
4452 			--count;
4453 			sendmp = rxr->fmp;
4454 			if_setrcvif(sendmp, ifp);
4455 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4456 			em_receive_checksum(cur, sendmp);
4457 #ifndef __NO_STRICT_ALIGNMENT
4458 			if (adapter->hw.mac.max_frame_size >
4459 			    (MCLBYTES - ETHER_ALIGN) &&
4460 			    em_fixup_rx(rxr) != 0)
4461 				goto skip;
4462 #endif
4463 			if (status & E1000_RXD_STAT_VP) {
4464 				if_setvtag(sendmp,
4465 				    le16toh(cur->special));
4466 				sendmp->m_flags |= M_VLANTAG;
4467 			}
4468 #ifndef __NO_STRICT_ALIGNMENT
4469 skip:
4470 #endif
4471 			rxr->fmp = rxr->lmp = NULL;
4472 		}
4473 next_desc:
4474 		/* Zero out the receive descriptors status. */
4475 		cur->status = 0;
4476 		++rxdone;	/* cumulative for POLL */
4477 		++processed;
4478 
4479 		/* Advance our pointers to the next descriptor. */
4480 		if (++i == adapter->num_rx_desc)
4481 			i = 0;
4482 
4483 		/* Send to the stack */
4484 		if (sendmp != NULL) {
4485 			rxr->next_to_check = i;
4486 			EM_RX_UNLOCK(rxr);
4487 			if_input(ifp, sendmp);
4488 			EM_RX_LOCK(rxr);
4489 			i = rxr->next_to_check;
4490 		}
4491 
4492 		/* Only refresh mbufs every 8 descriptors */
4493 		if (processed == 8) {
4494 			em_refresh_mbufs(rxr, i);
4495 			processed = 0;
4496 		}
4497 	}
4498 
4499 	/* Catch any remaining refresh work */
4500 	if (e1000_rx_unrefreshed(rxr))
4501 		em_refresh_mbufs(rxr, i);
4502 
4503 	rxr->next_to_check = i;
4504 	if (done != NULL)
4505 		*done = rxdone;
4506 	EM_RX_UNLOCK(rxr);
4507 
4508 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4509 }
4510 
4511 static __inline void
4512 em_rx_discard(struct rx_ring *rxr, int i)
4513 {
4514 	struct em_buffer	*rbuf;
4515 
4516 	rbuf = &rxr->rx_buffers[i];
4517 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4518 
4519 	/* Free any previous pieces */
4520 	if (rxr->fmp != NULL) {
4521 		rxr->fmp->m_flags |= M_PKTHDR;
4522 		m_freem(rxr->fmp);
4523 		rxr->fmp = NULL;
4524 		rxr->lmp = NULL;
4525 	}
4526 	/*
4527 	** Free buffer and allow em_refresh_mbufs()
4528 	** to clean up and recharge buffer.
4529 	*/
4530 	if (rbuf->m_head) {
4531 		m_free(rbuf->m_head);
4532 		rbuf->m_head = NULL;
4533 	}
4534 	return;
4535 }
4536 
4537 #ifndef __NO_STRICT_ALIGNMENT
4538 /*
4539  * When jumbo frames are enabled we should realign entire payload on
4540  * architecures with strict alignment. This is serious design mistake of 8254x
4541  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4542  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4543  * payload. On architecures without strict alignment restrictions 8254x still
4544  * performs unaligned memory access which would reduce the performance too.
4545  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4546  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4547  * existing mbuf chain.
4548  *
4549  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4550  * not used at all on architectures with strict alignment.
4551  */
4552 static int
4553 em_fixup_rx(struct rx_ring *rxr)
4554 {
4555 	struct adapter *adapter = rxr->adapter;
4556 	struct mbuf *m, *n;
4557 	int error;
4558 
4559 	error = 0;
4560 	m = rxr->fmp;
4561 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4562 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4563 		m->m_data += ETHER_HDR_LEN;
4564 	} else {
4565 		MGETHDR(n, M_NOWAIT, MT_DATA);
4566 		if (n != NULL) {
4567 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4568 			m->m_data += ETHER_HDR_LEN;
4569 			m->m_len -= ETHER_HDR_LEN;
4570 			n->m_len = ETHER_HDR_LEN;
4571 			M_MOVE_PKTHDR(n, m);
4572 			n->m_next = m;
4573 			rxr->fmp = n;
4574 		} else {
4575 			adapter->dropped_pkts++;
4576 			m_freem(rxr->fmp);
4577 			rxr->fmp = NULL;
4578 			error = ENOMEM;
4579 		}
4580 	}
4581 
4582 	return (error);
4583 }
4584 #endif
4585 
4586 /*********************************************************************
4587  *
4588  *  Verify that the hardware indicated that the checksum is valid.
4589  *  Inform the stack about the status of checksum so that stack
4590  *  doesn't spend time verifying the checksum.
4591  *
4592  *********************************************************************/
4593 static void
4594 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4595 {
4596 	mp->m_pkthdr.csum_flags = 0;
4597 
4598 	/* Ignore Checksum bit is set */
4599 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4600 		return;
4601 
4602 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4603 		return;
4604 
4605 	/* IP Checksum Good? */
4606 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4607 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4608 
4609 	/* TCP or UDP checksum */
4610 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4611 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4612 		mp->m_pkthdr.csum_data = htons(0xffff);
4613 	}
4614 }
4615 
4616 /*
4617  * This routine is run via an vlan
4618  * config EVENT
4619  */
4620 static void
4621 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4622 {
4623 	struct adapter	*adapter = if_getsoftc(ifp);
4624 	u32		index, bit;
4625 
4626 	if ((void*)adapter !=  arg)   /* Not our event */
4627 		return;
4628 
4629 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4630                 return;
4631 
4632 	EM_CORE_LOCK(adapter);
4633 	index = (vtag >> 5) & 0x7F;
4634 	bit = vtag & 0x1F;
4635 	adapter->shadow_vfta[index] |= (1 << bit);
4636 	++adapter->num_vlans;
4637 	/* Re-init to load the changes */
4638 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4639 		em_init_locked(adapter);
4640 	EM_CORE_UNLOCK(adapter);
4641 }
4642 
4643 /*
4644  * This routine is run via an vlan
4645  * unconfig EVENT
4646  */
4647 static void
4648 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4649 {
4650 	struct adapter	*adapter = if_getsoftc(ifp);
4651 	u32		index, bit;
4652 
4653 	if (adapter != arg)
4654 		return;
4655 
4656 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4657                 return;
4658 
4659 	EM_CORE_LOCK(adapter);
4660 	index = (vtag >> 5) & 0x7F;
4661 	bit = vtag & 0x1F;
4662 	adapter->shadow_vfta[index] &= ~(1 << bit);
4663 	--adapter->num_vlans;
4664 	/* Re-init to load the changes */
4665 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4666 		em_init_locked(adapter);
4667 	EM_CORE_UNLOCK(adapter);
4668 }
4669 
4670 static void
4671 em_setup_vlan_hw_support(struct adapter *adapter)
4672 {
4673 	struct e1000_hw *hw = &adapter->hw;
4674 	u32             reg;
4675 
4676 	/*
4677 	** We get here thru init_locked, meaning
4678 	** a soft reset, this has already cleared
4679 	** the VFTA and other state, so if there
4680 	** have been no vlan's registered do nothing.
4681 	*/
4682 	if (adapter->num_vlans == 0)
4683                 return;
4684 
4685 	/*
4686 	** A soft reset zero's out the VFTA, so
4687 	** we need to repopulate it now.
4688 	*/
4689 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4690                 if (adapter->shadow_vfta[i] != 0)
4691 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4692                             i, adapter->shadow_vfta[i]);
4693 
4694 	reg = E1000_READ_REG(hw, E1000_CTRL);
4695 	reg |= E1000_CTRL_VME;
4696 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4697 
4698 	/* Enable the Filter Table */
4699 	reg = E1000_READ_REG(hw, E1000_RCTL);
4700 	reg &= ~E1000_RCTL_CFIEN;
4701 	reg |= E1000_RCTL_VFE;
4702 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4703 }
4704 
4705 static void
4706 em_enable_intr(struct adapter *adapter)
4707 {
4708 	struct e1000_hw *hw = &adapter->hw;
4709 	u32 ims_mask = IMS_ENABLE_MASK;
4710 
4711 	if (hw->mac.type == e1000_82574) {
4712 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4713 		ims_mask |= EM_MSIX_MASK;
4714 	}
4715 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4716 }
4717 
4718 static void
4719 em_disable_intr(struct adapter *adapter)
4720 {
4721 	struct e1000_hw *hw = &adapter->hw;
4722 
4723 	if (hw->mac.type == e1000_82574)
4724 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4725 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4726 }
4727 
4728 /*
4729  * Bit of a misnomer, what this really means is
4730  * to enable OS management of the system... aka
4731  * to disable special hardware management features
4732  */
4733 static void
4734 em_init_manageability(struct adapter *adapter)
4735 {
4736 	/* A shared code workaround */
4737 #define E1000_82542_MANC2H E1000_MANC2H
4738 	if (adapter->has_manage) {
4739 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4740 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4741 
4742 		/* disable hardware interception of ARP */
4743 		manc &= ~(E1000_MANC_ARP_EN);
4744 
4745                 /* enable receiving management packets to the host */
4746 		manc |= E1000_MANC_EN_MNG2HOST;
4747 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4748 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4749 		manc2h |= E1000_MNG2HOST_PORT_623;
4750 		manc2h |= E1000_MNG2HOST_PORT_664;
4751 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4752 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4753 	}
4754 }
4755 
4756 /*
4757  * Give control back to hardware management
4758  * controller if there is one.
4759  */
4760 static void
4761 em_release_manageability(struct adapter *adapter)
4762 {
4763 	if (adapter->has_manage) {
4764 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4765 
4766 		/* re-enable hardware interception of ARP */
4767 		manc |= E1000_MANC_ARP_EN;
4768 		manc &= ~E1000_MANC_EN_MNG2HOST;
4769 
4770 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4771 	}
4772 }
4773 
4774 /*
4775  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4776  * For ASF and Pass Through versions of f/w this means
4777  * that the driver is loaded. For AMT version type f/w
4778  * this means that the network i/f is open.
4779  */
4780 static void
4781 em_get_hw_control(struct adapter *adapter)
4782 {
4783 	u32 ctrl_ext, swsm;
4784 
4785 	if (adapter->hw.mac.type == e1000_82573) {
4786 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4787 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4788 		    swsm | E1000_SWSM_DRV_LOAD);
4789 		return;
4790 	}
4791 	/* else */
4792 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4793 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4794 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4795 	return;
4796 }
4797 
4798 /*
4799  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4800  * For ASF and Pass Through versions of f/w this means that
4801  * the driver is no longer loaded. For AMT versions of the
4802  * f/w this means that the network i/f is closed.
4803  */
4804 static void
4805 em_release_hw_control(struct adapter *adapter)
4806 {
4807 	u32 ctrl_ext, swsm;
4808 
4809 	if (!adapter->has_manage)
4810 		return;
4811 
4812 	if (adapter->hw.mac.type == e1000_82573) {
4813 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4814 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4815 		    swsm & ~E1000_SWSM_DRV_LOAD);
4816 		return;
4817 	}
4818 	/* else */
4819 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4820 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4821 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4822 	return;
4823 }
4824 
4825 static int
4826 em_is_valid_ether_addr(u8 *addr)
4827 {
4828 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4829 
4830 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4831 		return (FALSE);
4832 	}
4833 
4834 	return (TRUE);
4835 }
4836 
4837 /*
4838 ** Parse the interface capabilities with regard
4839 ** to both system management and wake-on-lan for
4840 ** later use.
4841 */
4842 static void
4843 em_get_wakeup(device_t dev)
4844 {
4845 	struct adapter	*adapter = device_get_softc(dev);
4846 	u16		eeprom_data = 0, device_id, apme_mask;
4847 
4848 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4849 	apme_mask = EM_EEPROM_APME;
4850 
4851 	switch (adapter->hw.mac.type) {
4852 	case e1000_82573:
4853 	case e1000_82583:
4854 		adapter->has_amt = TRUE;
4855 		/* Falls thru */
4856 	case e1000_82571:
4857 	case e1000_82572:
4858 	case e1000_80003es2lan:
4859 		if (adapter->hw.bus.func == 1) {
4860 			e1000_read_nvm(&adapter->hw,
4861 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4862 			break;
4863 		} else
4864 			e1000_read_nvm(&adapter->hw,
4865 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4866 		break;
4867 	case e1000_ich8lan:
4868 	case e1000_ich9lan:
4869 	case e1000_ich10lan:
4870 	case e1000_pchlan:
4871 	case e1000_pch2lan:
4872 		apme_mask = E1000_WUC_APME;
4873 		adapter->has_amt = TRUE;
4874 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4875 		break;
4876 	default:
4877 		e1000_read_nvm(&adapter->hw,
4878 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4879 		break;
4880 	}
4881 	if (eeprom_data & apme_mask)
4882 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4883 	/*
4884          * We have the eeprom settings, now apply the special cases
4885          * where the eeprom may be wrong or the board won't support
4886          * wake on lan on a particular port
4887 	 */
4888 	device_id = pci_get_device(dev);
4889         switch (device_id) {
4890 	case E1000_DEV_ID_82571EB_FIBER:
4891 		/* Wake events only supported on port A for dual fiber
4892 		 * regardless of eeprom setting */
4893 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4894 		    E1000_STATUS_FUNC_1)
4895 			adapter->wol = 0;
4896 		break;
4897 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4898 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4899 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4900                 /* if quad port adapter, disable WoL on all but port A */
4901 		if (global_quad_port_a != 0)
4902 			adapter->wol = 0;
4903 		/* Reset for multiple quad port adapters */
4904 		if (++global_quad_port_a == 4)
4905 			global_quad_port_a = 0;
4906                 break;
4907 	}
4908 	return;
4909 }
4910 
4911 
4912 /*
4913  * Enable PCI Wake On Lan capability
4914  */
4915 static void
4916 em_enable_wakeup(device_t dev)
4917 {
4918 	struct adapter	*adapter = device_get_softc(dev);
4919 	if_t ifp = adapter->ifp;
4920 	u32		pmc, ctrl, ctrl_ext, rctl;
4921 	u16     	status;
4922 
4923 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4924 		return;
4925 
4926 	/* Advertise the wakeup capability */
4927 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4928 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4929 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4930 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4931 
4932 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4933 	    (adapter->hw.mac.type == e1000_pchlan) ||
4934 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4935 	    (adapter->hw.mac.type == e1000_ich10lan))
4936 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4937 
4938 	/* Keep the laser running on Fiber adapters */
4939 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4940 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4941 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4942 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4943 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4944 	}
4945 
4946 	/*
4947 	** Determine type of Wakeup: note that wol
4948 	** is set with all bits on by default.
4949 	*/
4950 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
4951 		adapter->wol &= ~E1000_WUFC_MAG;
4952 
4953 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
4954 		adapter->wol &= ~E1000_WUFC_MC;
4955 	else {
4956 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4957 		rctl |= E1000_RCTL_MPE;
4958 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4959 	}
4960 
4961 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4962 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4963 		if (em_enable_phy_wakeup(adapter))
4964 			return;
4965 	} else {
4966 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4967 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4968 	}
4969 
4970 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4971 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4972 
4973         /* Request PME */
4974         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4975 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4976 	if (if_getcapenable(ifp) & IFCAP_WOL)
4977 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4978         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4979 
4980 	return;
4981 }
4982 
4983 /*
4984 ** WOL in the newer chipset interfaces (pchlan)
4985 ** require thing to be copied into the phy
4986 */
4987 static int
4988 em_enable_phy_wakeup(struct adapter *adapter)
4989 {
4990 	struct e1000_hw *hw = &adapter->hw;
4991 	u32 mreg, ret = 0;
4992 	u16 preg;
4993 
4994 	/* copy MAC RARs to PHY RARs */
4995 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4996 
4997 	/* copy MAC MTA to PHY MTA */
4998 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4999 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5000 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5001 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5002 		    (u16)((mreg >> 16) & 0xFFFF));
5003 	}
5004 
5005 	/* configure PHY Rx Control register */
5006 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5007 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5008 	if (mreg & E1000_RCTL_UPE)
5009 		preg |= BM_RCTL_UPE;
5010 	if (mreg & E1000_RCTL_MPE)
5011 		preg |= BM_RCTL_MPE;
5012 	preg &= ~(BM_RCTL_MO_MASK);
5013 	if (mreg & E1000_RCTL_MO_3)
5014 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5015 				<< BM_RCTL_MO_SHIFT);
5016 	if (mreg & E1000_RCTL_BAM)
5017 		preg |= BM_RCTL_BAM;
5018 	if (mreg & E1000_RCTL_PMCF)
5019 		preg |= BM_RCTL_PMCF;
5020 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5021 	if (mreg & E1000_CTRL_RFCE)
5022 		preg |= BM_RCTL_RFCE;
5023 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5024 
5025 	/* enable PHY wakeup in MAC register */
5026 	E1000_WRITE_REG(hw, E1000_WUC,
5027 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5028 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5029 
5030 	/* configure and enable PHY wakeup in PHY registers */
5031 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5032 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5033 
5034 	/* activate PHY wakeup */
5035 	ret = hw->phy.ops.acquire(hw);
5036 	if (ret) {
5037 		printf("Could not acquire PHY\n");
5038 		return ret;
5039 	}
5040 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5041 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5042 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5043 	if (ret) {
5044 		printf("Could not read PHY page 769\n");
5045 		goto out;
5046 	}
5047 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5048 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5049 	if (ret)
5050 		printf("Could not set PHY Host Wakeup bit\n");
5051 out:
5052 	hw->phy.ops.release(hw);
5053 
5054 	return ret;
5055 }
5056 
5057 static void
5058 em_led_func(void *arg, int onoff)
5059 {
5060 	struct adapter	*adapter = arg;
5061 
5062 	EM_CORE_LOCK(adapter);
5063 	if (onoff) {
5064 		e1000_setup_led(&adapter->hw);
5065 		e1000_led_on(&adapter->hw);
5066 	} else {
5067 		e1000_led_off(&adapter->hw);
5068 		e1000_cleanup_led(&adapter->hw);
5069 	}
5070 	EM_CORE_UNLOCK(adapter);
5071 }
5072 
5073 /*
5074 ** Disable the L0S and L1 LINK states
5075 */
5076 static void
5077 em_disable_aspm(struct adapter *adapter)
5078 {
5079 	int		base, reg;
5080 	u16		link_cap,link_ctrl;
5081 	device_t	dev = adapter->dev;
5082 
5083 	switch (adapter->hw.mac.type) {
5084 		case e1000_82573:
5085 		case e1000_82574:
5086 		case e1000_82583:
5087 			break;
5088 		default:
5089 			return;
5090 	}
5091 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5092 		return;
5093 	reg = base + PCIER_LINK_CAP;
5094 	link_cap = pci_read_config(dev, reg, 2);
5095 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5096 		return;
5097 	reg = base + PCIER_LINK_CTL;
5098 	link_ctrl = pci_read_config(dev, reg, 2);
5099 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5100 	pci_write_config(dev, reg, link_ctrl, 2);
5101 	return;
5102 }
5103 
5104 /**********************************************************************
5105  *
5106  *  Update the board statistics counters.
5107  *
5108  **********************************************************************/
5109 static void
5110 em_update_stats_counters(struct adapter *adapter)
5111 {
5112 
5113 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5114 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5115 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5116 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5117 	}
5118 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5119 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5120 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5121 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5122 
5123 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5124 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5125 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5126 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5127 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5128 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5129 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5130 	/*
5131 	** For watchdog management we need to know if we have been
5132 	** paused during the last interval, so capture that here.
5133 	*/
5134 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5135 	adapter->stats.xoffrxc += adapter->pause_frames;
5136 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5137 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5138 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5139 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5140 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5141 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5142 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5143 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5144 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5145 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5146 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5147 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5148 
5149 	/* For the 64-bit byte counters the low dword must be read first. */
5150 	/* Both registers clear on the read of the high dword */
5151 
5152 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5153 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5154 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5155 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5156 
5157 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5158 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5159 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5160 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5161 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5162 
5163 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5164 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5165 
5166 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5167 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5168 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5169 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5170 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5171 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5172 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5173 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5174 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5175 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5176 
5177 	/* Interrupt Counts */
5178 
5179 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5180 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5181 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5182 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5183 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5184 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5185 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5186 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5187 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5188 
5189 	if (adapter->hw.mac.type >= e1000_82543) {
5190 		adapter->stats.algnerrc +=
5191 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5192 		adapter->stats.rxerrc +=
5193 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5194 		adapter->stats.tncrs +=
5195 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5196 		adapter->stats.cexterr +=
5197 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5198 		adapter->stats.tsctc +=
5199 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5200 		adapter->stats.tsctfc +=
5201 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5202 	}
5203 }
5204 
5205 static uint64_t
5206 em_get_counter(if_t ifp, ift_counter cnt)
5207 {
5208 	struct adapter *adapter;
5209 
5210 	adapter = if_getsoftc(ifp);
5211 
5212 	switch (cnt) {
5213 	case IFCOUNTER_COLLISIONS:
5214 		return (adapter->stats.colc);
5215 	case IFCOUNTER_IERRORS:
5216 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5217 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5218 		    adapter->stats.ruc + adapter->stats.roc +
5219 		    adapter->stats.mpc + adapter->stats.cexterr);
5220 	case IFCOUNTER_OERRORS:
5221 		return (adapter->stats.ecol + adapter->stats.latecol +
5222 		    adapter->watchdog_events);
5223 	default:
5224 		return (if_get_counter_default(ifp, cnt));
5225 	}
5226 }
5227 
5228 /* Export a single 32-bit register via a read-only sysctl. */
5229 static int
5230 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5231 {
5232 	struct adapter *adapter;
5233 	u_int val;
5234 
5235 	adapter = oidp->oid_arg1;
5236 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5237 	return (sysctl_handle_int(oidp, &val, 0, req));
5238 }
5239 
5240 /*
5241  * Add sysctl variables, one per statistic, to the system.
5242  */
5243 static void
5244 em_add_hw_stats(struct adapter *adapter)
5245 {
5246 	device_t dev = adapter->dev;
5247 
5248 	struct tx_ring *txr = adapter->tx_rings;
5249 	struct rx_ring *rxr = adapter->rx_rings;
5250 
5251 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5252 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5253 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5254 	struct e1000_hw_stats *stats = &adapter->stats;
5255 
5256 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5257 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5258 
5259 #define QUEUE_NAME_LEN 32
5260 	char namebuf[QUEUE_NAME_LEN];
5261 
5262 	/* Driver Statistics */
5263 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5264 			CTLFLAG_RD, &adapter->link_irq,
5265 			"Link MSIX IRQ Handled");
5266 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5267 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5268 			 "Std mbuf failed");
5269 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5270 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5271 			 "Std mbuf cluster failed");
5272 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5273 			CTLFLAG_RD, &adapter->dropped_pkts,
5274 			"Driver dropped packets");
5275 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5276 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5277 			"Driver tx dma failure in xmit");
5278 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5279 			CTLFLAG_RD, &adapter->rx_overruns,
5280 			"RX overruns");
5281 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5282 			CTLFLAG_RD, &adapter->watchdog_events,
5283 			"Watchdog timeouts");
5284 
5285 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5286 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5287 			em_sysctl_reg_handler, "IU",
5288 			"Device Control Register");
5289 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5290 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5291 			em_sysctl_reg_handler, "IU",
5292 			"Receiver Control Register");
5293 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5294 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5295 			"Flow Control High Watermark");
5296 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5297 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5298 			"Flow Control Low Watermark");
5299 
5300 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5301 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5302 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5303 					    CTLFLAG_RD, NULL, "Queue Name");
5304 		queue_list = SYSCTL_CHILDREN(queue_node);
5305 
5306 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5307 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5308 				E1000_TDH(txr->me),
5309 				em_sysctl_reg_handler, "IU",
5310  				"Transmit Descriptor Head");
5311 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5312 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5313 				E1000_TDT(txr->me),
5314 				em_sysctl_reg_handler, "IU",
5315  				"Transmit Descriptor Tail");
5316 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5317 				CTLFLAG_RD, &txr->tx_irq,
5318 				"Queue MSI-X Transmit Interrupts");
5319 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5320 				CTLFLAG_RD, &txr->no_desc_avail,
5321 				"Queue No Descriptor Available");
5322 
5323 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5324 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5325 				E1000_RDH(rxr->me),
5326 				em_sysctl_reg_handler, "IU",
5327 				"Receive Descriptor Head");
5328 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5329 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5330 				E1000_RDT(rxr->me),
5331 				em_sysctl_reg_handler, "IU",
5332 				"Receive Descriptor Tail");
5333 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5334 				CTLFLAG_RD, &rxr->rx_irq,
5335 				"Queue MSI-X Receive Interrupts");
5336 	}
5337 
5338 	/* MAC stats get their own sub node */
5339 
5340 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5341 				    CTLFLAG_RD, NULL, "Statistics");
5342 	stat_list = SYSCTL_CHILDREN(stat_node);
5343 
5344 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5345 			CTLFLAG_RD, &stats->ecol,
5346 			"Excessive collisions");
5347 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5348 			CTLFLAG_RD, &stats->scc,
5349 			"Single collisions");
5350 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5351 			CTLFLAG_RD, &stats->mcc,
5352 			"Multiple collisions");
5353 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5354 			CTLFLAG_RD, &stats->latecol,
5355 			"Late collisions");
5356 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5357 			CTLFLAG_RD, &stats->colc,
5358 			"Collision Count");
5359 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5360 			CTLFLAG_RD, &adapter->stats.symerrs,
5361 			"Symbol Errors");
5362 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5363 			CTLFLAG_RD, &adapter->stats.sec,
5364 			"Sequence Errors");
5365 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5366 			CTLFLAG_RD, &adapter->stats.dc,
5367 			"Defer Count");
5368 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5369 			CTLFLAG_RD, &adapter->stats.mpc,
5370 			"Missed Packets");
5371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5372 			CTLFLAG_RD, &adapter->stats.rnbc,
5373 			"Receive No Buffers");
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5375 			CTLFLAG_RD, &adapter->stats.ruc,
5376 			"Receive Undersize");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5378 			CTLFLAG_RD, &adapter->stats.rfc,
5379 			"Fragmented Packets Received ");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5381 			CTLFLAG_RD, &adapter->stats.roc,
5382 			"Oversized Packets Received");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5384 			CTLFLAG_RD, &adapter->stats.rjc,
5385 			"Recevied Jabber");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5387 			CTLFLAG_RD, &adapter->stats.rxerrc,
5388 			"Receive Errors");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5390 			CTLFLAG_RD, &adapter->stats.crcerrs,
5391 			"CRC errors");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5393 			CTLFLAG_RD, &adapter->stats.algnerrc,
5394 			"Alignment Errors");
5395 	/* On 82575 these are collision counts */
5396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5397 			CTLFLAG_RD, &adapter->stats.cexterr,
5398 			"Collision/Carrier extension errors");
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5400 			CTLFLAG_RD, &adapter->stats.xonrxc,
5401 			"XON Received");
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5403 			CTLFLAG_RD, &adapter->stats.xontxc,
5404 			"XON Transmitted");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5406 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5407 			"XOFF Received");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5409 			CTLFLAG_RD, &adapter->stats.xofftxc,
5410 			"XOFF Transmitted");
5411 
5412 	/* Packet Reception Stats */
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5414 			CTLFLAG_RD, &adapter->stats.tpr,
5415 			"Total Packets Received ");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5417 			CTLFLAG_RD, &adapter->stats.gprc,
5418 			"Good Packets Received");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5420 			CTLFLAG_RD, &adapter->stats.bprc,
5421 			"Broadcast Packets Received");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5423 			CTLFLAG_RD, &adapter->stats.mprc,
5424 			"Multicast Packets Received");
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5426 			CTLFLAG_RD, &adapter->stats.prc64,
5427 			"64 byte frames received ");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5429 			CTLFLAG_RD, &adapter->stats.prc127,
5430 			"65-127 byte frames received");
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5432 			CTLFLAG_RD, &adapter->stats.prc255,
5433 			"128-255 byte frames received");
5434 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5435 			CTLFLAG_RD, &adapter->stats.prc511,
5436 			"256-511 byte frames received");
5437 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5438 			CTLFLAG_RD, &adapter->stats.prc1023,
5439 			"512-1023 byte frames received");
5440 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5441 			CTLFLAG_RD, &adapter->stats.prc1522,
5442 			"1023-1522 byte frames received");
5443  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5444  			CTLFLAG_RD, &adapter->stats.gorc,
5445  			"Good Octets Received");
5446 
5447 	/* Packet Transmission Stats */
5448  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5449  			CTLFLAG_RD, &adapter->stats.gotc,
5450  			"Good Octets Transmitted");
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5452 			CTLFLAG_RD, &adapter->stats.tpt,
5453 			"Total Packets Transmitted");
5454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5455 			CTLFLAG_RD, &adapter->stats.gptc,
5456 			"Good Packets Transmitted");
5457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5458 			CTLFLAG_RD, &adapter->stats.bptc,
5459 			"Broadcast Packets Transmitted");
5460 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5461 			CTLFLAG_RD, &adapter->stats.mptc,
5462 			"Multicast Packets Transmitted");
5463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5464 			CTLFLAG_RD, &adapter->stats.ptc64,
5465 			"64 byte frames transmitted ");
5466 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5467 			CTLFLAG_RD, &adapter->stats.ptc127,
5468 			"65-127 byte frames transmitted");
5469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5470 			CTLFLAG_RD, &adapter->stats.ptc255,
5471 			"128-255 byte frames transmitted");
5472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5473 			CTLFLAG_RD, &adapter->stats.ptc511,
5474 			"256-511 byte frames transmitted");
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5476 			CTLFLAG_RD, &adapter->stats.ptc1023,
5477 			"512-1023 byte frames transmitted");
5478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5479 			CTLFLAG_RD, &adapter->stats.ptc1522,
5480 			"1024-1522 byte frames transmitted");
5481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5482 			CTLFLAG_RD, &adapter->stats.tsctc,
5483 			"TSO Contexts Transmitted");
5484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5485 			CTLFLAG_RD, &adapter->stats.tsctfc,
5486 			"TSO Contexts Failed");
5487 
5488 
5489 	/* Interrupt Stats */
5490 
5491 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5492 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5493 	int_list = SYSCTL_CHILDREN(int_node);
5494 
5495 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5496 			CTLFLAG_RD, &adapter->stats.iac,
5497 			"Interrupt Assertion Count");
5498 
5499 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5500 			CTLFLAG_RD, &adapter->stats.icrxptc,
5501 			"Interrupt Cause Rx Pkt Timer Expire Count");
5502 
5503 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5504 			CTLFLAG_RD, &adapter->stats.icrxatc,
5505 			"Interrupt Cause Rx Abs Timer Expire Count");
5506 
5507 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5508 			CTLFLAG_RD, &adapter->stats.ictxptc,
5509 			"Interrupt Cause Tx Pkt Timer Expire Count");
5510 
5511 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5512 			CTLFLAG_RD, &adapter->stats.ictxatc,
5513 			"Interrupt Cause Tx Abs Timer Expire Count");
5514 
5515 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5516 			CTLFLAG_RD, &adapter->stats.ictxqec,
5517 			"Interrupt Cause Tx Queue Empty Count");
5518 
5519 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5520 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5521 			"Interrupt Cause Tx Queue Min Thresh Count");
5522 
5523 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5524 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5525 			"Interrupt Cause Rx Desc Min Thresh Count");
5526 
5527 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5528 			CTLFLAG_RD, &adapter->stats.icrxoc,
5529 			"Interrupt Cause Receiver Overrun Count");
5530 }
5531 
5532 /**********************************************************************
5533  *
5534  *  This routine provides a way to dump out the adapter eeprom,
5535  *  often a useful debug/service tool. This only dumps the first
5536  *  32 words, stuff that matters is in that extent.
5537  *
5538  **********************************************************************/
5539 static int
5540 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5541 {
5542 	struct adapter *adapter = (struct adapter *)arg1;
5543 	int error;
5544 	int result;
5545 
5546 	result = -1;
5547 	error = sysctl_handle_int(oidp, &result, 0, req);
5548 
5549 	if (error || !req->newptr)
5550 		return (error);
5551 
5552 	/*
5553 	 * This value will cause a hex dump of the
5554 	 * first 32 16-bit words of the EEPROM to
5555 	 * the screen.
5556 	 */
5557 	if (result == 1)
5558 		em_print_nvm_info(adapter);
5559 
5560 	return (error);
5561 }
5562 
5563 static void
5564 em_print_nvm_info(struct adapter *adapter)
5565 {
5566 	u16	eeprom_data;
5567 	int	i, j, row = 0;
5568 
5569 	/* Its a bit crude, but it gets the job done */
5570 	printf("\nInterface EEPROM Dump:\n");
5571 	printf("Offset\n0x0000  ");
5572 	for (i = 0, j = 0; i < 32; i++, j++) {
5573 		if (j == 8) { /* Make the offset block */
5574 			j = 0; ++row;
5575 			printf("\n0x00%x0  ",row);
5576 		}
5577 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5578 		printf("%04x ", eeprom_data);
5579 	}
5580 	printf("\n");
5581 }
5582 
5583 static int
5584 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5585 {
5586 	struct em_int_delay_info *info;
5587 	struct adapter *adapter;
5588 	u32 regval;
5589 	int error, usecs, ticks;
5590 
5591 	info = (struct em_int_delay_info *)arg1;
5592 	usecs = info->value;
5593 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5594 	if (error != 0 || req->newptr == NULL)
5595 		return (error);
5596 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5597 		return (EINVAL);
5598 	info->value = usecs;
5599 	ticks = EM_USECS_TO_TICKS(usecs);
5600 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5601 		ticks *= 4;
5602 
5603 	adapter = info->adapter;
5604 
5605 	EM_CORE_LOCK(adapter);
5606 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5607 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5608 	/* Handle a few special cases. */
5609 	switch (info->offset) {
5610 	case E1000_RDTR:
5611 		break;
5612 	case E1000_TIDV:
5613 		if (ticks == 0) {
5614 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5615 			/* Don't write 0 into the TIDV register. */
5616 			regval++;
5617 		} else
5618 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5619 		break;
5620 	}
5621 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5622 	EM_CORE_UNLOCK(adapter);
5623 	return (0);
5624 }
5625 
5626 static void
5627 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5628 	const char *description, struct em_int_delay_info *info,
5629 	int offset, int value)
5630 {
5631 	info->adapter = adapter;
5632 	info->offset = offset;
5633 	info->value = value;
5634 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5635 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5636 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5637 	    info, 0, em_sysctl_int_delay, "I", description);
5638 }
5639 
5640 static void
5641 em_set_sysctl_value(struct adapter *adapter, const char *name,
5642 	const char *description, int *limit, int value)
5643 {
5644 	*limit = value;
5645 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5646 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5647 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5648 }
5649 
5650 
5651 /*
5652 ** Set flow control using sysctl:
5653 ** Flow control values:
5654 **      0 - off
5655 **      1 - rx pause
5656 **      2 - tx pause
5657 **      3 - full
5658 */
5659 static int
5660 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5661 {
5662         int		error;
5663 	static int	input = 3; /* default is full */
5664         struct adapter	*adapter = (struct adapter *) arg1;
5665 
5666         error = sysctl_handle_int(oidp, &input, 0, req);
5667 
5668         if ((error) || (req->newptr == NULL))
5669                 return (error);
5670 
5671 	if (input == adapter->fc) /* no change? */
5672 		return (error);
5673 
5674         switch (input) {
5675                 case e1000_fc_rx_pause:
5676                 case e1000_fc_tx_pause:
5677                 case e1000_fc_full:
5678                 case e1000_fc_none:
5679                         adapter->hw.fc.requested_mode = input;
5680 			adapter->fc = input;
5681                         break;
5682                 default:
5683 			/* Do nothing */
5684 			return (error);
5685         }
5686 
5687         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5688         e1000_force_mac_fc(&adapter->hw);
5689         return (error);
5690 }
5691 
5692 /*
5693 ** Manage Energy Efficient Ethernet:
5694 ** Control values:
5695 **     0/1 - enabled/disabled
5696 */
5697 static int
5698 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5699 {
5700        struct adapter *adapter = (struct adapter *) arg1;
5701        int             error, value;
5702 
5703        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5704        error = sysctl_handle_int(oidp, &value, 0, req);
5705        if (error || req->newptr == NULL)
5706                return (error);
5707        EM_CORE_LOCK(adapter);
5708        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5709        em_init_locked(adapter);
5710        EM_CORE_UNLOCK(adapter);
5711        return (0);
5712 }
5713 
5714 static int
5715 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5716 {
5717 	struct adapter *adapter;
5718 	int error;
5719 	int result;
5720 
5721 	result = -1;
5722 	error = sysctl_handle_int(oidp, &result, 0, req);
5723 
5724 	if (error || !req->newptr)
5725 		return (error);
5726 
5727 	if (result == 1) {
5728 		adapter = (struct adapter *)arg1;
5729 		em_print_debug_info(adapter);
5730         }
5731 
5732 	return (error);
5733 }
5734 
5735 /*
5736 ** This routine is meant to be fluid, add whatever is
5737 ** needed for debugging a problem.  -jfv
5738 */
5739 static void
5740 em_print_debug_info(struct adapter *adapter)
5741 {
5742 	device_t dev = adapter->dev;
5743 	struct tx_ring *txr = adapter->tx_rings;
5744 	struct rx_ring *rxr = adapter->rx_rings;
5745 
5746 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5747 		printf("Interface is RUNNING ");
5748 	else
5749 		printf("Interface is NOT RUNNING\n");
5750 
5751 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5752 		printf("and INACTIVE\n");
5753 	else
5754 		printf("and ACTIVE\n");
5755 
5756 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5757 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5758 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5759 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5760 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5761 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5762 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5763 	device_printf(dev, "TX descriptors avail = %d\n",
5764 	    txr->tx_avail);
5765 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5766 	    txr->no_desc_avail);
5767 	device_printf(dev, "RX discarded packets = %ld\n",
5768 	    rxr->rx_discarded);
5769 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5770 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5771 }
5772