/******************************************************************************

  Copyright (c) 2001-2014, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
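
/*
 * Worked examples (commentary, not from the hardware manual): the delay
 * timers tick in units of 1.024 usecs, so the two macros above round-trip
 * between units, e.g. EM_TICKS_TO_USECS(100) = (102400 + 500) / 1000 = 102
 * and EM_USECS_TO_TICKS(102) = (102000 + 512) / 1024 = 100.  The ITR
 * register counts in 256 ns increments, so DEFAULT_ITR =
 * 1000000000 / (8000 * 256) = 488, i.e. 488 * 256 ns ~= 125 usecs between
 * interrupts, which matches the MAX_INTS_PER_SEC = 8000 target rate.
 */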

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy Efficient Ethernet - default to OFF.  The value is copied into
 * hw->dev_spec.ich8lan.eee_disable, so 1 means EEE is disabled.
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
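
/*
 * The CTLFLAG_RDTUN knobs above are set as loader tunables; for example
 * (hypothetical values) in /boot/loader.conf:
 *
 *   hw.em.rxd="2048"
 *   hw.em.txd="2048"
 *   hw.em.rx_int_delay="32"
 *   hw.em.enable_msix="1"
 */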

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
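	/* i.e. 1500 (ETHERMTU) + 14 (ETHER_HDR_LEN) + 4 (FCS) = 1518 bytes */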

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	** Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than doing an immediate send.  It is this queueing, rather
 *  than having multiple hardware tx queues, that is the advantage in
 *  this driver.
 **********************************************************************/
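/*
 * The loop below relies on the drbr/buf_ring contract: drbr_peek()
 * returns the head packet without removing it; after a successful
 * em_xmit() the packet is consumed with drbr_advance(); on failure
 * the (possibly defragmented) mbuf is restored with drbr_putback(),
 * or simply advanced past if em_xmit() freed it and set it to NULL.
 */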
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		if_etherbpfmtap(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		if_etherbpfmtap(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
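		/*
		 * The check below converts the frame limit into an MTU
		 * limit: e.g. with max_frame_size = 9234 the largest MTU
		 * accepted is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4)
		 * = 9216 bytes.
		 */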
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as an
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, user can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
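	/*
	 * (In the shared code E1000_RAR_ENTRIES is 15, so the duplicate
	 * written below lands in the last receive-address slot, RAR[14].)
	 */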
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
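	/*
	 * MCLBYTES is a 2K cluster, MJUMPAGESIZE a page-sized (typically
	 * 4K) cluster, and MJUM9BYTES a 9K cluster, matching the frame
	 * size thresholds below.
	 */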
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI-X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */
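/*
 * Usage note: DEVICE_POLLING requires "options DEVICE_POLLING" in the
 * kernel configuration; polling is then toggled per interface from
 * userland via the capability flag, e.g. "ifconfig em0 polling" and
 * "ifconfig em0 -polling".
 */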


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
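/*
 * (em_irq_fast runs as an interrupt filter, so it must not sleep or take
 * sleepable locks; it only classifies the interrupt, masks further
 * interrupts, and defers the real RX/TX work to the que taskqueue that
 * is drained by em_handle_que() below.)
 */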

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}
1690 
1691 
1692 /*********************************************************************
1693  *
1694  *  Media Ioctl callback
1695  *
1696  *  This routine is called whenever the user queries the status of
1697  *  the interface using ifconfig.
1698  *
1699  **********************************************************************/
1700 static void
1701 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1702 {
1703 	struct adapter *adapter = if_getsoftc(ifp);
1704 	u_char fiber_type = IFM_1000_SX;
1705 
1706 	INIT_DEBUGOUT("em_media_status: begin");
1707 
1708 	EM_CORE_LOCK(adapter);
1709 	em_update_link_status(adapter);
1710 
1711 	ifmr->ifm_status = IFM_AVALID;
1712 	ifmr->ifm_active = IFM_ETHER;
1713 
1714 	if (!adapter->link_active) {
1715 		EM_CORE_UNLOCK(adapter);
1716 		return;
1717 	}
1718 
1719 	ifmr->ifm_status |= IFM_ACTIVE;
1720 
1721 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1724 	} else {
1725 		switch (adapter->link_speed) {
1726 		case 10:
1727 			ifmr->ifm_active |= IFM_10_T;
1728 			break;
1729 		case 100:
1730 			ifmr->ifm_active |= IFM_100_TX;
1731 			break;
1732 		case 1000:
1733 			ifmr->ifm_active |= IFM_1000_T;
1734 			break;
1735 		}
1736 		if (adapter->link_duplex == FULL_DUPLEX)
1737 			ifmr->ifm_active |= IFM_FDX;
1738 		else
1739 			ifmr->ifm_active |= IFM_HDX;
1740 	}
1741 	EM_CORE_UNLOCK(adapter);
1742 }
1743 
1744 /*********************************************************************
1745  *
1746  *  Media Ioctl callback
1747  *
1748  *  This routine is called when the user changes speed/duplex using
1749  *  media/mediopt option with ifconfig.
1750  *
1751  **********************************************************************/
1752 static int
1753 em_media_change(if_t ifp)
1754 {
1755 	struct adapter *adapter = if_getsoftc(ifp);
1756 	struct ifmedia  *ifm = &adapter->media;
1757 
1758 	INIT_DEBUGOUT("em_media_change: begin");
1759 
1760 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761 		return (EINVAL);
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765 	case IFM_AUTO:
1766 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768 		break;
1769 	case IFM_1000_LX:
1770 	case IFM_1000_SX:
1771 	case IFM_1000_T:
1772 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774 		break;
1775 	case IFM_100_TX:
1776 		adapter->hw.mac.autoneg = FALSE;
1777 		adapter->hw.phy.autoneg_advertised = 0;
1778 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780 		else
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782 		break;
1783 	case IFM_10_T:
1784 		adapter->hw.mac.autoneg = FALSE;
1785 		adapter->hw.phy.autoneg_advertised = 0;
1786 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788 		else
1789 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790 		break;
1791 	default:
1792 		device_printf(adapter->dev, "Unsupported media type\n");
1793 	}
1794 
1795 	em_init_locked(adapter);
1796 	EM_CORE_UNLOCK(adapter);
1797 
1798 	return (0);
1799 }
1800 
1801 /*********************************************************************
1802  *
1803  *  This routine maps the mbufs to tx descriptors.
1804  *
1805  *  return 0 on success, positive on failure
1806  **********************************************************************/
1807 
1808 static int
1809 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810 {
1811 	struct adapter		*adapter = txr->adapter;
1812 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1813 	bus_dmamap_t		map;
1814 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1815 	struct e1000_tx_desc	*ctxd = NULL;
1816 	struct mbuf		*m_head;
1817 	struct ether_header	*eh;
1818 	struct ip		*ip = NULL;
1819 	struct tcphdr		*tp = NULL;
1820 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1821 	int			ip_off, poff;
1822 	int			nsegs, i, j, first, last = 0;
1823 	int			error, do_tso, tso_desc = 0, remap = 1;
1824 
1825 	m_head = *m_headp;
1826 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1827 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1828 	ip_off = poff = 0;
1829 
1830 	/*
1831 	 * Intel recommends entire IP/TCP header length reside in a single
1832 	 * buffer. If multiple descriptors are used to describe the IP and
1833 	 * TCP header, each descriptor should describe one or more
1834 	 * complete headers; descriptors referencing only parts of headers
1835 	 * are not supported. If all layer headers are not coalesced into
1836 	 * a single buffer, each buffer should not cross a 4KB boundary,
1837 	 * or be larger than the maximum read request size.
1838 	 * The controller also requires modifying the IP/TCP header to make
1839 	 * TSO work, so we first get a writable mbuf chain then coalesce the
1840 	 * ethernet/IP/TCP header into a single buffer to meet the
1841 	 * requirement of the controller. This also simplifies IP/TCP/UDP
1842 	 * checksum offloading, which has similar restrictions.
1843 	 */
1844 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1845 		if (do_tso || (m_head->m_next != NULL &&
1846 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1847 			if (M_WRITABLE(*m_headp) == 0) {
1848 				m_head = m_dup(*m_headp, M_NOWAIT);
1849 				m_freem(*m_headp);
1850 				if (m_head == NULL) {
1851 					*m_headp = NULL;
1852 					return (ENOBUFS);
1853 				}
1854 				*m_headp = m_head;
1855 			}
1856 		}
1857 		/*
1858 		 * XXX
1859 		 * Assume IPv4, we don't have TSO/checksum offload support
1860 		 * for IPv6 yet.
1861 		 */
1862 		ip_off = sizeof(struct ether_header);
1863 		m_head = m_pullup(m_head, ip_off);
1864 		if (m_head == NULL) {
1865 			*m_headp = NULL;
1866 			return (ENOBUFS);
1867 		}
1868 		eh = mtod(m_head, struct ether_header *);
1869 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1870 			ip_off = sizeof(struct ether_vlan_header);
1871 			m_head = m_pullup(m_head, ip_off);
1872 			if (m_head == NULL) {
1873 				*m_headp = NULL;
1874 				return (ENOBUFS);
1875 			}
1876 		}
1877 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1878 		if (m_head == NULL) {
1879 			*m_headp = NULL;
1880 			return (ENOBUFS);
1881 		}
1882 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1883 		poff = ip_off + (ip->ip_hl << 2);
1884 		if (do_tso) {
1885 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1886 			if (m_head == NULL) {
1887 				*m_headp = NULL;
1888 				return (ENOBUFS);
1889 			}
1890 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1891 			/*
1892 			 * TSO workaround:
1893 			 *   pull 4 more bytes of data into the header mbuf.
1894 			 */
1895 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1896 			if (m_head == NULL) {
1897 				*m_headp = NULL;
1898 				return (ENOBUFS);
1899 			}
1900 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1901 			ip->ip_len = 0;
1902 			ip->ip_sum = 0;
1903 			/*
1904 			 * The pseudo TCP checksum does not include the TCP
1905 			 * payload length, so the driver must recompute it here
1906 			 * to match what the hardware expects to see. This is
1907 			 * in adherence to Microsoft's Large Send specification.
1908 			 */
1909 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1910 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1911 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1912 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1913 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1914 			if (m_head == NULL) {
1915 				*m_headp = NULL;
1916 				return (ENOBUFS);
1917 			}
1918 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1919 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1920 			if (m_head == NULL) {
1921 				*m_headp = NULL;
1922 				return (ENOBUFS);
1923 			}
1924 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1925 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1926 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1927 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1928 			if (m_head == NULL) {
1929 				*m_headp = NULL;
1930 				return (ENOBUFS);
1931 			}
1932 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1933 		}
1934 		*m_headp = m_head;
1935 	}
1936 
1937 	/*
1938 	 * Map the packet for DMA
1939 	 *
1940 	 * Capture the first descriptor index,
1941 	 * this descriptor will have the index
1942 	 * of the EOP which is the only one that
1943 	 * now gets a DONE bit writeback.
1944 	 */
1945 	first = txr->next_avail_desc;
1946 	tx_buffer = &txr->tx_buffers[first];
1947 	tx_buffer_mapped = tx_buffer;
1948 	map = tx_buffer->map;
1949 
1950 retry:
1951 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1952 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1953 
1954 	/*
1955 	 * There are two types of errors we can (try) to handle:
1956 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1957 	 *   out of segments.  Defragment the mbuf chain and try again.
1958 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1959 	 *   at this point in time.  Defer sending and try again later.
1960 	 * All other errors, in particular EINVAL, are fatal and prevent the
1961 	 * mbuf chain from ever going through.  Drop it and report error.
1962 	 */
1963 	if (error == EFBIG && remap) {
1964 		struct mbuf *m;
1965 
1966 		m = m_defrag(*m_headp, M_NOWAIT);
1967 		if (m == NULL) {
1968 			adapter->mbuf_alloc_failed++;
1969 			m_freem(*m_headp);
1970 			*m_headp = NULL;
1971 			return (ENOBUFS);
1972 		}
1973 		*m_headp = m;
1974 
1975 		/* Try it again, but only once */
1976 		remap = 0;
1977 		goto retry;
1978 	} else if (error == ENOMEM) {
1979 		adapter->no_tx_dma_setup++;
1980 		return (error);
1981 	} else if (error != 0) {
1982 		adapter->no_tx_dma_setup++;
1983 		m_freem(*m_headp);
1984 		*m_headp = NULL;
1985 		return (error);
1986 	}
1987 
1988 	/*
1989 	 * TSO Hardware workaround, if this packet is not
1990 	 * TSO, and is only a single descriptor long, and
1991 	 * it follows a TSO burst, then we need to add a
1992 	 * sentinel descriptor to prevent premature writeback.
1993 	 */
1994 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1995 		if (nsegs == 1)
1996 			tso_desc = TRUE;
1997 		txr->tx_tso = FALSE;
1998 	}
1999 
2000 	if (nsegs > (txr->tx_avail - 2)) {
2001 		txr->no_desc_avail++;
2002 		bus_dmamap_unload(txr->txtag, map);
2003 		return (ENOBUFS);
2004 	}
2005 	m_head = *m_headp;
2006 
2007 	/* Do hardware assists */
2008 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2009 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2010 		    &txd_upper, &txd_lower);
2011 		/* we need to make a final sentinel transmit desc */
2012 		tso_desc = TRUE;
2013 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2014 		em_transmit_checksum_setup(txr, m_head,
2015 		    ip_off, ip, &txd_upper, &txd_lower);
2016 
2017 	if (m_head->m_flags & M_VLANTAG) {
2018 		/* Set the vlan id. */
2019 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2020 		/* Tell hardware to add tag */
2021 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2022 	}
2023 
2024 	i = txr->next_avail_desc;
2025 
2026 	/* Set up our transmit descriptors */
2027 	for (j = 0; j < nsegs; j++) {
2028 		bus_size_t seg_len;
2029 		bus_addr_t seg_addr;
2030 
2031 		tx_buffer = &txr->tx_buffers[i];
2032 		ctxd = &txr->tx_base[i];
2033 		seg_addr = segs[j].ds_addr;
2034 		seg_len  = segs[j].ds_len;
2035 		/*
2036 		** TSO Workaround:
2037 		** If this is the last descriptor, we want to
2038 		** split it so we have a small final sentinel
2039 		*/
2040 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2041 			seg_len -= 4;
2042 			ctxd->buffer_addr = htole64(seg_addr);
2043 			ctxd->lower.data = htole32(
2044 			    adapter->txd_cmd | txd_lower | seg_len);
2045 			ctxd->upper.data =
2046 			    htole32(txd_upper);
2047 			if (++i == adapter->num_tx_desc)
2048 				i = 0;
2049 			/* Now make the sentinel */
2050 			++txd_used; /* using an extra txd */
2051 			ctxd = &txr->tx_base[i];
2052 			tx_buffer = &txr->tx_buffers[i];
2053 			ctxd->buffer_addr =
2054 			    htole64(seg_addr + seg_len);
2055 			ctxd->lower.data = htole32(
2056 			    adapter->txd_cmd | txd_lower | 4);
2057 			ctxd->upper.data =
2058 			    htole32(txd_upper);
2059 			last = i;
2060 			if (++i == adapter->num_tx_desc)
2061 				i = 0;
2062 		} else {
2063 			ctxd->buffer_addr = htole64(seg_addr);
2064 			ctxd->lower.data = htole32(
2065 			    adapter->txd_cmd | txd_lower | seg_len);
2066 			ctxd->upper.data =
2067 			    htole32(txd_upper);
2068 			last = i;
2069 			if (++i == adapter->num_tx_desc)
2070 				i = 0;
2071 		}
2072 		tx_buffer->m_head = NULL;
2073 		tx_buffer->next_eop = -1;
2074 	}
2075 
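	/*
	 * Note (editorial): when the sentinel split above fires, the last
	 * DMA segment is emitted as (seg_len - 4) bytes plus a separate
	 * 4-byte descriptor, which is why txd_used is charged against
	 * tx_avail below in addition to nsegs.
	 */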
2076 	txr->next_avail_desc = i;
2077 	txr->tx_avail -= nsegs;
2078 	if (tso_desc) /* TSO used an extra for sentinel */
2079 		txr->tx_avail -= txd_used;
2080 
2081 	tx_buffer->m_head = m_head;
2082 	/*
2083 	** Here we swap the map so the last descriptor,
2084 	** which gets the completion interrupt has the
2085 	** real map, and the first descriptor gets the
2086 	** unused map from this descriptor.
2087 	*/
2088 	tx_buffer_mapped->map = tx_buffer->map;
2089 	tx_buffer->map = map;
2090 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2091 
2092 	/*
2093 	 * Last Descriptor of Packet
2094 	 * needs End Of Packet (EOP)
2095 	 * and Report Status (RS)
2096 	 */
2097 	ctxd->lower.data |=
2098 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2099 	/*
2100 	 * Keep track in the first buffer which
2101 	 * descriptor will be written back
2102 	 */
2103 	tx_buffer = &txr->tx_buffers[first];
2104 	tx_buffer->next_eop = last;
2105 	/* Update the watchdog time early and often */
2106 	txr->watchdog_time = ticks;
2107 
2108 	/*
2109 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2110 	 * that this frame is available to transmit.
2111 	 */
2112 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2113 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2114 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2115 
2116 	return (0);
2117 }
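
/*
 * A minimal usage sketch (editorial, not part of the driver) of the
 * em_xmit() contract as a caller such as em_start_locked() relies on it,
 * called with EM_TX_LOCK held: on failure the mbuf chain is either
 * already consumed (*m_headp set to NULL, e.g. on EINVAL) or left intact
 * for the caller to requeue or free (e.g. on ENOMEM when bus_dma bounce
 * buffers are exhausted).  "em_xmit_usage" is a hypothetical name.
 */
static int
em_xmit_usage(struct tx_ring *txr, struct mbuf *m)
{
	struct mbuf *m_head = m;
	int error;

	/* em_xmit() may replace the chain (m_pullup()/m_defrag()) */
	error = em_xmit(txr, &m_head);
	if (error != 0 && m_head != NULL)
		m_freem(m_head);	/* or requeue it and retry later */
	return (error);
}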
2118 
2119 static void
2120 em_set_promisc(struct adapter *adapter)
2121 {
2122 	if_t ifp = adapter->ifp;
2123 	u32		reg_rctl;
2124 
2125 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2126 
2127 	if (if_getflags(ifp) & IFF_PROMISC) {
2128 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2129 		/* Turn this on if you want to see bad packets */
2130 		if (em_debug_sbp)
2131 			reg_rctl |= E1000_RCTL_SBP;
2132 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2133 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2134 		reg_rctl |= E1000_RCTL_MPE;
2135 		reg_rctl &= ~E1000_RCTL_UPE;
2136 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2137 	}
2138 }
2139 
2140 static void
2141 em_disable_promisc(struct adapter *adapter)
2142 {
2143 	if_t		ifp = adapter->ifp;
2144 	u32		reg_rctl;
2145 	int		mcnt = 0;
2146 
2147 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2148 	reg_rctl &= (~E1000_RCTL_UPE);
2149 	if (if_getflags(ifp) & IFF_ALLMULTI)
2150 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2151 	else
2152 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2153 	/* Don't disable if in MAX groups */
2154 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2155 		reg_rctl &= (~E1000_RCTL_MPE);
2156 	reg_rctl &= (~E1000_RCTL_SBP);
2157 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2158 }
2159 
2160 
2161 /*********************************************************************
2162  *  Multicast Update
2163  *
2164  *  This routine is called whenever multicast address list is updated.
2165  *
2166  **********************************************************************/
2167 
2168 static void
2169 em_set_multi(struct adapter *adapter)
2170 {
2171 	if_t ifp = adapter->ifp;
2172 	u32 reg_rctl = 0;
2173 	u8  *mta; /* Multicast array memory */
2174 	int mcnt = 0;
2175 
2176 	IOCTL_DEBUGOUT("em_set_multi: begin");
2177 
2178 	mta = adapter->mta;
2179 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2180 
2181 	if (adapter->hw.mac.type == e1000_82542 &&
2182 	    adapter->hw.revision_id == E1000_REVISION_2) {
2183 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2184 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2185 			e1000_pci_clear_mwi(&adapter->hw);
2186 		reg_rctl |= E1000_RCTL_RST;
2187 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188 		msec_delay(5);
2189 	}
2190 
2191 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2192 
2193 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2194 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2195 		reg_rctl |= E1000_RCTL_MPE;
2196 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2197 	} else
2198 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2199 
2200 	if (adapter->hw.mac.type == e1000_82542 &&
2201 	    adapter->hw.revision_id == E1000_REVISION_2) {
2202 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2203 		reg_rctl &= ~E1000_RCTL_RST;
2204 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2205 		msec_delay(5);
2206 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2207 			e1000_pci_set_mwi(&adapter->hw);
2208 	}
2209 }
2210 
2211 
2212 /*********************************************************************
2213  *  Timer routine
2214  *
2215  *  This routine checks for link status and updates statistics.
2216  *
2217  **********************************************************************/
2218 
2219 static void
2220 em_local_timer(void *arg)
2221 {
2222 	struct adapter	*adapter = arg;
2223 	if_t ifp = adapter->ifp;
2224 	struct tx_ring	*txr = adapter->tx_rings;
2225 	struct rx_ring	*rxr = adapter->rx_rings;
2226 	u32		trigger;
2227 
2228 	EM_CORE_LOCK_ASSERT(adapter);
2229 
2230 	em_update_link_status(adapter);
2231 	em_update_stats_counters(adapter);
2232 
2233 	/* Reset LAA into RAR[0] on 82571 */
2234 	if ((adapter->hw.mac.type == e1000_82571) &&
2235 	    e1000_get_laa_state_82571(&adapter->hw))
2236 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2237 
2238 	/* Mask to use in the irq trigger */
2239 	if (adapter->msix_mem)
2240 		trigger = rxr->ims;
2241 	else
2242 		trigger = E1000_ICS_RXDMT0;
2243 
2244 	/*
2245 	** Check on the state of the TX queue(s); this
2246 	** can be done without the lock because it's RO
2247 	** and the HUNG state will be static if set.
2248 	*/
2249 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2250 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2251 		    (adapter->pause_frames == 0))
2252 			goto hung;
2253 		/* Schedule a TX task if needed */
2254 		if (txr->tx_avail <= EM_MAX_SCATTER)
2255 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2256 	}
2257 
2258 	adapter->pause_frames = 0;
2259 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2260 #ifndef DEVICE_POLLING
2261 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2262 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2263 #endif
2264 	return;
2265 hung:
2266 	/* Looks like we're hung */
2267 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2268 	device_printf(adapter->dev,
2269 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2270 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2271 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2272 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2273 	    "Next TX to Clean = %d\n",
2274 	    txr->me, txr->tx_avail, txr->next_to_clean);
2275 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2276 	adapter->watchdog_events++;
2277 	adapter->pause_frames = 0;
2278 	em_init_locked(adapter);
2279 }
2280 
2281 
2282 static void
2283 em_update_link_status(struct adapter *adapter)
2284 {
2285 	struct e1000_hw *hw = &adapter->hw;
2286 	if_t ifp = adapter->ifp;
2287 	device_t dev = adapter->dev;
2288 	struct tx_ring *txr = adapter->tx_rings;
2289 	u32 link_check = 0;
2290 
2291 	/* Get the cached link value or read phy for real */
2292 	switch (hw->phy.media_type) {
2293 	case e1000_media_type_copper:
2294 		if (hw->mac.get_link_status) {
2295 			/* Do the work to read phy */
2296 			e1000_check_for_link(hw);
2297 			link_check = !hw->mac.get_link_status;
2298 			if (link_check) /* ESB2 fix */
2299 				e1000_cfg_on_link_up(hw);
2300 		} else
2301 			link_check = TRUE;
2302 		break;
2303 	case e1000_media_type_fiber:
2304 		e1000_check_for_link(hw);
2305 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2306 		    E1000_STATUS_LU);
2307 		break;
2308 	case e1000_media_type_internal_serdes:
2309 		e1000_check_for_link(hw);
2310 		link_check = adapter->hw.mac.serdes_has_link;
2311 		break;
2312 	default:
2313 	case e1000_media_type_unknown:
2314 		break;
2315 	}
2316 
2317 	/* Now check for a transition */
2318 	if (link_check && (adapter->link_active == 0)) {
2319 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2320 		    &adapter->link_duplex);
2321 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2322 		if ((adapter->link_speed != SPEED_1000) &&
2323 		    ((hw->mac.type == e1000_82571) ||
2324 		    (hw->mac.type == e1000_82572))) {
2325 			int tarc0;
2326 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2327 			tarc0 &= ~SPEED_MODE_BIT;
2328 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2329 		}
2330 		if (bootverbose)
2331 			device_printf(dev, "Link is up %d Mbps %s\n",
2332 			    adapter->link_speed,
2333 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2334 			    "Full Duplex" : "Half Duplex"));
2335 		adapter->link_active = 1;
2336 		adapter->smartspeed = 0;
2337 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2338 		if_link_state_change(ifp, LINK_STATE_UP);
2339 	} else if (!link_check && (adapter->link_active == 1)) {
2340 		if_setbaudrate(ifp, 0);
2341 		adapter->link_speed = 0;
2342 		adapter->link_duplex = 0;
2343 		if (bootverbose)
2344 			device_printf(dev, "Link is Down\n");
2345 		adapter->link_active = 0;
2346 		/* Link down, disable watchdog */
2347 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2348 			txr->queue_status = EM_QUEUE_IDLE;
2349 		if_link_state_change(ifp, LINK_STATE_DOWN);
2350 	}
2351 }
2352 
2353 /*********************************************************************
2354  *
2355  *  This routine disables all traffic on the adapter by issuing a
2356  *  global reset on the MAC and deallocates TX/RX buffers.
2357  *
2358  *  This routine should always be called with BOTH the CORE
2359  *  and TX locks.
2360  **********************************************************************/
2361 
2362 static void
2363 em_stop(void *arg)
2364 {
2365 	struct adapter	*adapter = arg;
2366 	if_t ifp = adapter->ifp;
2367 	struct tx_ring	*txr = adapter->tx_rings;
2368 
2369 	EM_CORE_LOCK_ASSERT(adapter);
2370 
2371 	INIT_DEBUGOUT("em_stop: begin");
2372 
2373 	em_disable_intr(adapter);
2374 	callout_stop(&adapter->timer);
2375 
2376 	/* Tell the stack that the interface is no longer active */
2377 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2378 
2379 	/* Unarm watchdog timer. */
2380 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2381 		EM_TX_LOCK(txr);
2382 		txr->queue_status = EM_QUEUE_IDLE;
2383 		EM_TX_UNLOCK(txr);
2384 	}
2385 
2386 	e1000_reset_hw(&adapter->hw);
2387 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2388 
2389 	e1000_led_off(&adapter->hw);
2390 	e1000_cleanup_led(&adapter->hw);
2391 }
2392 
2393 
2394 /*********************************************************************
2395  *
2396  *  Determine hardware revision.
2397  *
2398  **********************************************************************/
2399 static void
2400 em_identify_hardware(struct adapter *adapter)
2401 {
2402 	device_t dev = adapter->dev;
2403 
2404 	/* Make sure our PCI config space has the necessary stuff set */
2405 	pci_enable_busmaster(dev);
2406 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2407 
2408 	/* Save off the information about this board */
2409 	adapter->hw.vendor_id = pci_get_vendor(dev);
2410 	adapter->hw.device_id = pci_get_device(dev);
2411 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2412 	adapter->hw.subsystem_vendor_id =
2413 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2414 	adapter->hw.subsystem_device_id =
2415 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2416 
2417 	/* Do Shared Code Init and Setup */
2418 	if (e1000_set_mac_type(&adapter->hw)) {
2419 		device_printf(dev, "Setup init failure\n");
2420 		return;
2421 	}
2422 }
2423 
2424 static int
2425 em_allocate_pci_resources(struct adapter *adapter)
2426 {
2427 	device_t	dev = adapter->dev;
2428 	int		rid;
2429 
2430 	rid = PCIR_BAR(0);
2431 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2432 	    &rid, RF_ACTIVE);
2433 	if (adapter->memory == NULL) {
2434 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2435 		return (ENXIO);
2436 	}
2437 	adapter->osdep.mem_bus_space_tag =
2438 	    rman_get_bustag(adapter->memory);
2439 	adapter->osdep.mem_bus_space_handle =
2440 	    rman_get_bushandle(adapter->memory);
2441 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2442 
2443 	/* Default to a single queue */
2444 	adapter->num_queues = 1;
2445 
2446 	/*
2447 	 * Setup MSI/X or MSI if PCI Express
2448 	 */
2449 	adapter->msix = em_setup_msix(adapter);
2450 
2451 	adapter->hw.back = &adapter->osdep;
2452 
2453 	return (0);
2454 }
2455 
2456 /*********************************************************************
2457  *
2458  *  Setup the Legacy or MSI Interrupt handler
2459  *
2460  **********************************************************************/
2461 int
2462 em_allocate_legacy(struct adapter *adapter)
2463 {
2464 	device_t dev = adapter->dev;
2465 	struct tx_ring	*txr = adapter->tx_rings;
2466 	int error, rid = 0;
2467 
2468 	/* Manually turn off all interrupts */
2469 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2470 
2471 	if (adapter->msix == 1) /* using MSI */
2472 		rid = 1;
2473 	/* We allocate a single interrupt resource */
2474 	adapter->res = bus_alloc_resource_any(dev,
2475 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2476 	if (adapter->res == NULL) {
2477 		device_printf(dev, "Unable to allocate bus resource: "
2478 		    "interrupt\n");
2479 		return (ENXIO);
2480 	}
2481 
2482 	/*
2483 	 * Allocate a fast interrupt and the associated
2484 	 * deferred processing contexts.
2485 	 */
2486 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2487 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2488 	    taskqueue_thread_enqueue, &adapter->tq);
2489 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2490 	    device_get_nameunit(adapter->dev));
2491 	/* Use a TX only task for the local timer */
2492 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2493 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2494 	    taskqueue_thread_enqueue, &txr->tq);
2495 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2496 	    device_get_nameunit(adapter->dev));
2497 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2498 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2499 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2500 		device_printf(dev, "Failed to register fast interrupt "
2501 			    "handler: %d\n", error);
2502 		taskqueue_free(adapter->tq);
2503 		adapter->tq = NULL;
2504 		return (error);
2505 	}
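	/*
	 * Note (editorial): em_irq_fast is registered as a filter (the
	 * handler argument to bus_setup_intr() is NULL), so it runs in
	 * interrupt context and defers the real work to the taskqueues
	 * set up above.
	 */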
2506 
2507 	return (0);
2508 }
2509 
2510 /*********************************************************************
2511  *
2512  *  Setup the MSIX Interrupt handlers
2513  *   This is not really Multiqueue, rather
2514  *   it's just separate interrupt vectors
2515  *   for TX, RX, and Link.
2516  *
2517  **********************************************************************/
2518 int
2519 em_allocate_msix(struct adapter *adapter)
2520 {
2521 	device_t	dev = adapter->dev;
2522 	struct		tx_ring *txr = adapter->tx_rings;
2523 	struct		rx_ring *rxr = adapter->rx_rings;
2524 	int		error, rid, vector = 0;
2525 
2526 
2527 	/* Make sure all interrupts are disabled */
2528 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2529 
2530 	/* First set up ring resources */
2531 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2532 
2533 		/* RX ring */
2534 		rid = vector + 1;
2535 
2536 		rxr->res = bus_alloc_resource_any(dev,
2537 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2538 		if (rxr->res == NULL) {
2539 			device_printf(dev,
2540 			    "Unable to allocate bus resource: "
2541 			    "RX MSIX Interrupt %d\n", i);
2542 			return (ENXIO);
2543 		}
2544 		if ((error = bus_setup_intr(dev, rxr->res,
2545 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2546 		    rxr, &rxr->tag)) != 0) {
2547 			device_printf(dev, "Failed to register RX handler");
2548 			return (error);
2549 		}
2550 #if __FreeBSD_version >= 800504
2551 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2552 #endif
2553 		rxr->msix = vector++; /* NOTE increment vector for TX */
2554 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2555 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2556 		    taskqueue_thread_enqueue, &rxr->tq);
2557 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2558 		    device_get_nameunit(adapter->dev));
2559 		/*
2560 		** Set the bit to enable interrupt
2561 		** in E1000_IMS -- bits 20 and 21
2562 		** are for RX0 and RX1, note this has
2563 		** NOTHING to do with the MSIX vector
2564 		*/
2565 		rxr->ims = 1 << (20 + i);
2566 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2567 
2568 		/* TX ring */
2569 		rid = vector + 1;
2570 		txr->res = bus_alloc_resource_any(dev,
2571 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2572 		if (txr->res == NULL) {
2573 			device_printf(dev,
2574 			    "Unable to allocate bus resource: "
2575 			    "TX MSIX Interrupt %d\n", i);
2576 			return (ENXIO);
2577 		}
2578 		if ((error = bus_setup_intr(dev, txr->res,
2579 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2580 		    txr, &txr->tag)) != 0) {
2581 			device_printf(dev, "Failed to register TX handler");
2582 			return (error);
2583 		}
2584 #if __FreeBSD_version >= 800504
2585 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2586 #endif
2587 		txr->msix = vector++; /* Increment vector for next pass */
2588 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2589 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2590 		    taskqueue_thread_enqueue, &txr->tq);
2591 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2592 		    device_get_nameunit(adapter->dev));
2593 		/*
2594 		** Set the bit to enable interrupt
2595 		** in E1000_IMS -- bits 22 and 23
2596 		** are for TX0 and TX1, note this has
2597 		** NOTHING to do with the MSIX vector
2598 		*/
2599 		txr->ims = 1 << (22 + i);
2600 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2601 	}
2602 
2603 	/* Link interrupt */
2604 	++rid;
2605 	adapter->res = bus_alloc_resource_any(dev,
2606 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2607 	if (!adapter->res) {
2608 		device_printf(dev,"Unable to allocate "
2609 		    "bus resource: Link interrupt [%d]\n", rid);
2610 		return (ENXIO);
2611 	}
2612 	/* Set the link handler function */
2613 	error = bus_setup_intr(dev, adapter->res,
2614 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2615 	    em_msix_link, adapter, &adapter->tag);
2616 	if (error) {
2617 		adapter->res = NULL;
2618 		device_printf(dev, "Failed to register LINK handler");
2619 		return (error);
2620 	}
2621 #if __FreeBSD_version >= 800504
2622 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2623 #endif
2624 	adapter->linkvec = vector;
2625 	adapter->ivars |=  (8 | vector) << 16;
2626 	adapter->ivars |= 0x80000000;
2627 
2628 	return (0);
2629 }
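
/*
 * Worked example (editorial) of the IVAR value built above for the
 * single-queue case (num_queues == 1): the RX ring gets vector 0, the
 * TX ring vector 1 and the link interrupt vector 2, so
 *
 *	(8 | 0) << 0	RX0  -> 0x00000008
 *	(8 | 1) << 8	TX0  -> 0x00000900
 *	(8 | 2) << 16	link -> 0x000A0000
 *	0x80000000	     -> global enable bit
 *
 * giving adapter->ivars = 0x800A0908; the 8 in each 4-bit field marks
 * that IVAR entry valid.
 */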
2630 
2631 
2632 static void
2633 em_free_pci_resources(struct adapter *adapter)
2634 {
2635 	device_t	dev = adapter->dev;
2636 	struct tx_ring	*txr;
2637 	struct rx_ring	*rxr;
2638 	int		rid;
2639 
2640 
2641 	/*
2642 	** Release all the queue interrupt resources:
2643 	*/
2644 	for (int i = 0; i < adapter->num_queues; i++) {
2645 		txr = &adapter->tx_rings[i];
2646 		rxr = &adapter->rx_rings[i];
2647 		/* an early abort? */
2648 		if ((txr == NULL) || (rxr == NULL))
2649 			break;
2650 		rid = txr->msix + 1;
2651 		if (txr->tag != NULL) {
2652 			bus_teardown_intr(dev, txr->res, txr->tag);
2653 			txr->tag = NULL;
2654 		}
2655 		if (txr->res != NULL)
2656 			bus_release_resource(dev, SYS_RES_IRQ,
2657 			    rid, txr->res);
2658 		rid = rxr->msix + 1;
2659 		if (rxr->tag != NULL) {
2660 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2661 			rxr->tag = NULL;
2662 		}
2663 		if (rxr->res != NULL)
2664 			bus_release_resource(dev, SYS_RES_IRQ,
2665 			    rid, rxr->res);
2666 	}
2667 
2668 	if (adapter->linkvec) /* we are doing MSIX */
2669 		rid = adapter->linkvec + 1;
2670 	else
2671 		rid = (adapter->msix != 0) ? 1 : 0;
2672 
2673 	if (adapter->tag != NULL) {
2674 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2675 		adapter->tag = NULL;
2676 	}
2677 
2678 	if (adapter->res != NULL)
2679 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2680 
2681 
2682 	if (adapter->msix)
2683 		pci_release_msi(dev);
2684 
2685 	if (adapter->msix_mem != NULL)
2686 		bus_release_resource(dev, SYS_RES_MEMORY,
2687 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2688 
2689 	if (adapter->memory != NULL)
2690 		bus_release_resource(dev, SYS_RES_MEMORY,
2691 		    PCIR_BAR(0), adapter->memory);
2692 
2693 	if (adapter->flash != NULL)
2694 		bus_release_resource(dev, SYS_RES_MEMORY,
2695 		    EM_FLASH, adapter->flash);
2696 }
2697 
2698 /*
2699  * Setup MSI or MSI/X
2700  */
2701 static int
2702 em_setup_msix(struct adapter *adapter)
2703 {
2704 	device_t dev = adapter->dev;
2705 	int val;
2706 
2707 	/*
2708 	** Setup MSI/X for Hartwell: tests have shown
2709 	** use of two queues to be unstable, and to
2710 	** provide no great gain anyway, so we simply
2711 	** separate the interrupts and use a single queue.
2712 	*/
2713 	if ((adapter->hw.mac.type == e1000_82574) &&
2714 	    (em_enable_msix == TRUE)) {
2715 		/* Map the MSIX BAR */
2716 		int rid = PCIR_BAR(EM_MSIX_BAR);
2717 		adapter->msix_mem = bus_alloc_resource_any(dev,
2718 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2719 		if (adapter->msix_mem == NULL) {
2720 			/* May not be enabled */
2721 			device_printf(adapter->dev,
2722 			    "Unable to map MSIX table\n");
2723 			goto msi;
2724 		}
2725 		val = pci_msix_count(dev);
2726 		/* We only need/want 3 vectors */
2727 		if (val >= 3)
2728 			val = 3;
2729 		else {
2730 			device_printf(adapter->dev,
2731 			    "MSIX: insufficient vectors, using MSI\n");
2732 			goto msi;
2733 		}
2734 
2735 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2736 			device_printf(adapter->dev,
2737 			    "Using MSIX interrupts "
2738 			    "with %d vectors\n", val);
2739 			return (val);
2740 		}
2741 
2742 		/*
2743 		** If MSIX alloc failed or provided us with
2744 		** less than needed, free and fall through to MSI
2745 		*/
2746 		pci_release_msi(dev);
2747 	}
2748 msi:
2749 	if (adapter->msix_mem != NULL) {
2750 		bus_release_resource(dev, SYS_RES_MEMORY,
2751 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2752 		adapter->msix_mem = NULL;
2753 	}
2754 	val = 1;
2755 	if (pci_alloc_msi(dev, &val) == 0) {
2756 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2757 		return (val);
2758 	}
2759 	/* Should only happen due to manual configuration */
2760 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2761 	return (0);
2762 }
2763 
2764 
2765 /*********************************************************************
2766  *
2767  *  Initialize the hardware to a configuration
2768  *  as specified by the adapter structure.
2769  *
2770  **********************************************************************/
2771 static void
2772 em_reset(struct adapter *adapter)
2773 {
2774 	device_t	dev = adapter->dev;
2775 	if_t ifp = adapter->ifp;
2776 	struct e1000_hw	*hw = &adapter->hw;
2777 	u16		rx_buffer_size;
2778 	u32		pba;
2779 
2780 	INIT_DEBUGOUT("em_reset: begin");
2781 
2782 	/* Set up smart power down as default off on newer adapters. */
2783 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2784 	    hw->mac.type == e1000_82572)) {
2785 		u16 phy_tmp = 0;
2786 
2787 		/* Speed up time to link by disabling smart power down. */
2788 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2789 		phy_tmp &= ~IGP02E1000_PM_SPD;
2790 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2791 	}
2792 
2793 	/*
2794 	 * Packet Buffer Allocation (PBA)
2795 	 * Writing PBA sets the receive portion of the buffer
2796 	 * the remainder is used for the transmit buffer.
2797 	 */
2798 	switch (hw->mac.type) {
2799 	/* Total Packet Buffer on these is 48K */
2800 	case e1000_82571:
2801 	case e1000_82572:
2802 	case e1000_80003es2lan:
2803 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2804 		break;
2805 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2806 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2807 		break;
2808 	case e1000_82574:
2809 	case e1000_82583:
2810 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2811 		break;
2812 	case e1000_ich8lan:
2813 		pba = E1000_PBA_8K;
2814 		break;
2815 	case e1000_ich9lan:
2816 	case e1000_ich10lan:
2817 		/* Boost Receive side for jumbo frames */
2818 		if (adapter->hw.mac.max_frame_size > 4096)
2819 			pba = E1000_PBA_14K;
2820 		else
2821 			pba = E1000_PBA_10K;
2822 		break;
2823 	case e1000_pchlan:
2824 	case e1000_pch2lan:
2825 	case e1000_pch_lpt:
2826 		pba = E1000_PBA_26K;
2827 		break;
2828 	default:
2829 		if (adapter->hw.mac.max_frame_size > 8192)
2830 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2831 		else
2832 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2833 	}
2834 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2835 
2836 	/*
2837 	 * These parameters control the automatic generation (Tx) and
2838 	 * response (Rx) to Ethernet PAUSE frames.
2839 	 * - High water mark should allow for at least two frames to be
2840 	 *   received after sending an XOFF.
2841 	 * - Low water mark works best when it is very near the high water mark.
2842 	 *   This allows the receiver to restart by sending XON when it has
2843 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2844 	 *   restart after one full frame is pulled from the buffer. There
2845 	 *   could be several smaller frames in the buffer and if so they will
2846 	 *   not trigger the XON until their total number reduces the buffer
2847 	 *   by 1500.
2848 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2849 	 */
2850 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2851 	hw->fc.high_water = rx_buffer_size -
2852 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2853 	hw->fc.low_water = hw->fc.high_water - 1500;
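	/*
	 * Worked example (editorial), assuming an 82571 (E1000_PBA_32K,
	 * i.e. 32KB of RX packet buffer) at the standard 1518-byte max
	 * frame size:
	 *	rx_buffer_size = 32 << 10                     = 32768
	 *	high_water     = 32768 - roundup2(1518, 1024) = 30720
	 *	low_water      = 30720 - 1500                 = 29220
	 */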
2854 
2855 	if (adapter->fc) /* locally set flow control value? */
2856 		hw->fc.requested_mode = adapter->fc;
2857 	else
2858 		hw->fc.requested_mode = e1000_fc_full;
2859 
2860 	if (hw->mac.type == e1000_80003es2lan)
2861 		hw->fc.pause_time = 0xFFFF;
2862 	else
2863 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2864 
2865 	hw->fc.send_xon = TRUE;
2866 
2867 	/* Device specific overrides/settings */
2868 	switch (hw->mac.type) {
2869 	case e1000_pchlan:
2870 		/* Workaround: no TX flow ctrl for PCH */
2871 		hw->fc.requested_mode = e1000_fc_rx_pause;
2872 		hw->fc.pause_time = 0xFFFF; /* override */
2873 		if (if_getmtu(ifp) > ETHERMTU) {
2874 			hw->fc.high_water = 0x3500;
2875 			hw->fc.low_water = 0x1500;
2876 		} else {
2877 			hw->fc.high_water = 0x5000;
2878 			hw->fc.low_water = 0x3000;
2879 		}
2880 		hw->fc.refresh_time = 0x1000;
2881 		break;
2882 	case e1000_pch2lan:
2883 	case e1000_pch_lpt:
2884 		hw->fc.high_water = 0x5C20;
2885 		hw->fc.low_water = 0x5048;
2886 		hw->fc.pause_time = 0x0650;
2887 		hw->fc.refresh_time = 0x0400;
2888 		/* Jumbos need adjusted PBA */
2889 		if (if_getmtu(ifp) > ETHERMTU)
2890 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2891 		else
2892 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2893 		break;
2894 	case e1000_ich9lan:
2895 	case e1000_ich10lan:
2896 		if (if_getmtu(ifp) > ETHERMTU) {
2897 			hw->fc.high_water = 0x2800;
2898 			hw->fc.low_water = hw->fc.high_water - 8;
2899 			break;
2900 		}
2901 		/* else fall through */
2902 	default:
2903 		if (hw->mac.type == e1000_80003es2lan)
2904 			hw->fc.pause_time = 0xFFFF;
2905 		break;
2906 	}
2907 
2908 	/* Issue a global reset */
2909 	e1000_reset_hw(hw);
2910 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2911 	em_disable_aspm(adapter);
2912 	/* and a re-init */
2913 	if (e1000_init_hw(hw) < 0) {
2914 		device_printf(dev, "Hardware Initialization Failed\n");
2915 		return;
2916 	}
2917 
2918 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2919 	e1000_get_phy_info(hw);
2920 	e1000_check_for_link(hw);
2921 	return;
2922 }
2923 
2924 /*********************************************************************
2925  *
2926  *  Setup networking device structure and register an interface.
2927  *
2928  **********************************************************************/
2929 static int
2930 em_setup_interface(device_t dev, struct adapter *adapter)
2931 {
2932 	if_t ifp;
2933 
2934 	INIT_DEBUGOUT("em_setup_interface: begin");
2935 
2936 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2937 	if (ifp == NULL) {
2938 		device_printf(dev, "can not allocate ifnet structure\n");
2939 		return (-1);
2940 	}
2941 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2942 	if_setdev(ifp, dev);
2943 	if_setinitfn(ifp, em_init);
2944 	if_setsoftc(ifp, adapter);
2945 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2946 	if_setioctlfn(ifp, em_ioctl);
2947 	if_setgetcounterfn(ifp, em_get_counter);
2948 #ifdef EM_MULTIQUEUE
2949 	/* Multiqueue stack interface */
2950 	if_settransmitfn(ifp, em_mq_start);
2951 	if_setqflushfn(ifp, em_qflush);
2952 #else
2953 	if_setstartfn(ifp, em_start);
2954 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
2955 	if_setsendqready(ifp);
2956 #endif
2957 
2958 	ether_ifattach(ifp, adapter->hw.mac.addr);
2959 
2960 	if_setcapabilities(ifp, 0);
2961 	if_setcapenable(ifp, 0);
2962 
2963 
2964 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
2965 	    IFCAP_TSO4, 0);
2966 	/*
2967 	 * Tell the upper layer(s) we
2968 	 * support full VLAN capability
2969 	 */
2970 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2971 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
2972 	    IFCAP_VLAN_MTU, 0);
2973 	if_setcapenable(ifp, if_getcapabilities(ifp));
2974 
2975 	/*
2976 	** Don't turn this on by default: if vlans are
2977 	** created on another pseudo device (e.g. lagg)
2978 	** then vlan events are not passed through, breaking
2979 	** operation, but with HW FILTER off it works. If
2980 	** using vlans directly on the em driver you can
2981 	** enable this and get full hardware tag filtering.
2982 	*/
2983 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
2984 
2985 #ifdef DEVICE_POLLING
2986 	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
2987 #endif
2988 
2989 	/* Enable only WOL MAGIC by default */
2990 	if (adapter->wol) {
2991 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
2992 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
2993 	}
2994 
2995 	/*
2996 	 * Specify the media types supported by this adapter and register
2997 	 * callbacks to update media and link information
2998 	 */
2999 	ifmedia_init(&adapter->media, IFM_IMASK,
3000 	    em_media_change, em_media_status);
3001 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3002 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3003 		u_char fiber_type = IFM_1000_SX;	/* default type */
3004 
3005 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3006 			    0, NULL);
3007 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3008 	} else {
3009 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3010 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3011 			    0, NULL);
3012 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3013 			    0, NULL);
3014 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3015 			    0, NULL);
3016 		if (adapter->hw.phy.type != e1000_phy_ife) {
3017 			ifmedia_add(&adapter->media,
3018 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3019 			ifmedia_add(&adapter->media,
3020 				IFM_ETHER | IFM_1000_T, 0, NULL);
3021 		}
3022 	}
3023 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3024 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3025 	return (0);
3026 }
3027 
3028 
3029 /*
3030  * Manage DMA'able memory.
3031  */
3032 static void
3033 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3034 {
3035 	if (error)
3036 		return;
3037 	*(bus_addr_t *) arg = segs[0].ds_addr;
3038 }
3039 
3040 static int
3041 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3042         struct em_dma_alloc *dma, int mapflags)
3043 {
3044 	int error;
3045 
3046 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3047 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3048 				BUS_SPACE_MAXADDR,	/* lowaddr */
3049 				BUS_SPACE_MAXADDR,	/* highaddr */
3050 				NULL, NULL,		/* filter, filterarg */
3051 				size,			/* maxsize */
3052 				1,			/* nsegments */
3053 				size,			/* maxsegsize */
3054 				0,			/* flags */
3055 				NULL,			/* lockfunc */
3056 				NULL,			/* lockarg */
3057 				&dma->dma_tag);
3058 	if (error) {
3059 		device_printf(adapter->dev,
3060 		    "%s: bus_dma_tag_create failed: %d\n",
3061 		    __func__, error);
3062 		goto fail_0;
3063 	}
3064 
3065 	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3066 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3067 	if (error) {
3068 		device_printf(adapter->dev,
3069 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3070 		    __func__, (uintmax_t)size, error);
3071 		goto fail_2;
3072 	}
3073 
3074 	dma->dma_paddr = 0;
3075 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3076 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3077 	if (error || dma->dma_paddr == 0) {
3078 		device_printf(adapter->dev,
3079 		    "%s: bus_dmamap_load failed: %d\n",
3080 		    __func__, error);
3081 		goto fail_3;
3082 	}
3083 
3084 	return (0);
3085 
3086 fail_3:
3087 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3088 fail_2:
3089 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3090 	bus_dma_tag_destroy(dma->dma_tag);
3091 fail_0:
3092 	dma->dma_tag = NULL;
3093 
3094 	return (error);
3095 }
3096 
3097 static void
3098 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3099 {
3100 	if (dma->dma_tag == NULL)
3101 		return;
3102 	if (dma->dma_paddr != 0) {
3103 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3104 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3105 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3106 		dma->dma_paddr = 0;
3107 	}
3108 	if (dma->dma_vaddr != NULL) {
3109 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3110 		dma->dma_vaddr = NULL;
3111 	}
3112 	bus_dma_tag_destroy(dma->dma_tag);
3113 	dma->dma_tag = NULL;
3114 }
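
/*
 * A minimal usage sketch (editorial, not part of the driver) of the
 * helper pair above, mirroring what em_allocate_queues() below does for
 * descriptor rings.  "em_dma_usage" and the 4KB size are hypothetical.
 */
static int
em_dma_usage(struct adapter *adapter)
{
	struct em_dma_alloc dma;
	int error;

	/* One contiguous, EM_DBA_ALIGN-aligned DMA region */
	error = em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);
	/* ... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus address) ... */
	em_dma_free(adapter, &dma);
	return (0);
}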
3115 
3116 
3117 /*********************************************************************
3118  *
3119  *  Allocate memory for the transmit and receive rings, and then
3120  *  the descriptors associated with each, called only once at attach.
3121  *
3122  **********************************************************************/
3123 static int
3124 em_allocate_queues(struct adapter *adapter)
3125 {
3126 	device_t		dev = adapter->dev;
3127 	struct tx_ring		*txr = NULL;
3128 	struct rx_ring		*rxr = NULL;
3129 	int rsize, tsize, error = E1000_SUCCESS;
3130 	int txconf = 0, rxconf = 0;
3131 
3132 
3133 	/* Allocate the TX ring struct memory */
3134 	if (!(adapter->tx_rings =
3135 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3136 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3137 		device_printf(dev, "Unable to allocate TX ring memory\n");
3138 		error = ENOMEM;
3139 		goto fail;
3140 	}
3141 
3142 	/* Now allocate the RX */
3143 	if (!(adapter->rx_rings =
3144 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3145 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3146 		device_printf(dev, "Unable to allocate RX ring memory\n");
3147 		error = ENOMEM;
3148 		goto rx_fail;
3149 	}
3150 
3151 	tsize = roundup2(adapter->num_tx_desc *
3152 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3153 	/*
3154 	 * Now set up the TX queues, txconf is needed to handle the
3155 	 * possibility that things fail midcourse and we need to
3156 	 * undo memory gracefully
3157 	 */
3158 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3159 		/* Set up some basics */
3160 		txr = &adapter->tx_rings[i];
3161 		txr->adapter = adapter;
3162 		txr->me = i;
3163 
3164 		/* Initialize the TX lock */
3165 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3166 		    device_get_nameunit(dev), txr->me);
3167 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3168 
3169 		if (em_dma_malloc(adapter, tsize,
3170 			&txr->txdma, BUS_DMA_NOWAIT)) {
3171 			device_printf(dev,
3172 			    "Unable to allocate TX Descriptor memory\n");
3173 			error = ENOMEM;
3174 			goto err_tx_desc;
3175 		}
3176 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3177 		bzero((void *)txr->tx_base, tsize);
3178 
3179 		if (em_allocate_transmit_buffers(txr)) {
3180 			device_printf(dev,
3181 			    "Critical Failure setting up transmit buffers\n");
3182 			error = ENOMEM;
3183 			goto err_tx_desc;
3184 		}
3185 #if __FreeBSD_version >= 800000
3186 		/* Allocate a buf ring */
3187 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3188 		    M_WAITOK, &txr->tx_mtx);
3189 #endif
3190 	}
3191 
3192 	/*
3193 	 * Next the RX queues...
3194 	 */
3195 	rsize = roundup2(adapter->num_rx_desc *
3196 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3197 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3198 		rxr = &adapter->rx_rings[i];
3199 		rxr->adapter = adapter;
3200 		rxr->me = i;
3201 
3202 		/* Initialize the RX lock */
3203 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3204 		    device_get_nameunit(dev), rxr->me);
3205 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3206 
3207 		if (em_dma_malloc(adapter, rsize,
3208 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3209 			device_printf(dev,
3210 			    "Unable to allocate RxDescriptor memory\n");
3211 			error = ENOMEM;
3212 			goto err_rx_desc;
3213 		}
3214 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3215 		bzero((void *)rxr->rx_base, rsize);
3216 
3217 		/* Allocate receive buffers for the ring */
3218 		if (em_allocate_receive_buffers(rxr)) {
3219 			device_printf(dev,
3220 			    "Critical Failure setting up receive buffers\n");
3221 			error = ENOMEM;
3222 			goto err_rx_desc;
3223 		}
3224 	}
3225 
3226 	return (0);
3227 
3228 err_rx_desc:
3229 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3230 		em_dma_free(adapter, &rxr->rxdma);
3231 err_tx_desc:
3232 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3233 		em_dma_free(adapter, &txr->txdma);
3234 	free(adapter->rx_rings, M_DEVBUF);
3235 rx_fail:
3236 #if __FreeBSD_version >= 800000
3237 	if (txr != NULL && txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF);
3238 #endif
3239 	free(adapter->tx_rings, M_DEVBUF);
3240 fail:
3241 	return (error);
3242 }
3243 
3244 
3245 /*********************************************************************
3246  *
3247  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3248  *  the information needed to transmit a packet on the wire. This is
3249  *  called only once at attach, setup is done every reset.
3250  *
3251  **********************************************************************/
3252 static int
3253 em_allocate_transmit_buffers(struct tx_ring *txr)
3254 {
3255 	struct adapter *adapter = txr->adapter;
3256 	device_t dev = adapter->dev;
3257 	struct em_buffer *txbuf;
3258 	int error, i;
3259 
3260 	/*
3261 	 * Setup DMA descriptor areas.
3262 	 */
3263 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3264 			       1, 0,			/* alignment, bounds */
3265 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3266 			       BUS_SPACE_MAXADDR,	/* highaddr */
3267 			       NULL, NULL,		/* filter, filterarg */
3268 			       EM_TSO_SIZE,		/* maxsize */
3269 			       EM_MAX_SCATTER,		/* nsegments */
3270 			       PAGE_SIZE,		/* maxsegsize */
3271 			       0,			/* flags */
3272 			       NULL,			/* lockfunc */
3273 			       NULL,			/* lockfuncarg */
3274 			       &txr->txtag))) {
3275 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3276 		goto fail;
3277 	}
3278 
3279 	if (!(txr->tx_buffers =
3280 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3281 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3282 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3283 		error = ENOMEM;
3284 		goto fail;
3285 	}
3286 
3287 	/* Create the descriptor buffer dma maps */
3288 	txbuf = txr->tx_buffers;
3289 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3290 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3291 		if (error != 0) {
3292 			device_printf(dev, "Unable to create TX DMA map\n");
3293 			goto fail;
3294 		}
3295 	}
3296 
3297 	return (0);
3298 fail:
3299 	/* We free all, it handles case where we are in the middle */
3300 	em_free_transmit_structures(adapter);
3301 	return (error);
3302 }
3303 
3304 /*********************************************************************
3305  *
3306  *  Initialize a transmit ring.
3307  *
3308  **********************************************************************/
3309 static void
3310 em_setup_transmit_ring(struct tx_ring *txr)
3311 {
3312 	struct adapter *adapter = txr->adapter;
3313 	struct em_buffer *txbuf;
3314 	int i;
3315 #ifdef DEV_NETMAP
3316 	struct netmap_slot *slot;
3317 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3318 #endif /* DEV_NETMAP */
3319 
3320 	/* Clear the old descriptor contents */
3321 	EM_TX_LOCK(txr);
3322 #ifdef DEV_NETMAP
3323 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3324 #endif /* DEV_NETMAP */
3325 
3326 	bzero((void *)txr->tx_base,
3327 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3328 	/* Reset indices */
3329 	txr->next_avail_desc = 0;
3330 	txr->next_to_clean = 0;
3331 
3332 	/* Free any existing tx buffers. */
3333 	txbuf = txr->tx_buffers;
3334 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3335 		if (txbuf->m_head != NULL) {
3336 			bus_dmamap_sync(txr->txtag, txbuf->map,
3337 			    BUS_DMASYNC_POSTWRITE);
3338 			bus_dmamap_unload(txr->txtag, txbuf->map);
3339 			m_freem(txbuf->m_head);
3340 			txbuf->m_head = NULL;
3341 		}
3342 #ifdef DEV_NETMAP
3343 		if (slot) {
3344 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3345 			uint64_t paddr;
3346 			void *addr;
3347 
3348 			addr = PNMB(na, slot + si, &paddr);
3349 			txr->tx_base[i].buffer_addr = htole64(paddr);
3350 			/* reload the map for netmap mode */
3351 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3352 		}
3353 #endif /* DEV_NETMAP */
3354 
3355 		/* clear the watch index */
3356 		txbuf->next_eop = -1;
3357 	}
3358 
3359 	/* Set number of descriptors available */
3360 	txr->tx_avail = adapter->num_tx_desc;
3361 	txr->queue_status = EM_QUEUE_IDLE;
3362 
3363 	/* Clear checksum offload context. */
3364 	txr->last_hw_offload = 0;
3365 	txr->last_hw_ipcss = 0;
3366 	txr->last_hw_ipcso = 0;
3367 	txr->last_hw_tucss = 0;
3368 	txr->last_hw_tucso = 0;
3369 
3370 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3371 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3372 	EM_TX_UNLOCK(txr);
3373 }
3374 
3375 /*********************************************************************
3376  *
3377  *  Initialize all transmit rings.
3378  *
3379  **********************************************************************/
3380 static void
3381 em_setup_transmit_structures(struct adapter *adapter)
3382 {
3383 	struct tx_ring *txr = adapter->tx_rings;
3384 
3385 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3386 		em_setup_transmit_ring(txr);
3387 
3388 	return;
3389 }
3390 
3391 /*********************************************************************
3392  *
3393  *  Enable transmit unit.
3394  *
3395  **********************************************************************/
3396 static void
3397 em_initialize_transmit_unit(struct adapter *adapter)
3398 {
3399 	struct tx_ring	*txr = adapter->tx_rings;
3400 	struct e1000_hw	*hw = &adapter->hw;
3401 	u32	tctl, tarc, tipg = 0;
3402 
3403 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3404 
3405 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3406 		u64 bus_addr = txr->txdma.dma_paddr;
3407 		/* Base and Len of TX Ring */
3408 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3409 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3410 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3411 	    	    (u32)(bus_addr >> 32));
3412 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3413 	    	    (u32)bus_addr);
3414 		/* Init the HEAD/TAIL indices */
3415 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3416 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3417 
3418 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3419 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3420 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3421 
3422 		txr->queue_status = EM_QUEUE_IDLE;
3423 	}
3424 
3425 	/* Set the default values for the Tx Inter Packet Gap timer */
3426 	switch (adapter->hw.mac.type) {
3427 	case e1000_80003es2lan:
3428 		tipg = DEFAULT_82543_TIPG_IPGR1;
3429 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3430 		    E1000_TIPG_IPGR2_SHIFT;
3431 		break;
3432 	default:
3433 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3434 		    (adapter->hw.phy.media_type ==
3435 		    e1000_media_type_internal_serdes))
3436 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3437 		else
3438 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3439 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3440 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3441 	}
3442 
3443 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3444 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3445 
3446 	if (adapter->hw.mac.type >= e1000_82540)
3447 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3448 		    adapter->tx_abs_int_delay.value);
3449 
3450 	if ((adapter->hw.mac.type == e1000_82571) ||
3451 	    (adapter->hw.mac.type == e1000_82572)) {
3452 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3453 		tarc |= SPEED_MODE_BIT;
3454 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3455 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3456 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3457 		tarc |= 1;
3458 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3459 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3460 		tarc |= 1;
3461 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3462 	}
3463 
3464 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3465 	if (adapter->tx_int_delay.value > 0)
3466 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3467 
3468 	/* Program the Transmit Control Register */
3469 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3470 	tctl &= ~E1000_TCTL_CT;
3471 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3472 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3473 
3474 	if (adapter->hw.mac.type >= e1000_82571)
3475 		tctl |= E1000_TCTL_MULR;
3476 
3477 	/* This write will effectively turn on the transmit unit. */
3478 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3479 
3480 }
3481 
3482 
3483 /*********************************************************************
3484  *
3485  *  Free all transmit rings.
3486  *
3487  **********************************************************************/
3488 static void
3489 em_free_transmit_structures(struct adapter *adapter)
3490 {
3491 	struct tx_ring *txr = adapter->tx_rings;
3492 
3493 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3494 		EM_TX_LOCK(txr);
3495 		em_free_transmit_buffers(txr);
3496 		em_dma_free(adapter, &txr->txdma);
3497 		EM_TX_UNLOCK(txr);
3498 		EM_TX_LOCK_DESTROY(txr);
3499 	}
3500 
3501 	free(adapter->tx_rings, M_DEVBUF);
3502 }
3503 
3504 /*********************************************************************
3505  *
3506  *  Free transmit ring related data structures.
3507  *
3508  **********************************************************************/
3509 static void
3510 em_free_transmit_buffers(struct tx_ring *txr)
3511 {
3512 	struct adapter		*adapter = txr->adapter;
3513 	struct em_buffer	*txbuf;
3514 
3515 	INIT_DEBUGOUT("free_transmit_ring: begin");
3516 
3517 	if (txr->tx_buffers == NULL)
3518 		return;
3519 
3520 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3521 		txbuf = &txr->tx_buffers[i];
3522 		if (txbuf->m_head != NULL) {
3523 			bus_dmamap_sync(txr->txtag, txbuf->map,
3524 			    BUS_DMASYNC_POSTWRITE);
3525 			bus_dmamap_unload(txr->txtag,
3526 			    txbuf->map);
3527 			m_freem(txbuf->m_head);
3528 			txbuf->m_head = NULL;
3529 			if (txbuf->map != NULL) {
3530 				bus_dmamap_destroy(txr->txtag,
3531 				    txbuf->map);
3532 				txbuf->map = NULL;
3533 			}
3534 		} else if (txbuf->map != NULL) {
3535 			bus_dmamap_unload(txr->txtag,
3536 			    txbuf->map);
3537 			bus_dmamap_destroy(txr->txtag,
3538 			    txbuf->map);
3539 			txbuf->map = NULL;
3540 		}
3541 	}
3542 #if __FreeBSD_version >= 800000
3543 	if (txr->br != NULL)
3544 		buf_ring_free(txr->br, M_DEVBUF);
3545 #endif
3546 	if (txr->tx_buffers != NULL) {
3547 		free(txr->tx_buffers, M_DEVBUF);
3548 		txr->tx_buffers = NULL;
3549 	}
3550 	if (txr->txtag != NULL) {
3551 		bus_dma_tag_destroy(txr->txtag);
3552 		txr->txtag = NULL;
3553 	}
3554 	return;
3555 }
3556 
3557 
3558 /*********************************************************************
3559  *  The offload context is protocol specific (TCP/UDP) and thus
3560  *  only needs to be set when the protocol changes. A context
3561  *  change can be a performance detriment, however, and it might
3562  *  be better to disable it entirely. The reason arises in the way
3563  *  in which the controller supports pipelined requests from the
3564  *  Tx data DMA. Up to four requests can be pipelined, and they may
3565  *  belong to the same packet or to multiple packets. However all
3566  *  requests for one packet are issued before a request is issued
3567  *  for a subsequent packet and if a request for the next packet
3568  *  requires a context change, that request will be stalled
3569  *  until the previous request completes. This means setting up
3570  *  a new context effectively disables pipelined Tx data DMA which
3571  *  in turn greatly slows down performance when sending small
3572  *  frames.
3573  **********************************************************************/
3574 static void
3575 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3576     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3577 {
3578 	struct adapter			*adapter = txr->adapter;
3579 	struct e1000_context_desc	*TXD = NULL;
3580 	struct em_buffer		*tx_buffer;
3581 	int				cur, hdr_len;
3582 	u32				cmd = 0;
3583 	u16				offload = 0;
3584 	u8				ipcso, ipcss, tucso, tucss;
3585 
3586 	ipcss = ipcso = tucss = tucso = 0;
3587 	hdr_len = ip_off + (ip->ip_hl << 2);
3588 	cur = txr->next_avail_desc;
3589 
3590 	/* Setup of IP header checksum. */
3591 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3592 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3593 		offload |= CSUM_IP;
3594 		ipcss = ip_off;
3595 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3596 		/*
3597 		 * Start offset for header checksum calculation.
3598 		 * End offset for header checksum calculation.
3599 		 * Offset of place to put the checksum.
3600 		 */
3601 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3602 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3603 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3604 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3605 		cmd |= E1000_TXD_CMD_IP;
3606 	}
3607 
3608 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3609  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3610  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3611  		offload |= CSUM_TCP;
3612  		tucss = hdr_len;
3613  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3614  		/*
3615  		 * Setting up a new checksum offload context for every frame
3616  		 * takes a lot of processing time for the hardware. This also
3617  		 * reduces performance a lot for small sized frames, so avoid
3618  		 * it if the driver can use a previously configured checksum
3619  		 * offload context.
3620  		 */
3621  		if (txr->last_hw_offload == offload) {
3622  			if (offload & CSUM_IP) {
3623  				if (txr->last_hw_ipcss == ipcss &&
3624  				    txr->last_hw_ipcso == ipcso &&
3625  				    txr->last_hw_tucss == tucss &&
3626  				    txr->last_hw_tucso == tucso)
3627  					return;
3628  			} else {
3629  				if (txr->last_hw_tucss == tucss &&
3630  				    txr->last_hw_tucso == tucso)
3631  					return;
3632  			}
3633   		}
3634  		txr->last_hw_offload = offload;
3635  		txr->last_hw_tucss = tucss;
3636  		txr->last_hw_tucso = tucso;
3637  		/*
3638  		 * Start offset for payload checksum calculation.
3639  		 * End offset for payload checksum calculation.
3640  		 * Offset of place to put the checksum.
3641  		 */
3642 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3643  		TXD->upper_setup.tcp_fields.tucss = tucss;
3644  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3645  		TXD->upper_setup.tcp_fields.tucso = tucso;
3646  		cmd |= E1000_TXD_CMD_TCP;
3647  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3648  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3649  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3650  		tucss = hdr_len;
3651  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3652  		/*
3653  		 * Setting up a new checksum offload context for every frame
3654  		 * takes a lot of processing time for the hardware. This also
3655  		 * reduces performance a lot for small sized frames, so avoid
3656  		 * it if the driver can use a previously configured checksum
3657  		 * offload context.
3658  		 */
3659  		if (txr->last_hw_offload == offload) {
3660  			if (offload & CSUM_IP) {
3661  				if (txr->last_hw_ipcss == ipcss &&
3662  				    txr->last_hw_ipcso == ipcso &&
3663  				    txr->last_hw_tucss == tucss &&
3664  				    txr->last_hw_tucso == tucso)
3665  					return;
3666  			} else {
3667  				if (txr->last_hw_tucss == tucss &&
3668  				    txr->last_hw_tucso == tucso)
3669  					return;
3670  			}
3671  		}
3672  		txr->last_hw_offload = offload;
3673  		txr->last_hw_tucss = tucss;
3674  		txr->last_hw_tucso = tucso;
3675  		/*
3676  		 * Start offset for header checksum calculation.
3677  		 * End offset for header checksum calculation.
3678  		 * Offset of place to put the checksum.
3679  		 */
3680 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3681  		TXD->upper_setup.tcp_fields.tucss = tucss;
3682  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3683  		TXD->upper_setup.tcp_fields.tucso = tucso;
3684   	}
3685 
3686  	if (offload & CSUM_IP) {
3687  		txr->last_hw_ipcss = ipcss;
3688  		txr->last_hw_ipcso = ipcso;
3689   	}
3690 
3691 	TXD->tcp_seg_setup.data = htole32(0);
3692 	TXD->cmd_and_length =
3693 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3694 	tx_buffer = &txr->tx_buffers[cur];
3695 	tx_buffer->m_head = NULL;
3696 	tx_buffer->next_eop = -1;
3697 
3698 	if (++cur == adapter->num_tx_desc)
3699 		cur = 0;
3700 
3701 	txr->tx_avail--;
3702 	txr->next_avail_desc = cur;
3703 }
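
/*
 * Worked example of the offsets programmed above (illustrative only,
 * assuming a plain Ethernet + IPv4 + TCP frame with no options):
 *
 *	ip_off  = 14			ETHER_HDR_LEN
 *	hdr_len = 14 + (5 << 2) = 34	ip_hl is 5 for a 20-byte header
 *	ipcss   = 14			IP checksum starts at the IP header
 *	ipcso   = 14 + 10 = 24		offsetof(struct ip, ip_sum)
 *	tucss   = 34			TCP checksum starts at the TCP header
 *	tucso   = 34 + 16 = 50		offsetof(struct tcphdr, th_sum)
 *
 * The hardware inserts the IP checksum at byte 24 and the TCP checksum
 * at byte 50 of the outgoing frame; for UDP, tucso would instead be
 * 34 + 6 (offsetof(struct udphdr, uh_sum)).
 */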
3704 
3705 
3706 /**********************************************************************
3707  *
3708  *  Setup work for hardware segmentation offload (TSO)
3709  *
3710  **********************************************************************/
3711 static void
3712 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3713     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3714 {
3715 	struct adapter			*adapter = txr->adapter;
3716 	struct e1000_context_desc	*TXD;
3717 	struct em_buffer		*tx_buffer;
3718 	int cur, hdr_len;
3719 
3720 	/*
3721 	 * In theory we can use the same TSO context if and only if
3722 	 * the frame is the same type (IP/TCP) and has the same MSS. However,
3723 	 * checking whether a frame has the same IP/TCP structure is a
3724 	 * hard thing, so just ignore that and always reestablish a
3725 	 * new TSO context.
3726 	 */
3727 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3728 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3729 		      E1000_TXD_DTYP_D |	/* Data descr type */
3730 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3731 
3732 	/* IP and/or TCP header checksum calculation and insertion. */
3733 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3734 
3735 	cur = txr->next_avail_desc;
3736 	tx_buffer = &txr->tx_buffers[cur];
3737 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3738 
3739 	/*
3740 	 * Start offset for header checksum calculation.
3741 	 * End offset for header checksum calculation.
3742 	 * Offset of place to put the checksum.
3743 	 */
3744 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3745 	TXD->lower_setup.ip_fields.ipcse =
3746 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3747 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3748 	/*
3749 	 * Start offset for payload checksum calculation.
3750 	 * End offset for payload checksum calculation.
3751 	 * Offset of place to put the checksum.
3752 	 */
3753 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3754 	TXD->upper_setup.tcp_fields.tucse = 0;
3755 	TXD->upper_setup.tcp_fields.tucso =
3756 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3757 	/*
3758 	 * Payload size per packet w/o any headers.
3759 	 * Length of all headers up to payload.
3760 	 */
3761 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3762 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3763 
3764 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3765 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3766 				E1000_TXD_CMD_TSE |	/* TSE context */
3767 				E1000_TXD_CMD_IP |	/* Do IP csum */
3768 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3769 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3770 
3771 	tx_buffer->m_head = NULL;
3772 	tx_buffer->next_eop = -1;
3773 
3774 	if (++cur == adapter->num_tx_desc)
3775 		cur = 0;
3776 
3777 	txr->tx_avail--;
3778 	txr->next_avail_desc = cur;
3779 	txr->tx_tso = TRUE;
3780 }
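
/*
 * Worked example (illustrative only): for Ethernet + IPv4 + TCP with
 * no options, hdr_len = 14 + 20 + 20 = 54. With an MSS (tso_segsz) of
 * 1460, the length encoded in cmd_and_length covers only the payload
 * (m_pkthdr.len - 54); the hardware replays the 54-byte header for
 * each MSS-sized segment it emits.
 */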
3781 
3782 
3783 /**********************************************************************
3784  *
3785  *  Examine each tx_buffer in the used queue. If the hardware is done
3786  *  processing the packet then free associated resources. The
3787  *  tx_buffer is put back on the free queue.
3788  *
3789  **********************************************************************/
3790 static void
3791 em_txeof(struct tx_ring *txr)
3792 {
3793 	struct adapter	*adapter = txr->adapter;
3794         int first, last, done, processed;
3795         struct em_buffer *tx_buffer;
3796         struct e1000_tx_desc   *tx_desc, *eop_desc;
3797 	if_t ifp = adapter->ifp;
3798 
3799 	EM_TX_LOCK_ASSERT(txr);
3800 #ifdef DEV_NETMAP
3801 	if (netmap_tx_irq(ifp, txr->me))
3802 		return;
3803 #endif /* DEV_NETMAP */
3804 
3805 	/* No work, make sure watchdog is off */
3806         if (txr->tx_avail == adapter->num_tx_desc) {
3807 		txr->queue_status = EM_QUEUE_IDLE;
3808                 return;
3809 	}
3810 
3811 	processed = 0;
3812         first = txr->next_to_clean;
3813         tx_desc = &txr->tx_base[first];
3814         tx_buffer = &txr->tx_buffers[first];
3815 	last = tx_buffer->next_eop;
3816         eop_desc = &txr->tx_base[last];
3817 
3818 	/*
3819 	 * What this does is get the index of the
3820 	 * first descriptor AFTER the EOP of the
3821 	 * first packet, that way we can do the
3822 	 * simple comparison on the inner while loop.
3823 	 */
3824 	if (++last == adapter->num_tx_desc)
3825  		last = 0;
3826 	done = last;
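	/*
	 * Example (illustrative): with 1024 descriptors and an EOP at
	 * index 1023, 'last' wraps to 0, so 'done' is 0 and the inner
	 * loop below cleans indices first..1023.
	 */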
3827 
3828         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3829             BUS_DMASYNC_POSTREAD);
3830 
3831         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3832 		/* We clean the range of the packet */
3833 		while (first != done) {
3834                 	tx_desc->upper.data = 0;
3835                 	tx_desc->lower.data = 0;
3836                 	tx_desc->buffer_addr = 0;
3837                 	++txr->tx_avail;
3838 			++processed;
3839 
3840 			if (tx_buffer->m_head) {
3841 				bus_dmamap_sync(txr->txtag,
3842 				    tx_buffer->map,
3843 				    BUS_DMASYNC_POSTWRITE);
3844 				bus_dmamap_unload(txr->txtag,
3845 				    tx_buffer->map);
3846                         	m_freem(tx_buffer->m_head);
3847                         	tx_buffer->m_head = NULL;
3848                 	}
3849 			tx_buffer->next_eop = -1;
3850 			txr->watchdog_time = ticks;
3851 
3852 	                if (++first == adapter->num_tx_desc)
3853 				first = 0;
3854 
3855 	                tx_buffer = &txr->tx_buffers[first];
3856 			tx_desc = &txr->tx_base[first];
3857 		}
3858 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3859 		/* See if we can continue to the next packet */
3860 		last = tx_buffer->next_eop;
3861 		if (last != -1) {
3862         		eop_desc = &txr->tx_base[last];
3863 			/* Get new done point */
3864 			if (++last == adapter->num_tx_desc) last = 0;
3865 			done = last;
3866 		} else
3867 			break;
3868         }
3869         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3870             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3871 
3872         txr->next_to_clean = first;
3873 
3874 	/*
3875 	** Watchdog calculation: we know there's
3876 	** work outstanding or the first return
3877 	** would have been taken, so no progress
3878 	** for too long indicates a hang. The local timer
3879 	** will examine this and do a reset if needed.
3880 	*/
3881 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3882 		txr->queue_status = EM_QUEUE_HUNG;
3883 
3884         /*
3885          * If we have a minimum free, clear IFF_DRV_OACTIVE
3886          * to tell the stack that it is OK to send packets.
3887 	 * Notice that all writes of OACTIVE happen under the
3888 	 * TX lock which, with a single queue, guarantees
3889 	 * sanity.
3890          */
3891         if (txr->tx_avail >= EM_MAX_SCATTER)
3892 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3893 
3894 	/* Disable watchdog if all clean */
3895 	if (txr->tx_avail == adapter->num_tx_desc) {
3896 		txr->queue_status = EM_QUEUE_IDLE;
3897 	}
3898 }
3899 
3900 
3901 /*********************************************************************
3902  *
3903  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3904  *
3905  **********************************************************************/
3906 static void
3907 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3908 {
3909 	struct adapter		*adapter = rxr->adapter;
3910 	struct mbuf		*m;
3911 	bus_dma_segment_t	segs[1];
3912 	struct em_buffer	*rxbuf;
3913 	int			i, j, error, nsegs;
3914 	bool			cleaned = FALSE;
3915 
3916 	i = j = rxr->next_to_refresh;
3917 	/*
3918 	** Get one descriptor beyond
3919 	** our work mark to control
3920 	** the loop.
3921 	*/
3922 	if (++j == adapter->num_rx_desc)
3923 		j = 0;
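	/*
	 * Example (illustrative): with 256 descriptors and
	 * next_to_refresh == 255, i starts at 255, j wraps to 0, and
	 * the loop below refreshes entries until j reaches 'limit'.
	 */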
3924 
3925 	while (j != limit) {
3926 		rxbuf = &rxr->rx_buffers[i];
3927 		if (rxbuf->m_head == NULL) {
3928 			m = m_getjcl(M_NOWAIT, MT_DATA,
3929 			    M_PKTHDR, adapter->rx_mbuf_sz);
3930 			/*
3931 			** If we have a temporary resource shortage
3932 			** that causes a failure, just abort refresh
3933 			** for now, we will return to this point when
3934 			** reinvoked from em_rxeof.
3935 			*/
3936 			if (m == NULL)
3937 				goto update;
3938 		} else
3939 			m = rxbuf->m_head;
3940 
3941 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3942 		m->m_flags |= M_PKTHDR;
3943 		m->m_data = m->m_ext.ext_buf;
3944 
3945 		/* Use bus_dma machinery to setup the memory mapping  */
3946 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3947 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3948 		if (error != 0) {
3949 			printf("Refresh mbufs: hdr dmamap load"
3950 			    " failure - %d\n", error);
3951 			m_free(m);
3952 			rxbuf->m_head = NULL;
3953 			goto update;
3954 		}
3955 		rxbuf->m_head = m;
3956 		bus_dmamap_sync(rxr->rxtag,
3957 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3958 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3959 		cleaned = TRUE;
3960 
3961 		i = j; /* Next is precalculated for us */
3962 		rxr->next_to_refresh = i;
3963 		/* Calculate next controlling index */
3964 		if (++j == adapter->num_rx_desc)
3965 			j = 0;
3966 	}
3967 update:
3968 	/*
3969 	** Update the tail pointer only if,
3970 	** and as far as we have refreshed.
3971 	*/
3972 	if (cleaned)
3973 		E1000_WRITE_REG(&adapter->hw,
3974 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3975 
3976 	return;
3977 }
3978 
3979 
3980 /*********************************************************************
3981  *
3982  *  Allocate memory for rx_buffer structures. Since we use one
3983  *  rx_buffer per received packet, the maximum number of rx_buffer's
3984  *  that we'll need is equal to the number of receive descriptors
3985  *  that we've allocated.
3986  *
3987  **********************************************************************/
3988 static int
3989 em_allocate_receive_buffers(struct rx_ring *rxr)
3990 {
3991 	struct adapter		*adapter = rxr->adapter;
3992 	device_t		dev = adapter->dev;
3993 	struct em_buffer	*rxbuf;
3994 	int			error;
3995 
3996 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3997 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3998 	if (rxr->rx_buffers == NULL) {
3999 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4000 		return (ENOMEM);
4001 	}
4002 
4003 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4004 				1, 0,			/* alignment, bounds */
4005 				BUS_SPACE_MAXADDR,	/* lowaddr */
4006 				BUS_SPACE_MAXADDR,	/* highaddr */
4007 				NULL, NULL,		/* filter, filterarg */
4008 				MJUM9BYTES,		/* maxsize */
4009 				1,			/* nsegments */
4010 				MJUM9BYTES,		/* maxsegsize */
4011 				0,			/* flags */
4012 				NULL,			/* lockfunc */
4013 				NULL,			/* lockarg */
4014 				&rxr->rxtag);
4015 	if (error) {
4016 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4017 		    __func__, error);
4018 		goto fail;
4019 	}
4020 
4021 	rxbuf = rxr->rx_buffers;
4022 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4024 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4025 		if (error) {
4026 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4027 			    __func__, error);
4028 			goto fail;
4029 		}
4030 	}
4031 
4032 	return (0);
4033 
4034 fail:
4035 	em_free_receive_structures(adapter);
4036 	return (error);
4037 }
4038 
4039 
4040 /*********************************************************************
4041  *
4042  *  Initialize a receive ring and its buffers.
4043  *
4044  **********************************************************************/
4045 static int
4046 em_setup_receive_ring(struct rx_ring *rxr)
4047 {
4048 	struct	adapter 	*adapter = rxr->adapter;
4049 	struct em_buffer	*rxbuf;
4050 	bus_dma_segment_t	seg[1];
4051 	int			rsize, nsegs, error = 0;
4052 #ifdef DEV_NETMAP
4053 	struct netmap_slot *slot;
4054 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4055 #endif
4056 
4058 	/* Clear the ring contents */
4059 	EM_RX_LOCK(rxr);
4060 	rsize = roundup2(adapter->num_rx_desc *
4061 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4062 	bzero((void *)rxr->rx_base, rsize);
4063 #ifdef DEV_NETMAP
4064 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4065 #endif
4066 
4067 	/*
4068 	** Free current RX buffer structs and their mbufs
4069 	*/
4070 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4071 		rxbuf = &rxr->rx_buffers[i];
4072 		if (rxbuf->m_head != NULL) {
4073 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4074 			    BUS_DMASYNC_POSTREAD);
4075 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4076 			m_freem(rxbuf->m_head);
4077 			rxbuf->m_head = NULL; /* mark as freed */
4078 		}
4079 	}
4080 
4081 	/* Now replenish the mbufs */
4082         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4083 		rxbuf = &rxr->rx_buffers[j];
4084 #ifdef DEV_NETMAP
4085 		if (slot) {
4086 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4087 			uint64_t paddr;
4088 			void *addr;
4089 
4090 			addr = PNMB(na, slot + si, &paddr);
4091 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4092 			/* Update descriptor */
4093 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4094 			continue;
4095 		}
4096 #endif /* DEV_NETMAP */
4097 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4098 		    M_PKTHDR, adapter->rx_mbuf_sz);
4099 		if (rxbuf->m_head == NULL) {
4100 			error = ENOBUFS;
4101 			goto fail;
4102 		}
4103 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4104 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4105 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4106 
4107 		/* Get the memory mapping */
4108 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4109 		    rxbuf->map, rxbuf->m_head, seg,
4110 		    &nsegs, BUS_DMA_NOWAIT);
4111 		if (error != 0) {
4112 			m_freem(rxbuf->m_head);
4113 			rxbuf->m_head = NULL;
4114 			goto fail;
4115 		}
4116 		bus_dmamap_sync(rxr->rxtag,
4117 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4118 
4119 		/* Update descriptor */
4120 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4121 	}
4122 	rxr->next_to_check = 0;
4123 	rxr->next_to_refresh = 0;
4124 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4125 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4126 
4127 fail:
4128 	EM_RX_UNLOCK(rxr);
4129 	return (error);
4130 }
4131 
4132 /*********************************************************************
4133  *
4134  *  Initialize all receive rings.
4135  *
4136  **********************************************************************/
4137 static int
4138 em_setup_receive_structures(struct adapter *adapter)
4139 {
4140 	struct rx_ring *rxr = adapter->rx_rings;
4141 	int q;
4142 
4143 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4144 		if (em_setup_receive_ring(rxr))
4145 			goto fail;
4146 
4147 	return (0);
4148 fail:
4149 	/*
4150 	 * Free the RX buffers allocated so far; we only handle
4151 	 * the rings that completed, as the failing ring will have
4152 	 * cleaned up for itself. Ring 'q' failed, so it is the terminus.
4153 	 */
4154 	for (int i = 0; i < q; ++i) {
4155 		rxr = &adapter->rx_rings[i];
4156 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4157 			struct em_buffer *rxbuf;
4158 			rxbuf = &rxr->rx_buffers[n];
4159 			if (rxbuf->m_head != NULL) {
4160 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4161 			  	  BUS_DMASYNC_POSTREAD);
4162 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4163 				m_freem(rxbuf->m_head);
4164 				rxbuf->m_head = NULL;
4165 			}
4166 		}
4167 		rxr->next_to_check = 0;
4168 		rxr->next_to_refresh = 0;
4169 	}
4170 
4171 	return (ENOBUFS);
4172 }
4173 
4174 /*********************************************************************
4175  *
4176  *  Free all receive rings.
4177  *
4178  **********************************************************************/
4179 static void
4180 em_free_receive_structures(struct adapter *adapter)
4181 {
4182 	struct rx_ring *rxr = adapter->rx_rings;
4183 
4184 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4185 		em_free_receive_buffers(rxr);
4186 		/* Free the ring memory as well */
4187 		em_dma_free(adapter, &rxr->rxdma);
4188 		EM_RX_LOCK_DESTROY(rxr);
4189 	}
4190 
4191 	free(adapter->rx_rings, M_DEVBUF);
4192 }
4193 
4194 
4195 /*********************************************************************
4196  *
4197  *  Free receive ring data structures
4198  *
4199  **********************************************************************/
4200 static void
4201 em_free_receive_buffers(struct rx_ring *rxr)
4202 {
4203 	struct adapter		*adapter = rxr->adapter;
4204 	struct em_buffer	*rxbuf = NULL;
4205 
4206 	INIT_DEBUGOUT("free_receive_buffers: begin");
4207 
4208 	if (rxr->rx_buffers != NULL) {
4209 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4210 			rxbuf = &rxr->rx_buffers[i];
4211 			if (rxbuf->map != NULL) {
4212 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4213 				    BUS_DMASYNC_POSTREAD);
4214 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4215 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4216 			}
4217 			if (rxbuf->m_head != NULL) {
4218 				m_freem(rxbuf->m_head);
4219 				rxbuf->m_head = NULL;
4220 			}
4221 		}
4222 		free(rxr->rx_buffers, M_DEVBUF);
4223 		rxr->rx_buffers = NULL;
4224 		rxr->next_to_check = 0;
4225 		rxr->next_to_refresh = 0;
4226 	}
4227 
4228 	if (rxr->rxtag != NULL) {
4229 		bus_dma_tag_destroy(rxr->rxtag);
4230 		rxr->rxtag = NULL;
4231 	}
4232 
4233 	return;
4234 }
4235 
4236 
4237 /*********************************************************************
4238  *
4239  *  Enable receive unit.
4240  *
4241  **********************************************************************/
4242 
4243 static void
4244 em_initialize_receive_unit(struct adapter *adapter)
4245 {
4246 	struct rx_ring	*rxr = adapter->rx_rings;
4247 	if_t ifp = adapter->ifp;
4248 	struct e1000_hw	*hw = &adapter->hw;
4249 	u64	bus_addr;
4250 	u32	rctl, rxcsum;
4251 
4252 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4253 
4254 	/*
4255 	 * Make sure receives are disabled while setting
4256 	 * up the descriptor ring
4257 	 */
4258 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4259 	/* Do not disable if ever enabled on this hardware */
4260 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4261 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4262 
4263 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4264 	    adapter->rx_abs_int_delay.value);
4265 	/*
4266 	 * Set the interrupt throttling rate. Value is calculated
4267 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4268 	 */
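	/*
	 * Worked example (assuming the usual driver default of
	 * MAX_INTS_PER_SEC = 8000): DEFAULT_ITR = 1000000000 /
	 * (8000 * 256) = 488 units of 256 ns, i.e. roughly 125 us
	 * between interrupts.
	 */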
4269 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4270 
4271 	/*
4272 	** When using MSIX interrupts we need to throttle
4273 	** using the EITR register (82574 only)
4274 	*/
4275 	if (hw->mac.type == e1000_82574) {
4276 		for (int i = 0; i < 4; i++)
4277 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4278 			    DEFAULT_ITR);
4279 		/* Disable accelerated acknowledge */
4280 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4281 	}
4282 
4283 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4284 	if (if_getcapenable(ifp) & IFCAP_RXCSUM)
4285 		rxcsum |= E1000_RXCSUM_TUOFL;
4286 	else
4287 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4288 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4289 
4290 	/*
4291 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4292 	** long latencies are observed, like Lenovo X60. This
4293 	** change eliminates the problem, but since having positive
4294 	** values in RDTR is a known source of problems on other
4295 	** platforms another solution is being sought.
4296 	*/
4297 	if (hw->mac.type == e1000_82573)
4298 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4299 
4300 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4301 		/* Setup the Base and Length of the Rx Descriptor Ring */
4302 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4303 
4304 		bus_addr = rxr->rxdma.dma_paddr;
4305 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4306 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4307 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4308 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4309 		/* Setup the Head and Tail Descriptor Pointers */
4310 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4311 #ifdef DEV_NETMAP
4312 		/*
4313 		 * an init() while a netmap client is active must
4314 		 * preserve the rx buffers passed to userspace.
4315 		 */
4316 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4317 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4318 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4319 		}
4320 #endif /* DEV_NETMAP */
4321 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4322 	}
4323 
4324 	/* Set PTHRESH for improved jumbo performance */
4325 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4326 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4327 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4328 	    (if_getmtu(ifp) > ETHERMTU)) {
4329 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4330 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4331 	}
4332 
4333 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4334 		if (if_getmtu(ifp) > ETHERMTU)
4335 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4336 		else
4337 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4338 	}
4339 
4340 	/* Setup the Receive Control Register */
4341 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4342 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4343 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4344 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4345 
4346         /* Strip the CRC */
4347         rctl |= E1000_RCTL_SECRC;
4348 
4349         /* Make sure VLAN Filters are off */
4350         rctl &= ~E1000_RCTL_VFE;
4351 	rctl &= ~E1000_RCTL_SBP;
4352 
4353 	if (adapter->rx_mbuf_sz == MCLBYTES)
4354 		rctl |= E1000_RCTL_SZ_2048;
4355 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4356 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4357 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4358 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
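	/*
	 * Note (illustrative): the hardware only supports power-of-two
	 * buffer sizes, so a 9k (MJUM9BYTES) cluster is programmed as
	 * an 8192-byte hardware buffer.
	 */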
4359 
4360 	if (if_getmtu(ifp) > ETHERMTU)
4361 		rctl |= E1000_RCTL_LPE;
4362 	else
4363 		rctl &= ~E1000_RCTL_LPE;
4364 
4365 	/* Write out the settings */
4366 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4367 
4368 	return;
4369 }
4370 
4371 
4372 /*********************************************************************
4373  *
4374  *  This routine executes in interrupt context. It replenishes
4375  *  the mbufs in the descriptor ring and sends data which has
4376  *  been dma'ed into host memory to the upper layer.
4377  *
4378  *  We loop at most count times if count is > 0, or until done if
4379  *  count < 0.
4380  *
4381  *  For polling we also now return the number of cleaned packets
4382  *********************************************************************/
4383 static bool
4384 em_rxeof(struct rx_ring *rxr, int count, int *done)
4385 {
4386 	struct adapter		*adapter = rxr->adapter;
4387 	if_t ifp = adapter->ifp;
4388 	struct mbuf		*mp, *sendmp;
4389 	u8			status = 0;
4390 	u16 			len;
4391 	int			i, processed, rxdone = 0;
4392 	bool			eop;
4393 	struct e1000_rx_desc	*cur;
4394 
4395 	EM_RX_LOCK(rxr);
4396 
4397 #ifdef DEV_NETMAP
4398 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4399 		EM_RX_UNLOCK(rxr);
4400 		return (FALSE);
4401 	}
4402 #endif /* DEV_NETMAP */
4403 
4404 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4405 
4406 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4407 			break;
4408 
4409 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4410 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4411 
4412 		cur = &rxr->rx_base[i];
4413 		status = cur->status;
4414 		mp = sendmp = NULL;
4415 
4416 		if ((status & E1000_RXD_STAT_DD) == 0)
4417 			break;
4418 
4419 		len = le16toh(cur->length);
4420 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4421 
4422 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4423 		    (rxr->discard == TRUE)) {
4424 			adapter->dropped_pkts++;
4425 			++rxr->rx_discarded;
4426 			if (!eop) /* Catch subsequent segs */
4427 				rxr->discard = TRUE;
4428 			else
4429 				rxr->discard = FALSE;
4430 			em_rx_discard(rxr, i);
4431 			goto next_desc;
4432 		}
4433 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4434 
4435 		/* Assign correct length to the current fragment */
4436 		mp = rxr->rx_buffers[i].m_head;
4437 		mp->m_len = len;
4438 
4439 		/* Trigger for refresh */
4440 		rxr->rx_buffers[i].m_head = NULL;
4441 
4442 		/* First segment? */
4443 		if (rxr->fmp == NULL) {
4444 			mp->m_pkthdr.len = len;
4445 			rxr->fmp = rxr->lmp = mp;
4446 		} else {
4447 			/* Chain mbuf's together */
4448 			mp->m_flags &= ~M_PKTHDR;
4449 			rxr->lmp->m_next = mp;
4450 			rxr->lmp = mp;
4451 			rxr->fmp->m_pkthdr.len += len;
4452 		}
4453 
4454 		if (eop) {
4455 			--count;
4456 			sendmp = rxr->fmp;
4457 			if_setrcvif(sendmp, ifp);
4458 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4459 			em_receive_checksum(cur, sendmp);
4460 #ifndef __NO_STRICT_ALIGNMENT
4461 			if (adapter->hw.mac.max_frame_size >
4462 			    (MCLBYTES - ETHER_ALIGN) &&
4463 			    em_fixup_rx(rxr) != 0)
4464 				goto skip;
4465 #endif
4466 			if (status & E1000_RXD_STAT_VP) {
4467 				if_setvtag(sendmp,
4468 				    le16toh(cur->special));
4469 				sendmp->m_flags |= M_VLANTAG;
4470 			}
4471 #ifndef __NO_STRICT_ALIGNMENT
4472 skip:
4473 #endif
4474 			rxr->fmp = rxr->lmp = NULL;
4475 		}
4476 next_desc:
4477 		/* Zero out the receive descriptors status. */
4478 		cur->status = 0;
4479 		++rxdone;	/* cumulative for POLL */
4480 		++processed;
4481 
4482 		/* Advance our pointers to the next descriptor. */
4483 		if (++i == adapter->num_rx_desc)
4484 			i = 0;
4485 
4486 		/* Send to the stack */
4487 		if (sendmp != NULL) {
4488 			rxr->next_to_check = i;
4489 			EM_RX_UNLOCK(rxr);
4490 			if_input(ifp, sendmp);
4491 			EM_RX_LOCK(rxr);
4492 			i = rxr->next_to_check;
4493 		}
4494 
4495 		/* Only refresh mbufs every 8 descriptors */
4496 		if (processed == 8) {
4497 			em_refresh_mbufs(rxr, i);
4498 			processed = 0;
4499 		}
4500 	}
4501 
4502 	/* Catch any remaining refresh work */
4503 	if (e1000_rx_unrefreshed(rxr))
4504 		em_refresh_mbufs(rxr, i);
4505 
4506 	rxr->next_to_check = i;
4507 	if (done != NULL)
4508 		*done = rxdone;
4509 	EM_RX_UNLOCK(rxr);
4510 
4511 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4512 }
4513 
4514 static __inline void
4515 em_rx_discard(struct rx_ring *rxr, int i)
4516 {
4517 	struct em_buffer	*rbuf;
4518 
4519 	rbuf = &rxr->rx_buffers[i];
4520 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4521 
4522 	/* Free any previous pieces */
4523 	if (rxr->fmp != NULL) {
4524 		rxr->fmp->m_flags |= M_PKTHDR;
4525 		m_freem(rxr->fmp);
4526 		rxr->fmp = NULL;
4527 		rxr->lmp = NULL;
4528 	}
4529 	/*
4530 	** Free buffer and allow em_refresh_mbufs()
4531 	** to clean up and recharge buffer.
4532 	*/
4533 	if (rbuf->m_head) {
4534 		m_free(rbuf->m_head);
4535 		rbuf->m_head = NULL;
4536 	}
4537 	return;
4538 }
4539 
4540 #ifndef __NO_STRICT_ALIGNMENT
4541 /*
4542  * When jumbo frames are enabled we should realign the entire payload on
4543  * architectures with strict alignment. This is a serious design mistake of
4544  * the 8254x as it nullifies DMA operations: the 8254x only allows the RX
4545  * buffer size to be 2048/4096/8192/16384 bytes, while what we really want
4546  * is 2048 - ETHER_ALIGN so the payload ends up aligned. On architectures
4547  * without strict alignment restrictions the 8254x still performs unaligned
4548  * memory accesses, which reduces performance as well. To avoid copying an
4549  * entire frame just to align it, we allocate a new mbuf, copy the ethernet
4550  * header into it, and prepend the new mbuf to the existing mbuf chain.
4551  *
4552  * Be aware that the best performance of the 8254x is achieved only when
4553  * jumbo frames are not used at all on architectures with strict alignment.
4554  */
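/*
 * Illustrative example: with a cluster starting on a 4-byte boundary,
 * the 14-byte ethernet header leaves the IP header at offset 14, which
 * is not 32-bit aligned; starting the payload ETHER_ALIGN (2) bytes in
 * would put the IP header at offset 16, but the 8254x cannot be told
 * to DMA into such an odd-sized buffer, hence the fixup below.
 */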
4555 static int
4556 em_fixup_rx(struct rx_ring *rxr)
4557 {
4558 	struct adapter *adapter = rxr->adapter;
4559 	struct mbuf *m, *n;
4560 	int error;
4561 
4562 	error = 0;
4563 	m = rxr->fmp;
4564 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4565 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4566 		m->m_data += ETHER_HDR_LEN;
4567 	} else {
4568 		MGETHDR(n, M_NOWAIT, MT_DATA);
4569 		if (n != NULL) {
4570 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4571 			m->m_data += ETHER_HDR_LEN;
4572 			m->m_len -= ETHER_HDR_LEN;
4573 			n->m_len = ETHER_HDR_LEN;
4574 			M_MOVE_PKTHDR(n, m);
4575 			n->m_next = m;
4576 			rxr->fmp = n;
4577 		} else {
4578 			adapter->dropped_pkts++;
4579 			m_freem(rxr->fmp);
4580 			rxr->fmp = NULL;
4581 			error = ENOMEM;
4582 		}
4583 	}
4584 
4585 	return (error);
4586 }
4587 #endif
4588 
4589 /*********************************************************************
4590  *
4591  *  Verify that the hardware indicated that the checksum is valid.
4592  *  Inform the stack about the status of the checksum so that the
4593  *  stack doesn't spend time verifying it.
4594  *
4595  *********************************************************************/
4596 static void
4597 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4598 {
4599 	mp->m_pkthdr.csum_flags = 0;
4600 
4601 	/* Ignore Checksum bit is set */
4602 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4603 		return;
4604 
4605 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4606 		return;
4607 
4608 	/* IP Checksum Good? */
4609 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4610 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4611 
4612 	/* TCP or UDP checksum */
4613 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4614 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4615 		mp->m_pkthdr.csum_data = htons(0xffff);
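		/*
		 * A csum_data of 0xffff together with CSUM_PSEUDO_HDR
		 * tells the stack the checksum is fully verified, so it
		 * performs no further work on it.
		 */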
4616 	}
4617 }
4618 
4619 /*
4620  * This routine is run via a vlan
4621  * config EVENT
4622  */
4623 static void
4624 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4625 {
4626 	struct adapter	*adapter = if_getsoftc(ifp);
4627 	u32		index, bit;
4628 
4629 	if ((void*)adapter !=  arg)   /* Not our event */
4630 		return;
4631 
4632 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4633                 return;
4634 
4635 	EM_CORE_LOCK(adapter);
4636 	index = (vtag >> 5) & 0x7F;
4637 	bit = vtag & 0x1F;
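	/*
	 * Illustrative example: vtag 100 -> index 3 (100 >> 5) and
	 * bit 4 (100 & 0x1F), i.e. bit 4 of shadow_vfta[3].
	 */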
4638 	adapter->shadow_vfta[index] |= (1 << bit);
4639 	++adapter->num_vlans;
4640 	/* Re-init to load the changes */
4641 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4642 		em_init_locked(adapter);
4643 	EM_CORE_UNLOCK(adapter);
4644 }
4645 
4646 /*
4647  * This routine is run via a vlan
4648  * unconfig EVENT
4649  */
4650 static void
4651 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4652 {
4653 	struct adapter	*adapter = if_getsoftc(ifp);
4654 	u32		index, bit;
4655 
4656 	if (adapter != arg)
4657 		return;
4658 
4659 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4660                 return;
4661 
4662 	EM_CORE_LOCK(adapter);
4663 	index = (vtag >> 5) & 0x7F;
4664 	bit = vtag & 0x1F;
4665 	adapter->shadow_vfta[index] &= ~(1 << bit);
4666 	--adapter->num_vlans;
4667 	/* Re-init to load the changes */
4668 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4669 		em_init_locked(adapter);
4670 	EM_CORE_UNLOCK(adapter);
4671 }
4672 
4673 static void
4674 em_setup_vlan_hw_support(struct adapter *adapter)
4675 {
4676 	struct e1000_hw *hw = &adapter->hw;
4677 	u32             reg;
4678 
4679 	/*
4680 	** We get here thru init_locked, meaning a
4681 	** soft reset has already cleared the VFTA
4682 	** and other state; if no vlans have been
4683 	** registered, there is nothing to do.
4684 	*/
4685 	if (adapter->num_vlans == 0)
4686                 return;
4687 
4688 	/*
4689 	** A soft reset zeroes out the VFTA, so
4690 	** we need to repopulate it now.
4691 	*/
4692 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4693                 if (adapter->shadow_vfta[i] != 0)
4694 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4695                             i, adapter->shadow_vfta[i]);
4696 
4697 	reg = E1000_READ_REG(hw, E1000_CTRL);
4698 	reg |= E1000_CTRL_VME;
4699 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4700 
4701 	/* Enable the Filter Table */
4702 	reg = E1000_READ_REG(hw, E1000_RCTL);
4703 	reg &= ~E1000_RCTL_CFIEN;
4704 	reg |= E1000_RCTL_VFE;
4705 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4706 }
4707 
4708 static void
4709 em_enable_intr(struct adapter *adapter)
4710 {
4711 	struct e1000_hw *hw = &adapter->hw;
4712 	u32 ims_mask = IMS_ENABLE_MASK;
4713 
4714 	if (hw->mac.type == e1000_82574) {
4715 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4716 		ims_mask |= EM_MSIX_MASK;
4717 	}
4718 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4719 }
4720 
4721 static void
4722 em_disable_intr(struct adapter *adapter)
4723 {
4724 	struct e1000_hw *hw = &adapter->hw;
4725 
4726 	if (hw->mac.type == e1000_82574)
4727 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4728 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4729 }
4730 
4731 /*
4732  * Bit of a misnomer, what this really means is
4733  * to enable OS management of the system... aka
4734  * to disable special hardware management features
4735  */
4736 static void
4737 em_init_manageability(struct adapter *adapter)
4738 {
4739 	/* A shared code workaround */
4740 #define E1000_82542_MANC2H E1000_MANC2H
4741 	if (adapter->has_manage) {
4742 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4743 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4744 
4745 		/* disable hardware interception of ARP */
4746 		manc &= ~(E1000_MANC_ARP_EN);
4747 
4748                 /* enable receiving management packets to the host */
4749 		manc |= E1000_MANC_EN_MNG2HOST;
4750 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4751 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4752 		manc2h |= E1000_MNG2HOST_PORT_623;
4753 		manc2h |= E1000_MNG2HOST_PORT_664;
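		/*
		 * Ports 623 and 664 are the ASF/RMCP remote-management
		 * ports; forwarding them lets management traffic reach
		 * the host OS.
		 */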
4754 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4755 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4756 	}
4757 }
4758 
4759 /*
4760  * Give control back to hardware management
4761  * controller if there is one.
4762  */
4763 static void
4764 em_release_manageability(struct adapter *adapter)
4765 {
4766 	if (adapter->has_manage) {
4767 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4768 
4769 		/* re-enable hardware interception of ARP */
4770 		manc |= E1000_MANC_ARP_EN;
4771 		manc &= ~E1000_MANC_EN_MNG2HOST;
4772 
4773 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4774 	}
4775 }
4776 
4777 /*
4778  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4779  * For ASF and Pass Through versions of f/w this means
4780  * that the driver is loaded. For AMT version type f/w
4781  * this means that the network i/f is open.
4782  */
4783 static void
4784 em_get_hw_control(struct adapter *adapter)
4785 {
4786 	u32 ctrl_ext, swsm;
4787 
4788 	if (adapter->hw.mac.type == e1000_82573) {
4789 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4790 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4791 		    swsm | E1000_SWSM_DRV_LOAD);
4792 		return;
4793 	}
4794 	/* else */
4795 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4796 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4797 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4798 	return;
4799 }
4800 
4801 /*
4802  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4803  * For ASF and Pass Through versions of f/w this means that
4804  * the driver is no longer loaded. For AMT versions of the
4805  * f/w this means that the network i/f is closed.
4806  */
4807 static void
4808 em_release_hw_control(struct adapter *adapter)
4809 {
4810 	u32 ctrl_ext, swsm;
4811 
4812 	if (!adapter->has_manage)
4813 		return;
4814 
4815 	if (adapter->hw.mac.type == e1000_82573) {
4816 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4817 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4818 		    swsm & ~E1000_SWSM_DRV_LOAD);
4819 		return;
4820 	}
4821 	/* else */
4822 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4823 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4824 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4825 	return;
4826 }
4827 
4828 static int
4829 em_is_valid_ether_addr(u8 *addr)
4830 {
4831 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4832 
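	/*
	 * Reject multicast addresses (low bit of the first octet) and
	 * the all-zero address; neither is a valid station address.
	 */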
4833 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4834 		return (FALSE);
4835 	}
4836 
4837 	return (TRUE);
4838 }
4839 
4840 /*
4841 ** Parse the interface capabilities with regard
4842 ** to both system management and wake-on-lan for
4843 ** later use.
4844 */
4845 static void
4846 em_get_wakeup(device_t dev)
4847 {
4848 	struct adapter	*adapter = device_get_softc(dev);
4849 	u16		eeprom_data = 0, device_id, apme_mask;
4850 
4851 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4852 	apme_mask = EM_EEPROM_APME;
4853 
4854 	switch (adapter->hw.mac.type) {
4855 	case e1000_82573:
4856 	case e1000_82583:
4857 		adapter->has_amt = TRUE;
4858 		/* Falls thru */
4859 	case e1000_82571:
4860 	case e1000_82572:
4861 	case e1000_80003es2lan:
4862 		if (adapter->hw.bus.func == 1) {
4863 			e1000_read_nvm(&adapter->hw,
4864 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4865 			break;
4866 		} else
4867 			e1000_read_nvm(&adapter->hw,
4868 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4869 		break;
4870 	case e1000_ich8lan:
4871 	case e1000_ich9lan:
4872 	case e1000_ich10lan:
4873 	case e1000_pchlan:
4874 	case e1000_pch2lan:
4875 		apme_mask = E1000_WUC_APME;
4876 		adapter->has_amt = TRUE;
4877 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4878 		break;
4879 	default:
4880 		e1000_read_nvm(&adapter->hw,
4881 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4882 		break;
4883 	}
4884 	if (eeprom_data & apme_mask)
4885 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4886 	/*
4887 	 * We have the eeprom settings; now apply the special cases
4888 	 * where the eeprom may be wrong or the board won't support
4889 	 * wake on lan on a particular port.
4890 	 */
4891 	device_id = pci_get_device(dev);
4892         switch (device_id) {
4893 	case E1000_DEV_ID_82571EB_FIBER:
4894 		/* Wake events only supported on port A for dual fiber
4895 		 * regardless of eeprom setting */
4896 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4897 		    E1000_STATUS_FUNC_1)
4898 			adapter->wol = 0;
4899 		break;
4900 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4901 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4902 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4903                 /* if quad port adapter, disable WoL on all but port A */
4904 		if (global_quad_port_a != 0)
4905 			adapter->wol = 0;
4906 		/* Reset for multiple quad port adapters */
4907 		if (++global_quad_port_a == 4)
4908 			global_quad_port_a = 0;
4909                 break;
4910 	}
4911 	return;
4912 }
4913 
4914 
4915 /*
4916  * Enable PCI Wake On Lan capability
4917  */
4918 static void
4919 em_enable_wakeup(device_t dev)
4920 {
4921 	struct adapter	*adapter = device_get_softc(dev);
4922 	if_t ifp = adapter->ifp;
4923 	u32		pmc, ctrl, ctrl_ext, rctl;
4924 	u16     	status;
4925 
4926 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4927 		return;
4928 
4929 	/* Advertise the wakeup capability */
4930 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4931 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4932 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4933 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4934 
4935 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4936 	    (adapter->hw.mac.type == e1000_pchlan) ||
4937 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4938 	    (adapter->hw.mac.type == e1000_ich10lan))
4939 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4940 
4941 	/* Keep the laser running on Fiber adapters */
4942 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4943 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4944 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4945 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4946 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4947 	}
4948 
4949 	/*
4950 	** Determine type of Wakeup: note that wol
4951 	** is set with all bits on by default.
4952 	*/
4953 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
4954 		adapter->wol &= ~E1000_WUFC_MAG;
4955 
4956 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
4957 		adapter->wol &= ~E1000_WUFC_MC;
4958 	else {
4959 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4960 		rctl |= E1000_RCTL_MPE;
4961 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4962 	}
4963 
4964 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4965 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4966 		if (em_enable_phy_wakeup(adapter))
4967 			return;
4968 	} else {
4969 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4970 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4971 	}
4972 
4973 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4974 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4975 
4976         /* Request PME */
4977         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4978 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4979 	if (if_getcapenable(ifp) & IFCAP_WOL)
4980 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4981         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4982 
4983 	return;
4984 }
4985 
4986 /*
4987 ** WOL in the newer chipset interfaces (pchlan)
4988 ** requires things to be copied into the PHY
4989 */
4990 static int
4991 em_enable_phy_wakeup(struct adapter *adapter)
4992 {
4993 	struct e1000_hw *hw = &adapter->hw;
4994 	u32 mreg, ret = 0;
4995 	u16 preg;
4996 
4997 	/* copy MAC RARs to PHY RARs */
4998 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4999 
5000 	/* copy MAC MTA to PHY MTA */
5001 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5002 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5003 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5004 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5005 		    (u16)((mreg >> 16) & 0xFFFF));
5006 	}
5007 
5008 	/* configure PHY Rx Control register */
5009 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5010 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5011 	if (mreg & E1000_RCTL_UPE)
5012 		preg |= BM_RCTL_UPE;
5013 	if (mreg & E1000_RCTL_MPE)
5014 		preg |= BM_RCTL_MPE;
5015 	preg &= ~(BM_RCTL_MO_MASK);
5016 	if (mreg & E1000_RCTL_MO_3)
5017 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5018 				<< BM_RCTL_MO_SHIFT);
5019 	if (mreg & E1000_RCTL_BAM)
5020 		preg |= BM_RCTL_BAM;
5021 	if (mreg & E1000_RCTL_PMCF)
5022 		preg |= BM_RCTL_PMCF;
5023 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5024 	if (mreg & E1000_CTRL_RFCE)
5025 		preg |= BM_RCTL_RFCE;
5026 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5027 
5028 	/* enable PHY wakeup in MAC register */
5029 	E1000_WRITE_REG(hw, E1000_WUC,
5030 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5031 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5032 
5033 	/* configure and enable PHY wakeup in PHY registers */
5034 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5035 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5036 
5037 	/* activate PHY wakeup */
5038 	ret = hw->phy.ops.acquire(hw);
5039 	if (ret) {
5040 		printf("Could not acquire PHY\n");
5041 		return ret;
5042 	}
5043 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5044 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5045 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5046 	if (ret) {
5047 		printf("Could not read PHY page 769\n");
5048 		goto out;
5049 	}
5050 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5051 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5052 	if (ret)
5053 		printf("Could not set PHY Host Wakeup bit\n");
5054 out:
5055 	hw->phy.ops.release(hw);
5056 
5057 	return ret;
5058 }
5059 
5060 static void
5061 em_led_func(void *arg, int onoff)
5062 {
5063 	struct adapter	*adapter = arg;
5064 
5065 	EM_CORE_LOCK(adapter);
5066 	if (onoff) {
5067 		e1000_setup_led(&adapter->hw);
5068 		e1000_led_on(&adapter->hw);
5069 	} else {
5070 		e1000_led_off(&adapter->hw);
5071 		e1000_cleanup_led(&adapter->hw);
5072 	}
5073 	EM_CORE_UNLOCK(adapter);
5074 }
5075 
5076 /*
5077 ** Disable the L0S and L1 LINK states
5078 */
5079 static void
5080 em_disable_aspm(struct adapter *adapter)
5081 {
5082 	int		base, reg;
5083 	u16		link_cap, link_ctrl;
5084 	device_t	dev = adapter->dev;
5085 
5086 	switch (adapter->hw.mac.type) {
5087 		case e1000_82573:
5088 		case e1000_82574:
5089 		case e1000_82583:
5090 			break;
5091 		default:
5092 			return;
5093 	}
5094 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5095 		return;
5096 	reg = base + PCIER_LINK_CAP;
5097 	link_cap = pci_read_config(dev, reg, 2);
5098 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5099 		return;
5100 	reg = base + PCIER_LINK_CTL;
5101 	link_ctrl = pci_read_config(dev, reg, 2);
5102 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5103 	pci_write_config(dev, reg, link_ctrl, 2);
5104 	return;
5105 }
5106 
5107 /**********************************************************************
5108  *
5109  *  Update the board statistics counters.
5110  *
5111  **********************************************************************/
5112 static void
5113 em_update_stats_counters(struct adapter *adapter)
5114 {
5115 
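	/*
	 * The statistics registers read below are clear-on-read, so
	 * each pass picks up only the delta since the previous one and
	 * accumulates it into the softc.  This is expected to be called
	 * periodically (the driver's local timer is the usual caller).
	 */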
5116 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5117 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5118 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5119 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5120 	}
5121 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5122 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5123 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5124 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5125 
5126 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5127 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5128 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5129 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5130 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5131 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5132 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5133 	/*
5134 	** For watchdog management we need to know if we have been
5135 	** paused during the last interval, so capture that here.
5136 	*/
5137 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5138 	adapter->stats.xoffrxc += adapter->pause_frames;
5139 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5140 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5141 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5142 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5143 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5144 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5145 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5146 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5147 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5148 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5149 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5150 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5151 
5152 	/* For the 64-bit byte counters the low dword must be read first. */
5153 	/* Both registers clear on the read of the high dword */
5154 
5155 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5156 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5157 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5158 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5159 
5160 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5161 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5162 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5163 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5164 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5165 
5166 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5167 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5168 
5169 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5170 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5171 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5172 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5173 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5174 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5175 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5176 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5177 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5178 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5179 
5180 	/* Interrupt Counts */
5181 
5182 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5183 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5184 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5185 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5186 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5187 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5188 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5189 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5190 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5191 
5192 	if (adapter->hw.mac.type >= e1000_82543) {
5193 		adapter->stats.algnerrc +=
5194 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5195 		adapter->stats.rxerrc +=
5196 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5197 		adapter->stats.tncrs +=
5198 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5199 		adapter->stats.cexterr +=
5200 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5201 		adapter->stats.tsctc +=
5202 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5203 		adapter->stats.tsctfc +=
5204 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5205 	}
5206 }
5207 
5208 static uint64_t
5209 em_get_counter(if_t ifp, ift_counter cnt)
5210 {
5211 	struct adapter *adapter;
5212 
5213 	adapter = if_getsoftc(ifp);
5214 
5215 	switch (cnt) {
5216 	case IFCOUNTER_COLLISIONS:
5217 		return (adapter->stats.colc);
5218 	case IFCOUNTER_IERRORS:
5219 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5220 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5221 		    adapter->stats.ruc + adapter->stats.roc +
5222 		    adapter->stats.mpc + adapter->stats.cexterr);
5223 	case IFCOUNTER_OERRORS:
5224 		return (adapter->stats.ecol + adapter->stats.latecol +
5225 		    adapter->watchdog_events);
5226 	default:
5227 		return (if_get_counter_default(ifp, cnt));
5228 	}
5229 }
5230 
5231 /* Export a single 32-bit register via a read-only sysctl. */
5232 static int
5233 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5234 {
5235 	struct adapter *adapter;
5236 	u_int val;
5237 
5238 	adapter = oidp->oid_arg1;
5239 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5240 	return (sysctl_handle_int(oidp, &val, 0, req));
5241 }
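
/*
 * The register offset to export rides in through oid_arg2, so this one
 * handler can back any number of register sysctls; see the
 * SYSCTL_ADD_PROC() calls in em_add_hw_stats() below, which pass
 * offsets such as E1000_CTRL, E1000_TDH(n) and E1000_RDT(n).
 */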
5242 
5243 /*
5244  * Add sysctl variables, one per statistic, to the system.
5245  */
5246 static void
5247 em_add_hw_stats(struct adapter *adapter)
5248 {
5249 	device_t dev = adapter->dev;
5250 
5251 	struct tx_ring *txr = adapter->tx_rings;
5252 	struct rx_ring *rxr = adapter->rx_rings;
5253 
5254 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5255 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5256 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5257 	struct e1000_hw_stats *stats = &adapter->stats;
5258 
5259 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5260 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5261 
5262 #define QUEUE_NAME_LEN 32
5263 	char namebuf[QUEUE_NAME_LEN];
5264 
5265 	/* Driver Statistics */
5266 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5267 			CTLFLAG_RD, &adapter->link_irq,
5268 			"Link MSIX IRQ Handled");
5269 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5270 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5271 			 "Std mbuf failed");
5272 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5273 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5274 			 "Std mbuf cluster failed");
5275 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5276 			CTLFLAG_RD, &adapter->dropped_pkts,
5277 			"Driver dropped packets");
5278 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5279 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5280 			"Driver tx dma failure in xmit");
5281 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5282 			CTLFLAG_RD, &adapter->rx_overruns,
5283 			"RX overruns");
5284 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5285 			CTLFLAG_RD, &adapter->watchdog_events,
5286 			"Watchdog timeouts");
5287 
5288 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5289 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5290 			em_sysctl_reg_handler, "IU",
5291 			"Device Control Register");
5292 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5293 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5294 			em_sysctl_reg_handler, "IU",
5295 			"Receiver Control Register");
5296 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5297 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5298 			"Flow Control High Watermark");
5299 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5300 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5301 			"Flow Control Low Watermark");
5302 
5303 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5304 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5305 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5306 					    CTLFLAG_RD, NULL, "Queue Name");
5307 		queue_list = SYSCTL_CHILDREN(queue_node);
5308 
5309 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5310 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5311 				E1000_TDH(txr->me),
5312 				em_sysctl_reg_handler, "IU",
5313 				"Transmit Descriptor Head");
5314 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5315 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5316 				E1000_TDT(txr->me),
5317 				em_sysctl_reg_handler, "IU",
5318 				"Transmit Descriptor Tail");
5319 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5320 				CTLFLAG_RD, &txr->tx_irq,
5321 				"Queue MSI-X Transmit Interrupts");
5322 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5323 				CTLFLAG_RD, &txr->no_desc_avail,
5324 				"Queue No Descriptor Available");
5325 
5326 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5327 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5328 				E1000_RDH(rxr->me),
5329 				em_sysctl_reg_handler, "IU",
5330 				"Receive Descriptor Head");
5331 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5332 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333 				E1000_RDT(rxr->me),
5334 				em_sysctl_reg_handler, "IU",
5335 				"Receive Descriptor Tail");
5336 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5337 				CTLFLAG_RD, &rxr->rx_irq,
5338 				"Queue MSI-X Receive Interrupts");
5339 	}
5340 
5341 	/* MAC stats get their own sub node */
5342 
5343 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5344 				    CTLFLAG_RD, NULL, "Statistics");
5345 	stat_list = SYSCTL_CHILDREN(stat_node);
5346 
5347 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5348 			CTLFLAG_RD, &stats->ecol,
5349 			"Excessive collisions");
5350 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5351 			CTLFLAG_RD, &stats->scc,
5352 			"Single collisions");
5353 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5354 			CTLFLAG_RD, &stats->mcc,
5355 			"Multiple collisions");
5356 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5357 			CTLFLAG_RD, &stats->latecol,
5358 			"Late collisions");
5359 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5360 			CTLFLAG_RD, &stats->colc,
5361 			"Collision Count");
5362 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5363 			CTLFLAG_RD, &adapter->stats.symerrs,
5364 			"Symbol Errors");
5365 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5366 			CTLFLAG_RD, &adapter->stats.sec,
5367 			"Sequence Errors");
5368 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5369 			CTLFLAG_RD, &adapter->stats.dc,
5370 			"Defer Count");
5371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5372 			CTLFLAG_RD, &adapter->stats.mpc,
5373 			"Missed Packets");
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5375 			CTLFLAG_RD, &adapter->stats.rnbc,
5376 			"Receive No Buffers");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5378 			CTLFLAG_RD, &adapter->stats.ruc,
5379 			"Receive Undersize");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5381 			CTLFLAG_RD, &adapter->stats.rfc,
5382 			"Fragmented Packets Received");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5384 			CTLFLAG_RD, &adapter->stats.roc,
5385 			"Oversized Packets Received");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5387 			CTLFLAG_RD, &adapter->stats.rjc,
5388 			"Received Jabber");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5390 			CTLFLAG_RD, &adapter->stats.rxerrc,
5391 			"Receive Errors");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5393 			CTLFLAG_RD, &adapter->stats.crcerrs,
5394 			"CRC errors");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5396 			CTLFLAG_RD, &adapter->stats.algnerrc,
5397 			"Alignment Errors");
5398 	/* On 82575 these are collision counts */
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5400 			CTLFLAG_RD, &adapter->stats.cexterr,
5401 			"Collision/Carrier extension errors");
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5403 			CTLFLAG_RD, &adapter->stats.xonrxc,
5404 			"XON Received");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5406 			CTLFLAG_RD, &adapter->stats.xontxc,
5407 			"XON Transmitted");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5409 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5410 			"XOFF Received");
5411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5412 			CTLFLAG_RD, &adapter->stats.xofftxc,
5413 			"XOFF Transmitted");
5414 
5415 	/* Packet Reception Stats */
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5417 			CTLFLAG_RD, &adapter->stats.tpr,
5418 			"Total Packets Received");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5420 			CTLFLAG_RD, &adapter->stats.gprc,
5421 			"Good Packets Received");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5423 			CTLFLAG_RD, &adapter->stats.bprc,
5424 			"Broadcast Packets Received");
5425 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5426 			CTLFLAG_RD, &adapter->stats.mprc,
5427 			"Multicast Packets Received");
5428 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5429 			CTLFLAG_RD, &adapter->stats.prc64,
5430 			"64 byte frames received");
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5432 			CTLFLAG_RD, &adapter->stats.prc127,
5433 			"65-127 byte frames received");
5434 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5435 			CTLFLAG_RD, &adapter->stats.prc255,
5436 			"128-255 byte frames received");
5437 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5438 			CTLFLAG_RD, &adapter->stats.prc511,
5439 			"256-511 byte frames received");
5440 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5441 			CTLFLAG_RD, &adapter->stats.prc1023,
5442 			"512-1023 byte frames received");
5443 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5444 			CTLFLAG_RD, &adapter->stats.prc1522,
5445 			"1024-1522 byte frames received");
5446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5447 			CTLFLAG_RD, &adapter->stats.gorc,
5448 			"Good Octets Received");
5449 
5450 	/* Packet Transmission Stats */
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5452 			CTLFLAG_RD, &adapter->stats.gotc,
5453 			"Good Octets Transmitted");
5454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5455 			CTLFLAG_RD, &adapter->stats.tpt,
5456 			"Total Packets Transmitted");
5457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5458 			CTLFLAG_RD, &adapter->stats.gptc,
5459 			"Good Packets Transmitted");
5460 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5461 			CTLFLAG_RD, &adapter->stats.bptc,
5462 			"Broadcast Packets Transmitted");
5463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5464 			CTLFLAG_RD, &adapter->stats.mptc,
5465 			"Multicast Packets Transmitted");
5466 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5467 			CTLFLAG_RD, &adapter->stats.ptc64,
5468 			"64 byte frames transmitted");
5469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5470 			CTLFLAG_RD, &adapter->stats.ptc127,
5471 			"65-127 byte frames transmitted");
5472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5473 			CTLFLAG_RD, &adapter->stats.ptc255,
5474 			"128-255 byte frames transmitted");
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5476 			CTLFLAG_RD, &adapter->stats.ptc511,
5477 			"256-511 byte frames transmitted");
5478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5479 			CTLFLAG_RD, &adapter->stats.ptc1023,
5480 			"512-1023 byte frames transmitted");
5481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5482 			CTLFLAG_RD, &adapter->stats.ptc1522,
5483 			"1024-1522 byte frames transmitted");
5484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5485 			CTLFLAG_RD, &adapter->stats.tsctc,
5486 			"TSO Contexts Transmitted");
5487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5488 			CTLFLAG_RD, &adapter->stats.tsctfc,
5489 			"TSO Contexts Failed");
5490 
5492 	/* Interrupt Stats */
5493 
5494 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5495 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5496 	int_list = SYSCTL_CHILDREN(int_node);
5497 
5498 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5499 			CTLFLAG_RD, &adapter->stats.iac,
5500 			"Interrupt Assertion Count");
5501 
5502 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5503 			CTLFLAG_RD, &adapter->stats.icrxptc,
5504 			"Interrupt Cause Rx Pkt Timer Expire Count");
5505 
5506 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5507 			CTLFLAG_RD, &adapter->stats.icrxatc,
5508 			"Interrupt Cause Rx Abs Timer Expire Count");
5509 
5510 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5511 			CTLFLAG_RD, &adapter->stats.ictxptc,
5512 			"Interrupt Cause Tx Pkt Timer Expire Count");
5513 
5514 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5515 			CTLFLAG_RD, &adapter->stats.ictxatc,
5516 			"Interrupt Cause Tx Abs Timer Expire Count");
5517 
5518 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5519 			CTLFLAG_RD, &adapter->stats.ictxqec,
5520 			"Interrupt Cause Tx Queue Empty Count");
5521 
5522 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5523 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5524 			"Interrupt Cause Tx Queue Min Thresh Count");
5525 
5526 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5527 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5528 			"Interrupt Cause Rx Desc Min Thresh Count");
5529 
5530 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5531 			CTLFLAG_RD, &adapter->stats.icrxoc,
5532 			"Interrupt Cause Receiver Overrun Count");
5533 }
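
/*
 * Everything registered above lands under the device's sysctl tree.
 * A hedged usage sketch, assuming the adapter attached as em0:
 *
 *	# sysctl dev.em.0.mac_stats.crc_errs
 *	# sysctl dev.em.0.queue0.txd_head
 *	# sysctl dev.em.0.interrupts.asserts
 *
 * The "dev.em.0" prefix depends on the driver name and unit number.
 */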
5534 
5535 /**********************************************************************
5536  *
5537  *  This routine provides a way to dump out the adapter eeprom,
5538  *  often a useful debug/service tool.  Only the first 32 16-bit
5539  *  words are dumped; the fields that matter all live in that range.
5540  *
5541  **********************************************************************/
5542 static int
5543 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5544 {
5545 	struct adapter *adapter = (struct adapter *)arg1;
5546 	int error;
5547 	int result;
5548 
5549 	result = -1;
5550 	error = sysctl_handle_int(oidp, &result, 0, req);
5551 
5552 	if (error || !req->newptr)
5553 		return (error);
5554 
5555 	/*
5556 	 * This value will cause a hex dump of the
5557 	 * first 32 16-bit words of the EEPROM to
5558 	 * the screen.
5559 	 */
5560 	if (result == 1)
5561 		em_print_nvm_info(adapter);
5562 
5563 	return (error);
5564 }
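
/*
 * Usage sketch: writing 1 to the OID backed by this handler triggers
 * the dump on the console, while reads just return -1.  The OID name
 * is an assumption here, since it is registered elsewhere in the
 * driver:
 *
 *	# sysctl dev.em.0.nvm=1
 */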
5565 
5566 static void
5567 em_print_nvm_info(struct adapter *adapter)
5568 {
5569 	u16	eeprom_data;
5570 	int	i, j, row = 0;
5571 
5572 	/* It's a bit crude, but it gets the job done. */
5573 	printf("\nInterface EEPROM Dump:\n");
5574 	printf("Offset\n0x0000  ");
5575 	for (i = 0, j = 0; i < 32; i++, j++) {
5576 		if (j == 8) { /* Make the offset block */
5577 			j = 0; ++row;
5578 			printf("\n0x00%x0  ", row);
5579 		}
5580 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5581 		printf("%04x ", eeprom_data);
5582 	}
5583 	printf("\n");
5584 }
5585 
5586 static int
5587 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5588 {
5589 	struct em_int_delay_info *info;
5590 	struct adapter *adapter;
5591 	u32 regval;
5592 	int error, usecs, ticks;
5593 
5594 	info = (struct em_int_delay_info *)arg1;
5595 	usecs = info->value;
5596 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5597 	if (error != 0 || req->newptr == NULL)
5598 		return (error);
5599 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5600 		return (EINVAL);
5601 	info->value = usecs;
5602 	ticks = EM_USECS_TO_TICKS(usecs);
5603 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5604 		ticks *= 4;
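	/*
	 * Worked example: the delay registers tick in units of 1.024
	 * usecs, so a request of 125 usecs becomes roughly 122 ticks;
	 * for E1000_ITR, whose unit is 256 ns (one quarter of a tick),
	 * the same request scales up to 488.
	 */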
5605 
5606 	adapter = info->adapter;
5607 
5608 	EM_CORE_LOCK(adapter);
5609 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5610 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5611 	/* Handle a few special cases. */
5612 	switch (info->offset) {
5613 	case E1000_RDTR:
5614 		break;
5615 	case E1000_TIDV:
5616 		if (ticks == 0) {
5617 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5618 			/* Don't write 0 into the TIDV register. */
5619 			regval++;
5620 		} else
5621 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5622 		break;
5623 	}
5624 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5625 	EM_CORE_UNLOCK(adapter);
5626 	return (0);
5627 }
5628 
5629 static void
5630 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5631 	const char *description, struct em_int_delay_info *info,
5632 	int offset, int value)
5633 {
5634 	info->adapter = adapter;
5635 	info->offset = offset;
5636 	info->value = value;
5637 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5638 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5639 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5640 	    info, 0, em_sysctl_int_delay, "I", description);
5641 }
5642 
5643 static void
5644 em_set_sysctl_value(struct adapter *adapter, const char *name,
5645 	const char *description, int *limit, int value)
5646 {
5647 	*limit = value;
5648 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5649 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5650 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5651 }
5652 
5653 
5654 /*
5655 ** Set flow control using sysctl:
5656 ** Flow control values:
5657 **      0 - off
5658 **      1 - rx pause
5659 **      2 - tx pause
5660 **      3 - full
5661 */
5662 static int
5663 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5664 {
5665 	struct adapter	*adapter = (struct adapter *) arg1;
5666 	int		error;
5667 	int		input;
5668 
5669 	input = adapter->fc;	/* report the currently configured mode */
5670 	error = sysctl_handle_int(oidp, &input, 0, req);
5671 	if ((error) || (req->newptr == NULL))
5672 		return (error);
5673 
5674 	if (input == adapter->fc) /* no change? */
5675 		return (error);
5676 
5677 	switch (input) {
5678 	case e1000_fc_rx_pause:
5679 	case e1000_fc_tx_pause:
5680 	case e1000_fc_full:
5681 	case e1000_fc_none:
5682 		adapter->hw.fc.requested_mode = input;
5683 		adapter->fc = input;
5684 		break;
5685 	default:
5686 		/* Reject values outside the table above */
5687 		return (EINVAL);
5688 	}
5689 
5690 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5691 	e1000_force_mac_fc(&adapter->hw);
5692 	return (error);
5693 }
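
/*
 * Usage sketch: the accepted values are exactly the e1000_fc_* enum
 * values, which line up with the table in the comment above
 * (e1000_fc_none = 0 through e1000_fc_full = 3).  The OID name is an
 * assumption; it is registered elsewhere in the driver:
 *
 *	# sysctl dev.em.0.fc=3		request full flow control
 */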
5694 
5695 /*
5696 ** Manage Energy Efficient Ethernet:
5697 ** Control values:
5698 **     0 - EEE enabled, 1 - EEE disabled
5699 */
5700 static int
5701 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5702 {
5703 	struct adapter	*adapter = (struct adapter *) arg1;
5704 	int		error, value;
5705 
5706 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5707 	error = sysctl_handle_int(oidp, &value, 0, req);
5708 	if (error || req->newptr == NULL)
5709 		return (error);
5710 	EM_CORE_LOCK(adapter);
5711 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5712 	em_init_locked(adapter);
5713 	EM_CORE_UNLOCK(adapter);
5714 	return (0);
5715 }
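
/*
 * Usage sketch (OID name is an assumption): the written value feeds
 * eee_disable, so writing 1 turns EEE off, and any change reinits the
 * interface via em_init_locked():
 *
 *	# sysctl dev.em.0.eee_control=1		disable EEE
 */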
5716 
5717 static int
5718 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5719 {
5720 	struct adapter *adapter;
5721 	int error;
5722 	int result;
5723 
5724 	result = -1;
5725 	error = sysctl_handle_int(oidp, &result, 0, req);
5726 
5727 	if (error || !req->newptr)
5728 		return (error);
5729 
5730 	if (result == 1) {
5731 		adapter = (struct adapter *)arg1;
5732 		em_print_debug_info(adapter);
5733 	}
5734 
5735 	return (error);
5736 }
5737 
5738 /*
5739 ** This routine is meant to be fluid, add whatever is
5740 ** needed for debugging a problem.  -jfv
5741 */
5742 static void
5743 em_print_debug_info(struct adapter *adapter)
5744 {
5745 	device_t dev = adapter->dev;
5746 	struct tx_ring *txr = adapter->tx_rings;
5747 	struct rx_ring *rxr = adapter->rx_rings;
5748 
5749 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5750 		printf("Interface is RUNNING ");
5751 	else
5752 		printf("Interface is NOT RUNNING ");
5753 
5754 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5755 		printf("and INACTIVE\n");
5756 	else
5757 		printf("and ACTIVE\n");
5758 
5759 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5760 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5761 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5762 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5763 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5764 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5765 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5766 	device_printf(dev, "TX descriptors avail = %d\n",
5767 	    txr->tx_avail);
5768 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5769 	    txr->no_desc_avail);
5770 	device_printf(dev, "RX discarded packets = %ld\n",
5771 	    rxr->rx_discarded);
5772 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5773 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5774 }
5775