xref: /freebsd/sys/dev/re/if_re.c (revision 531c890b8aecbf157fe3491503b5ca62c0b01093)
1 /*-
2  * Copyright (c) 1997, 1998-2003
3  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by Bill Paul.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30  * THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 /*
37  * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
38  *
39  * Written by Bill Paul <wpaul@windriver.com>
40  * Senior Networking Software Engineer
41  * Wind River Systems
42  */
43 
44 /*
45  * This driver is designed to support RealTek's next generation of
46  * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47  * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
48  * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
49  *
50  * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51  * with the older 8139 family, however it also supports a special
52  * C+ mode of operation that provides several new performance enhancing
53  * features. These include:
54  *
55  *	o Descriptor based DMA mechanism. Each descriptor represents
56  *	  a single packet fragment. Data buffers may be aligned on
57  *	  any byte boundary.
58  *
59  *	o 64-bit DMA
60  *
61  *	o TCP/IP checksum offload for both RX and TX
62  *
63  *	o High and normal priority transmit DMA rings
64  *
65  *	o VLAN tag insertion and extraction
66  *
67  *	o TCP large send (segmentation offload)
68  *
69  * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70  * programming API is fairly straightforward. The RX filtering, EEPROM
71  * access and PHY access is the same as it is on the older 8139 series
72  * chips.
73  *
74  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75  * same programming API and feature set as the 8139C+ with the following
76  * differences and additions:
77  *
78  *	o 1000Mbps mode
79  *
80  *	o Jumbo frames
81  *
82  *	o GMII and TBI ports/registers for interfacing with copper
83  *	  or fiber PHYs
84  *
85  *	o RX and TX DMA rings can have up to 1024 descriptors
86  *	  (the 8139C+ allows a maximum of 64)
87  *
88  *	o Slight differences in register layout from the 8139C+
89  *
90  * The TX start and timer interrupt registers are at different locations
91  * on the 8169 than they are on the 8139C+. Also, the status word in the
92  * RX descriptor has a slightly different bit layout. The 8169 does not
93  * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94  * copper gigE PHY.
95  *
96  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97  * (the 'S' stands for 'single-chip'). These devices have the same
98  * programming API as the older 8169, but also have some vendor-specific
99  * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101  *
102  * This driver takes advantage of the RX and TX checksum offload and
103  * VLAN tag insertion/extraction features. It also implements TX
104  * interrupt moderation using the timer interrupt registers, which
105  * significantly reduces TX interrupt load. There is also support
106  * for jumbo frames, however the 8169/8169S/8110S can not transmit
107  * jumbo frames larger than 7440, so the max MTU possible with this
108  * driver is 7422 bytes.
109  */
110 
111 #ifdef HAVE_KERNEL_OPTION_HEADERS
112 #include "opt_device_polling.h"
113 #endif
114 
115 #include <sys/param.h>
116 #include <sys/endian.h>
117 #include <sys/systm.h>
118 #include <sys/sockio.h>
119 #include <sys/mbuf.h>
120 #include <sys/malloc.h>
121 #include <sys/module.h>
122 #include <sys/kernel.h>
123 #include <sys/socket.h>
124 #include <sys/lock.h>
125 #include <sys/mutex.h>
126 #include <sys/taskqueue.h>
127 
128 #include <net/if.h>
129 #include <net/if_arp.h>
130 #include <net/ethernet.h>
131 #include <net/if_dl.h>
132 #include <net/if_media.h>
133 #include <net/if_types.h>
134 #include <net/if_vlan_var.h>
135 
136 #include <net/bpf.h>
137 
138 #include <machine/bus.h>
139 #include <machine/resource.h>
140 #include <sys/bus.h>
141 #include <sys/rman.h>
142 
143 #include <dev/mii/mii.h>
144 #include <dev/mii/miivar.h>
145 
146 #include <dev/pci/pcireg.h>
147 #include <dev/pci/pcivar.h>
148 
149 #include <pci/if_rlreg.h>
150 
151 MODULE_DEPEND(re, pci, 1, 1, 1);
152 MODULE_DEPEND(re, ether, 1, 1, 1);
153 MODULE_DEPEND(re, miibus, 1, 1, 1);
154 
155 /* "device miibus" required.  See GENERIC if you get errors here. */
156 #include "miibus_if.h"
157 
158 /*
159  * Default to using PIO access for this driver.
160  */
161 #define RE_USEIOSPACE
162 
163 /* Tunables. */
164 static int msi_disable = 0;
165 TUNABLE_INT("hw.re.msi_disable", &msi_disable);
166 
167 #define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
168 
169 /*
170  * Various supported device vendors/types and their names.
171  */
172 static struct rl_type re_devs[] = {
173 	{ DLINK_VENDORID, DLINK_DEVICEID_528T, RL_HWREV_8169S,
174 		"D-Link DGE-528(T) Gigabit Ethernet Adapter" },
175 	{ DLINK_VENDORID, DLINK_DEVICEID_528T, RL_HWREV_8169_8110SB,
176 		"D-Link DGE-528(T) Rev.B1 Gigabit Ethernet Adapter" },
177 	{ RT_VENDORID, RT_DEVICEID_8139, RL_HWREV_8139CPLUS,
178 		"RealTek 8139C+ 10/100BaseTX" },
179 	{ RT_VENDORID, RT_DEVICEID_8101E, RL_HWREV_8101E,
180 		"RealTek 8101E PCIe 10/100baseTX" },
181 	{ RT_VENDORID, RT_DEVICEID_8168, RL_HWREV_8168_SPIN1,
182 		"RealTek 8168/8111B PCIe Gigabit Ethernet" },
183 	{ RT_VENDORID, RT_DEVICEID_8168, RL_HWREV_8168_SPIN2,
184 		"RealTek 8168/8111B PCIe Gigabit Ethernet" },
185 	{ RT_VENDORID, RT_DEVICEID_8168, RL_HWREV_8168_SPIN3,
186 		"RealTek 8168/8111B PCIe Gigabit Ethernet" },
187 	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169,
188 		"RealTek 8169 Gigabit Ethernet" },
189 	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169S,
190 		"RealTek 8169S Single-chip Gigabit Ethernet" },
191 	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169_8110SB,
192 		"RealTek 8169SB/8110SB Single-chip Gigabit Ethernet" },
193 	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169_8110SC,
194 		"RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
195 	{ RT_VENDORID, RT_DEVICEID_8169SC, RL_HWREV_8169_8110SC,
196 		"RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
197 	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8110S,
198 		"RealTek 8110S Single-chip Gigabit Ethernet" },
199 	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, RL_HWREV_8169S,
200 		"Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
201 	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, RL_HWREV_8169S,
202 		"Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
203 	{ USR_VENDORID, USR_DEVICEID_997902, RL_HWREV_8169S,
204 		"US Robotics 997902 (RTL8169S) Gigabit Ethernet" },
205 	{ 0, 0, 0, NULL }
206 };
207 
208 static struct rl_hwrev re_hwrevs[] = {
209 	{ RL_HWREV_8139, RL_8139,  "" },
210 	{ RL_HWREV_8139A, RL_8139, "A" },
211 	{ RL_HWREV_8139AG, RL_8139, "A-G" },
212 	{ RL_HWREV_8139B, RL_8139, "B" },
213 	{ RL_HWREV_8130, RL_8139, "8130" },
214 	{ RL_HWREV_8139C, RL_8139, "C" },
215 	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
216 	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
217 	{ RL_HWREV_8168_SPIN1, RL_8169, "8168"},
218 	{ RL_HWREV_8169, RL_8169, "8169"},
219 	{ RL_HWREV_8169S, RL_8169, "8169S"},
220 	{ RL_HWREV_8110S, RL_8169, "8110S"},
221 	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB"},
222 	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC"},
223 	{ RL_HWREV_8100, RL_8139, "8100"},
224 	{ RL_HWREV_8101, RL_8139, "8101"},
225 	{ RL_HWREV_8100E, RL_8169, "8100E"},
226 	{ RL_HWREV_8101E, RL_8169, "8101E"},
227 	{ RL_HWREV_8168_SPIN2, RL_8169, "8168"},
228 	{ RL_HWREV_8168_SPIN3, RL_8169, "8168"},
229 	{ 0, 0, NULL }
230 };
231 
232 static int re_probe		(device_t);
233 static int re_attach		(device_t);
234 static int re_detach		(device_t);
235 
236 static int re_encap		(struct rl_softc *, struct mbuf **);
237 
238 static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
239 static int re_allocmem		(device_t, struct rl_softc *);
240 static __inline void re_discard_rxbuf
241 				(struct rl_softc *, int);
242 static int re_newbuf		(struct rl_softc *, int);
243 static int re_rx_list_init	(struct rl_softc *);
244 static int re_tx_list_init	(struct rl_softc *);
245 #ifdef RE_FIXUP_RX
246 static __inline void re_fixup_rx
247 				(struct mbuf *);
248 #endif
249 static int re_rxeof		(struct rl_softc *);
250 static void re_txeof		(struct rl_softc *);
251 #ifdef DEVICE_POLLING
252 static void re_poll		(struct ifnet *, enum poll_cmd, int);
253 static void re_poll_locked	(struct ifnet *, enum poll_cmd, int);
254 #endif
255 static int re_intr		(void *);
256 static void re_tick		(void *);
257 static void re_tx_task		(void *, int);
258 static void re_int_task		(void *, int);
259 static void re_start		(struct ifnet *);
260 static int re_ioctl		(struct ifnet *, u_long, caddr_t);
261 static void re_init		(void *);
262 static void re_init_locked	(struct rl_softc *);
263 static void re_stop		(struct rl_softc *);
264 static void re_watchdog		(struct rl_softc *);
265 static int re_suspend		(device_t);
266 static int re_resume		(device_t);
267 static int re_shutdown		(device_t);
268 static int re_ifmedia_upd	(struct ifnet *);
269 static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);
270 
271 static void re_eeprom_putbyte	(struct rl_softc *, int);
272 static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
273 static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
274 static int re_gmii_readreg	(device_t, int, int);
275 static int re_gmii_writereg	(device_t, int, int, int);
276 
277 static int re_miibus_readreg	(device_t, int, int);
278 static int re_miibus_writereg	(device_t, int, int, int);
279 static void re_miibus_statchg	(device_t);
280 
281 static void re_setmulti		(struct rl_softc *);
282 static void re_reset		(struct rl_softc *);
283 
284 #ifdef RE_DIAG
285 static int re_diag		(struct rl_softc *);
286 #endif
287 
288 #ifdef RE_USEIOSPACE
289 #define RL_RES			SYS_RES_IOPORT
290 #define RL_RID			RL_PCI_LOIO
291 #else
292 #define RL_RES			SYS_RES_MEMORY
293 #define RL_RID			RL_PCI_LOMEM
294 #endif
295 
296 static device_method_t re_methods[] = {
297 	/* Device interface */
298 	DEVMETHOD(device_probe,		re_probe),
299 	DEVMETHOD(device_attach,	re_attach),
300 	DEVMETHOD(device_detach,	re_detach),
301 	DEVMETHOD(device_suspend,	re_suspend),
302 	DEVMETHOD(device_resume,	re_resume),
303 	DEVMETHOD(device_shutdown,	re_shutdown),
304 
305 	/* bus interface */
306 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
307 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
308 
309 	/* MII interface */
310 	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
311 	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
312 	DEVMETHOD(miibus_statchg,	re_miibus_statchg),
313 
314 	{ 0, 0 }
315 };
316 
317 static driver_t re_driver = {
318 	"re",
319 	re_methods,
320 	sizeof(struct rl_softc)
321 };
322 
323 static devclass_t re_devclass;
324 
325 DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
326 DRIVER_MODULE(re, cardbus, re_driver, re_devclass, 0, 0);
327 DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
328 
329 #define EE_SET(x)					\
330 	CSR_WRITE_1(sc, RL_EECMD,			\
331 		CSR_READ_1(sc, RL_EECMD) | x)
332 
333 #define EE_CLR(x)					\
334 	CSR_WRITE_1(sc, RL_EECMD,			\
335 		CSR_READ_1(sc, RL_EECMD) & ~x)
336 
337 /*
338  * Send a read command and address to the EEPROM, check for ACK.
339  */
340 static void
341 re_eeprom_putbyte(sc, addr)
342 	struct rl_softc		*sc;
343 	int			addr;
344 {
345 	register int		d, i;
346 
347 	d = addr | (RL_9346_READ << sc->rl_eewidth);
348 
349 	/*
350 	 * Feed in each bit and strobe the clock.
351 	 */
352 
353 	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
354 		if (d & i) {
355 			EE_SET(RL_EE_DATAIN);
356 		} else {
357 			EE_CLR(RL_EE_DATAIN);
358 		}
359 		DELAY(100);
360 		EE_SET(RL_EE_CLK);
361 		DELAY(150);
362 		EE_CLR(RL_EE_CLK);
363 		DELAY(100);
364 	}
365 
366 	return;
367 }
368 
369 /*
370  * Read a word of data stored in the EEPROM at address 'addr.'
371  */
372 static void
373 re_eeprom_getword(sc, addr, dest)
374 	struct rl_softc		*sc;
375 	int			addr;
376 	u_int16_t		*dest;
377 {
378 	register int		i;
379 	u_int16_t		word = 0;
380 
381 	/*
382 	 * Send address of word we want to read.
383 	 */
384 	re_eeprom_putbyte(sc, addr);
385 
386 	/*
387 	 * Start reading bits from EEPROM.
388 	 */
389 	for (i = 0x8000; i; i >>= 1) {
390 		EE_SET(RL_EE_CLK);
391 		DELAY(100);
392 		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
393 			word |= i;
394 		EE_CLR(RL_EE_CLK);
395 		DELAY(100);
396 	}
397 
398 	*dest = word;
399 
400 	return;
401 }
402 
403 /*
404  * Read a sequence of words from the EEPROM.
405  */
406 static void
407 re_read_eeprom(sc, dest, off, cnt)
408 	struct rl_softc		*sc;
409 	caddr_t			dest;
410 	int			off;
411 	int			cnt;
412 {
413 	int			i;
414 	u_int16_t		word = 0, *ptr;
415 
416 	CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
417 
418         DELAY(100);
419 
420 	for (i = 0; i < cnt; i++) {
421 		CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
422 		re_eeprom_getword(sc, off + i, &word);
423 		CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
424 		ptr = (u_int16_t *)(dest + (i * 2));
425                 *ptr = word;
426 	}
427 
428 	CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
429 
430 	return;
431 }
432 
433 static int
434 re_gmii_readreg(dev, phy, reg)
435 	device_t		dev;
436 	int			phy, reg;
437 {
438 	struct rl_softc		*sc;
439 	u_int32_t		rval;
440 	int			i;
441 
442 	if (phy != 1)
443 		return (0);
444 
445 	sc = device_get_softc(dev);
446 
447 	/* Let the rgephy driver read the GMEDIASTAT register */
448 
449 	if (reg == RL_GMEDIASTAT) {
450 		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
451 		return (rval);
452 	}
453 
454 	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
455 	DELAY(1000);
456 
457 	for (i = 0; i < RL_TIMEOUT; i++) {
458 		rval = CSR_READ_4(sc, RL_PHYAR);
459 		if (rval & RL_PHYAR_BUSY)
460 			break;
461 		DELAY(100);
462 	}
463 
464 	if (i == RL_TIMEOUT) {
465 		device_printf(sc->rl_dev, "PHY read failed\n");
466 		return (0);
467 	}
468 
469 	return (rval & RL_PHYAR_PHYDATA);
470 }
471 
472 static int
473 re_gmii_writereg(dev, phy, reg, data)
474 	device_t		dev;
475 	int			phy, reg, data;
476 {
477 	struct rl_softc		*sc;
478 	u_int32_t		rval;
479 	int			i;
480 
481 	sc = device_get_softc(dev);
482 
483 	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
484 	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
485 	DELAY(1000);
486 
487 	for (i = 0; i < RL_TIMEOUT; i++) {
488 		rval = CSR_READ_4(sc, RL_PHYAR);
489 		if (!(rval & RL_PHYAR_BUSY))
490 			break;
491 		DELAY(100);
492 	}
493 
494 	if (i == RL_TIMEOUT) {
495 		device_printf(sc->rl_dev, "PHY write failed\n");
496 		return (0);
497 	}
498 
499 	return (0);
500 }
501 
502 static int
503 re_miibus_readreg(dev, phy, reg)
504 	device_t		dev;
505 	int			phy, reg;
506 {
507 	struct rl_softc		*sc;
508 	u_int16_t		rval = 0;
509 	u_int16_t		re8139_reg = 0;
510 
511 	sc = device_get_softc(dev);
512 
513 	if (sc->rl_type == RL_8169) {
514 		rval = re_gmii_readreg(dev, phy, reg);
515 		return (rval);
516 	}
517 
518 	/* Pretend the internal PHY is only at address 0 */
519 	if (phy) {
520 		return (0);
521 	}
522 	switch (reg) {
523 	case MII_BMCR:
524 		re8139_reg = RL_BMCR;
525 		break;
526 	case MII_BMSR:
527 		re8139_reg = RL_BMSR;
528 		break;
529 	case MII_ANAR:
530 		re8139_reg = RL_ANAR;
531 		break;
532 	case MII_ANER:
533 		re8139_reg = RL_ANER;
534 		break;
535 	case MII_ANLPAR:
536 		re8139_reg = RL_LPAR;
537 		break;
538 	case MII_PHYIDR1:
539 	case MII_PHYIDR2:
540 		return (0);
541 	/*
542 	 * Allow the rlphy driver to read the media status
543 	 * register. If we have a link partner which does not
544 	 * support NWAY, this is the register which will tell
545 	 * us the results of parallel detection.
546 	 */
547 	case RL_MEDIASTAT:
548 		rval = CSR_READ_1(sc, RL_MEDIASTAT);
549 		return (rval);
550 	default:
551 		device_printf(sc->rl_dev, "bad phy register\n");
552 		return (0);
553 	}
554 	rval = CSR_READ_2(sc, re8139_reg);
555 	if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
556 		/* 8139C+ has different bit layout. */
557 		rval &= ~(BMCR_LOOP | BMCR_ISO);
558 	}
559 	return (rval);
560 }
561 
562 static int
563 re_miibus_writereg(dev, phy, reg, data)
564 	device_t		dev;
565 	int			phy, reg, data;
566 {
567 	struct rl_softc		*sc;
568 	u_int16_t		re8139_reg = 0;
569 	int			rval = 0;
570 
571 	sc = device_get_softc(dev);
572 
573 	if (sc->rl_type == RL_8169) {
574 		rval = re_gmii_writereg(dev, phy, reg, data);
575 		return (rval);
576 	}
577 
578 	/* Pretend the internal PHY is only at address 0 */
579 	if (phy)
580 		return (0);
581 
582 	switch (reg) {
583 	case MII_BMCR:
584 		re8139_reg = RL_BMCR;
585 		if (sc->rl_type == RL_8139CPLUS) {
586 			/* 8139C+ has different bit layout. */
587 			data &= ~(BMCR_LOOP | BMCR_ISO);
588 		}
589 		break;
590 	case MII_BMSR:
591 		re8139_reg = RL_BMSR;
592 		break;
593 	case MII_ANAR:
594 		re8139_reg = RL_ANAR;
595 		break;
596 	case MII_ANER:
597 		re8139_reg = RL_ANER;
598 		break;
599 	case MII_ANLPAR:
600 		re8139_reg = RL_LPAR;
601 		break;
602 	case MII_PHYIDR1:
603 	case MII_PHYIDR2:
604 		return (0);
605 		break;
606 	default:
607 		device_printf(sc->rl_dev, "bad phy register\n");
608 		return (0);
609 	}
610 	CSR_WRITE_2(sc, re8139_reg, data);
611 	return (0);
612 }
613 
614 static void
615 re_miibus_statchg(dev)
616 	device_t		dev;
617 {
618 
619 }
620 
621 /*
622  * Program the 64-bit multicast hash filter.
623  */
624 static void
625 re_setmulti(sc)
626 	struct rl_softc		*sc;
627 {
628 	struct ifnet		*ifp;
629 	int			h = 0;
630 	u_int32_t		hashes[2] = { 0, 0 };
631 	struct ifmultiaddr	*ifma;
632 	u_int32_t		rxfilt;
633 	int			mcnt = 0;
634 	u_int32_t		hwrev;
635 
636 	RL_LOCK_ASSERT(sc);
637 
638 	ifp = sc->rl_ifp;
639 
640 
641 	rxfilt = CSR_READ_4(sc, RL_RXCFG);
642 	rxfilt &= ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_MULTI);
643 	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
644 		if (ifp->if_flags & IFF_PROMISC)
645 			rxfilt |= RL_RXCFG_RX_ALLPHYS;
646 		/*
647 		 * Unlike other hardwares, we have to explicitly set
648 		 * RL_RXCFG_RX_MULTI to receive multicast frames in
649 		 * promiscuous mode.
650 		 */
651 		rxfilt |= RL_RXCFG_RX_MULTI;
652 		CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
653 		CSR_WRITE_4(sc, RL_MAR0, 0xFFFFFFFF);
654 		CSR_WRITE_4(sc, RL_MAR4, 0xFFFFFFFF);
655 		return;
656 	}
657 
658 	/* first, zot all the existing hash bits */
659 	CSR_WRITE_4(sc, RL_MAR0, 0);
660 	CSR_WRITE_4(sc, RL_MAR4, 0);
661 
662 	/* now program new ones */
663 	IF_ADDR_LOCK(ifp);
664 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
665 		if (ifma->ifma_addr->sa_family != AF_LINK)
666 			continue;
667 		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
668 		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
669 		if (h < 32)
670 			hashes[0] |= (1 << h);
671 		else
672 			hashes[1] |= (1 << (h - 32));
673 		mcnt++;
674 	}
675 	IF_ADDR_UNLOCK(ifp);
676 
677 	if (mcnt)
678 		rxfilt |= RL_RXCFG_RX_MULTI;
679 	else
680 		rxfilt &= ~RL_RXCFG_RX_MULTI;
681 
682 	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
683 
684 	/*
685 	 * For some unfathomable reason, RealTek decided to reverse
686 	 * the order of the multicast hash registers in the PCI Express
687 	 * parts. This means we have to write the hash pattern in reverse
688 	 * order for those devices.
689 	 */
690 
691 	hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
692 
693 	switch (hwrev) {
694 	case RL_HWREV_8100E:
695 	case RL_HWREV_8101E:
696 	case RL_HWREV_8168_SPIN1:
697 	case RL_HWREV_8168_SPIN2:
698 	case RL_HWREV_8168_SPIN3:
699 		CSR_WRITE_4(sc, RL_MAR0, bswap32(hashes[1]));
700 		CSR_WRITE_4(sc, RL_MAR4, bswap32(hashes[0]));
701 		break;
702 	default:
703 		CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
704 		CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
705 		break;
706 	}
707 }
708 
709 static void
710 re_reset(sc)
711 	struct rl_softc		*sc;
712 {
713 	register int		i;
714 
715 	RL_LOCK_ASSERT(sc);
716 
717 	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
718 
719 	for (i = 0; i < RL_TIMEOUT; i++) {
720 		DELAY(10);
721 		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
722 			break;
723 	}
724 	if (i == RL_TIMEOUT)
725 		device_printf(sc->rl_dev, "reset never completed!\n");
726 
727 	CSR_WRITE_1(sc, 0x82, 1);
728 }
729 
730 #ifdef RE_DIAG
731 
732 /*
733  * The following routine is designed to test for a defect on some
734  * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
735  * lines connected to the bus, however for a 32-bit only card, they
736  * should be pulled high. The result of this defect is that the
737  * NIC will not work right if you plug it into a 64-bit slot: DMA
738  * operations will be done with 64-bit transfers, which will fail
739  * because the 64-bit data lines aren't connected.
740  *
741  * There's no way to work around this (short of talking a soldering
742  * iron to the board), however we can detect it. The method we use
743  * here is to put the NIC into digital loopback mode, set the receiver
744  * to promiscuous mode, and then try to send a frame. We then compare
745  * the frame data we sent to what was received. If the data matches,
746  * then the NIC is working correctly, otherwise we know the user has
747  * a defective NIC which has been mistakenly plugged into a 64-bit PCI
748  * slot. In the latter case, there's no way the NIC can work correctly,
749  * so we print out a message on the console and abort the device attach.
750  */
751 
752 static int
753 re_diag(sc)
754 	struct rl_softc		*sc;
755 {
756 	struct ifnet		*ifp = sc->rl_ifp;
757 	struct mbuf		*m0;
758 	struct ether_header	*eh;
759 	struct rl_desc		*cur_rx;
760 	u_int16_t		status;
761 	u_int32_t		rxstat;
762 	int			total_len, i, error = 0, phyaddr;
763 	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
764 	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
765 
766 	/* Allocate a single mbuf */
767 	MGETHDR(m0, M_DONTWAIT, MT_DATA);
768 	if (m0 == NULL)
769 		return (ENOBUFS);
770 
771 	RL_LOCK(sc);
772 
773 	/*
774 	 * Initialize the NIC in test mode. This sets the chip up
775 	 * so that it can send and receive frames, but performs the
776 	 * following special functions:
777 	 * - Puts receiver in promiscuous mode
778 	 * - Enables digital loopback mode
779 	 * - Leaves interrupts turned off
780 	 */
781 
782 	ifp->if_flags |= IFF_PROMISC;
783 	sc->rl_testmode = 1;
784 	re_reset(sc);
785 	re_init_locked(sc);
786 	sc->rl_link = 1;
787 	if (sc->rl_type == RL_8169)
788 		phyaddr = 1;
789 	else
790 		phyaddr = 0;
791 
792 	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
793 	for (i = 0; i < RL_TIMEOUT; i++) {
794 		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
795 		if (!(status & BMCR_RESET))
796 			break;
797 	}
798 
799 	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
800 	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);
801 
802 	DELAY(100000);
803 
804 	/* Put some data in the mbuf */
805 
806 	eh = mtod(m0, struct ether_header *);
807 	bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
808 	bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
809 	eh->ether_type = htons(ETHERTYPE_IP);
810 	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
811 
812 	/*
813 	 * Queue the packet, start transmission.
814 	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
815 	 */
816 
817 	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
818 	RL_UNLOCK(sc);
819 	/* XXX: re_diag must not be called when in ALTQ mode */
820 	IF_HANDOFF(&ifp->if_snd, m0, ifp);
821 	RL_LOCK(sc);
822 	m0 = NULL;
823 
824 	/* Wait for it to propagate through the chip */
825 
826 	DELAY(100000);
827 	for (i = 0; i < RL_TIMEOUT; i++) {
828 		status = CSR_READ_2(sc, RL_ISR);
829 		CSR_WRITE_2(sc, RL_ISR, status);
830 		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
831 		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
832 			break;
833 		DELAY(10);
834 	}
835 
836 	if (i == RL_TIMEOUT) {
837 		device_printf(sc->rl_dev,
838 		    "diagnostic failed, failed to receive packet in"
839 		    " loopback mode\n");
840 		error = EIO;
841 		goto done;
842 	}
843 
844 	/*
845 	 * The packet should have been dumped into the first
846 	 * entry in the RX DMA ring. Grab it from there.
847 	 */
848 
849 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
850 	    sc->rl_ldata.rl_rx_list_map,
851 	    BUS_DMASYNC_POSTREAD);
852 	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
853 	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
854 	    BUS_DMASYNC_POSTREAD);
855 	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
856 	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);
857 
858 	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
859 	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
860 	eh = mtod(m0, struct ether_header *);
861 
862 	cur_rx = &sc->rl_ldata.rl_rx_list[0];
863 	total_len = RL_RXBYTES(cur_rx);
864 	rxstat = le32toh(cur_rx->rl_cmdstat);
865 
866 	if (total_len != ETHER_MIN_LEN) {
867 		device_printf(sc->rl_dev,
868 		    "diagnostic failed, received short packet\n");
869 		error = EIO;
870 		goto done;
871 	}
872 
873 	/* Test that the received packet data matches what we sent. */
874 
875 	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
876 	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
877 	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
878 		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
879 		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
880 		    dst, ":", src, ":", ETHERTYPE_IP);
881 		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
882 		    eh->ether_dhost, ":",  eh->ether_shost, ":",
883 		    ntohs(eh->ether_type));
884 		device_printf(sc->rl_dev, "You may have a defective 32-bit "
885 		    "NIC plugged into a 64-bit PCI slot.\n");
886 		device_printf(sc->rl_dev, "Please re-install the NIC in a "
887 		    "32-bit slot for proper operation.\n");
888 		device_printf(sc->rl_dev, "Read the re(4) man page for more "
889 		    "details.\n");
890 		error = EIO;
891 	}
892 
893 done:
894 	/* Turn interface off, release resources */
895 
896 	sc->rl_testmode = 0;
897 	sc->rl_link = 0;
898 	ifp->if_flags &= ~IFF_PROMISC;
899 	re_stop(sc);
900 	if (m0 != NULL)
901 		m_freem(m0);
902 
903 	RL_UNLOCK(sc);
904 
905 	return (error);
906 }
907 
908 #endif
909 
910 /*
911  * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
912  * IDs against our list and return a device name if we find a match.
913  */
914 static int
915 re_probe(dev)
916 	device_t		dev;
917 {
918 	struct rl_type		*t;
919 	struct rl_softc		*sc;
920 	int			rid;
921 	u_int32_t		hwrev;
922 
923 	t = re_devs;
924 	sc = device_get_softc(dev);
925 
926 	while (t->rl_name != NULL) {
927 		if ((pci_get_vendor(dev) == t->rl_vid) &&
928 		    (pci_get_device(dev) == t->rl_did)) {
929 			/*
930 			 * Only attach to rev. 3 of the Linksys EG1032 adapter.
931 			 * Rev. 2 i supported by sk(4).
932 			 */
933 			if ((t->rl_vid == LINKSYS_VENDORID) &&
934 				(t->rl_did == LINKSYS_DEVICEID_EG1032) &&
935 				(pci_get_subdevice(dev) !=
936 				LINKSYS_SUBDEVICE_EG1032_REV3)) {
937 				t++;
938 				continue;
939 			}
940 
941 			/*
942 			 * Temporarily map the I/O space
943 			 * so we can read the chip ID register.
944 			 */
945 			rid = RL_RID;
946 			sc->rl_res = bus_alloc_resource_any(dev, RL_RES, &rid,
947 			    RF_ACTIVE);
948 			if (sc->rl_res == NULL) {
949 				device_printf(dev,
950 				    "couldn't map ports/memory\n");
951 				return (ENXIO);
952 			}
953 			sc->rl_btag = rman_get_bustag(sc->rl_res);
954 			sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
955 			hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
956 			bus_release_resource(dev, RL_RES,
957 			    RL_RID, sc->rl_res);
958 			if (t->rl_basetype == hwrev) {
959 				device_set_desc(dev, t->rl_name);
960 				return (BUS_PROBE_DEFAULT);
961 			}
962 		}
963 		t++;
964 	}
965 
966 	return (ENXIO);
967 }
968 
969 /*
970  * Map a single buffer address.
971  */
972 
973 static void
974 re_dma_map_addr(arg, segs, nseg, error)
975 	void			*arg;
976 	bus_dma_segment_t	*segs;
977 	int			nseg;
978 	int			error;
979 {
980 	bus_addr_t		*addr;
981 
982 	if (error)
983 		return;
984 
985 	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
986 	addr = arg;
987 	*addr = segs->ds_addr;
988 }
989 
990 static int
991 re_allocmem(dev, sc)
992 	device_t		dev;
993 	struct rl_softc		*sc;
994 {
995 	bus_size_t		rx_list_size, tx_list_size;
996 	int			error;
997 	int			i;
998 
999 	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
1000 	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);
1001 
1002 	/*
1003 	 * Allocate the parent bus DMA tag appropriate for PCI.
1004 	 */
1005 	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
1006 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1007 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1008 	    NULL, NULL, &sc->rl_parent_tag);
1009 	if (error) {
1010 		device_printf(dev, "could not allocate parent DMA tag\n");
1011 		return (error);
1012 	}
1013 
1014 	/*
1015 	 * Allocate map for TX mbufs.
1016 	 */
1017 	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
1018 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
1019 	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
1020 	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
1021 	if (error) {
1022 		device_printf(dev, "could not allocate TX DMA tag\n");
1023 		return (error);
1024 	}
1025 
1026 	/*
1027 	 * Allocate map for RX mbufs.
1028 	 */
1029 
1030 	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
1031 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1032 	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
1033 	if (error) {
1034 		device_printf(dev, "could not allocate RX DMA tag\n");
1035 		return (error);
1036 	}
1037 
1038 	/*
1039 	 * Allocate map for TX descriptor list.
1040 	 */
1041 	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1042 	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1043 	    NULL, tx_list_size, 1, tx_list_size, 0,
1044 	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
1045 	if (error) {
1046 		device_printf(dev, "could not allocate TX DMA ring tag\n");
1047 		return (error);
1048 	}
1049 
1050 	/* Allocate DMA'able memory for the TX ring */
1051 
1052 	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
1053 	    (void **)&sc->rl_ldata.rl_tx_list,
1054 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1055 	    &sc->rl_ldata.rl_tx_list_map);
1056 	if (error) {
1057 		device_printf(dev, "could not allocate TX DMA ring\n");
1058 		return (error);
1059 	}
1060 
1061 	/* Load the map for the TX ring. */
1062 
1063 	sc->rl_ldata.rl_tx_list_addr = 0;
1064 	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
1065 	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
1066 	     tx_list_size, re_dma_map_addr,
1067 	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
1068 	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
1069 		device_printf(dev, "could not load TX DMA ring\n");
1070 		return (ENOMEM);
1071 	}
1072 
1073 	/* Create DMA maps for TX buffers */
1074 
1075 	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1076 		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
1077 		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1078 		if (error) {
1079 			device_printf(dev, "could not create DMA map for TX\n");
1080 			return (error);
1081 		}
1082 	}
1083 
1084 	/*
1085 	 * Allocate map for RX descriptor list.
1086 	 */
1087 	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1088 	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1089 	    NULL, rx_list_size, 1, rx_list_size, 0,
1090 	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
1091 	if (error) {
1092 		device_printf(dev, "could not create RX DMA ring tag\n");
1093 		return (error);
1094 	}
1095 
1096 	/* Allocate DMA'able memory for the RX ring */
1097 
1098 	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
1099 	    (void **)&sc->rl_ldata.rl_rx_list,
1100 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1101 	    &sc->rl_ldata.rl_rx_list_map);
1102 	if (error) {
1103 		device_printf(dev, "could not allocate RX DMA ring\n");
1104 		return (error);
1105 	}
1106 
1107 	/* Load the map for the RX ring. */
1108 
1109 	sc->rl_ldata.rl_rx_list_addr = 0;
1110 	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
1111 	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
1112 	     rx_list_size, re_dma_map_addr,
1113 	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
1114 	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
1115 		device_printf(dev, "could not load RX DMA ring\n");
1116 		return (ENOMEM);
1117 	}
1118 
1119 	/* Create DMA maps for RX buffers */
1120 
1121 	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1122 	    &sc->rl_ldata.rl_rx_sparemap);
1123 	if (error) {
1124 		device_printf(dev, "could not create spare DMA map for RX\n");
1125 		return (error);
1126 	}
1127 	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1128 		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1129 		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1130 		if (error) {
1131 			device_printf(dev, "could not create DMA map for RX\n");
1132 			return (error);
1133 		}
1134 	}
1135 
1136 	return (0);
1137 }
1138 
1139 /*
1140  * Attach the interface. Allocate softc structures, do ifmedia
1141  * setup and ethernet/BPF attach.
1142  */
1143 static int
1144 re_attach(dev)
1145 	device_t		dev;
1146 {
1147 	u_char			eaddr[ETHER_ADDR_LEN];
1148 	u_int16_t		as[ETHER_ADDR_LEN / 2];
1149 	struct rl_softc		*sc;
1150 	struct ifnet		*ifp;
1151 	struct rl_hwrev		*hw_rev;
1152 	int			hwrev;
1153 	u_int16_t		re_did = 0;
1154 	int			error = 0, rid, i;
1155 	int			msic, reg;
1156 
1157 	sc = device_get_softc(dev);
1158 	sc->rl_dev = dev;
1159 
1160 	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1161 	    MTX_DEF);
1162 	callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1163 
1164 	/*
1165 	 * Map control/status registers.
1166 	 */
1167 	pci_enable_busmaster(dev);
1168 
1169 	rid = RL_RID;
1170 	sc->rl_res = bus_alloc_resource_any(dev, RL_RES, &rid,
1171 	    RF_ACTIVE);
1172 
1173 	if (sc->rl_res == NULL) {
1174 		device_printf(dev, "couldn't map ports/memory\n");
1175 		error = ENXIO;
1176 		goto fail;
1177 	}
1178 
1179 	sc->rl_btag = rman_get_bustag(sc->rl_res);
1180 	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1181 
1182 	msic = 0;
1183 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
1184 		msic = pci_msi_count(dev);
1185 		if (bootverbose)
1186 			device_printf(dev, "MSI count : %d\n", msic);
1187 	}
1188 	if (msic == RL_MSI_MESSAGES  && msi_disable == 0) {
1189 		if (pci_alloc_msi(dev, &msic) == 0) {
1190 			if (msic == RL_MSI_MESSAGES) {
1191 				device_printf(dev, "Using %d MSI messages\n",
1192 				    msic);
1193 				sc->rl_msi = 1;
1194 			} else
1195 				pci_release_msi(dev);
1196 		}
1197 	}
1198 
1199 	/* Allocate interrupt */
1200 	if (sc->rl_msi == 0) {
1201 		rid = 0;
1202 		sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1203 		    RF_SHAREABLE | RF_ACTIVE);
1204 		if (sc->rl_irq[0] == NULL) {
1205 			device_printf(dev, "couldn't allocate IRQ resources\n");
1206 			error = ENXIO;
1207 			goto fail;
1208 		}
1209 	} else {
1210 		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1211 			sc->rl_irq[i] = bus_alloc_resource_any(dev,
1212 			    SYS_RES_IRQ, &rid, RF_ACTIVE);
1213 			if (sc->rl_irq[i] == NULL) {
1214 				device_printf(dev,
1215 				    "couldn't llocate IRQ resources for "
1216 				    "message %d\n", rid);
1217 				error = ENXIO;
1218 				goto fail;
1219 			}
1220 		}
1221 	}
1222 
1223 	/* Reset the adapter. */
1224 	RL_LOCK(sc);
1225 	re_reset(sc);
1226 	RL_UNLOCK(sc);
1227 
1228 	hw_rev = re_hwrevs;
1229 	hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
1230 	while (hw_rev->rl_desc != NULL) {
1231 		if (hw_rev->rl_rev == hwrev) {
1232 			sc->rl_type = hw_rev->rl_type;
1233 			break;
1234 		}
1235 		hw_rev++;
1236 	}
1237 	if (hw_rev->rl_desc == NULL) {
1238 		device_printf(dev, "Unknown H/W revision: %08x\n", hwrev);
1239 		error = ENXIO;
1240 		goto fail;
1241 	}
1242 
1243 	sc->rl_eewidth = RL_9356_ADDR_LEN;
1244 	re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1245 	if (re_did != 0x8129)
1246 	        sc->rl_eewidth = RL_9346_ADDR_LEN;
1247 
1248 	/*
1249 	 * Get station address from the EEPROM.
1250 	 */
1251 	re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1252 	for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1253 		as[i] = le16toh(as[i]);
1254 	bcopy(as, eaddr, sizeof(eaddr));
1255 
1256 	if (sc->rl_type == RL_8169) {
1257 		/* Set RX length mask and number of descriptors. */
1258 		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1259 		sc->rl_txstart = RL_GTXSTART;
1260 		sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1261 		sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1262 	} else {
1263 		/* Set RX length mask and number of descriptors. */
1264 		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1265 		sc->rl_txstart = RL_TXSTART;
1266 		sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1267 		sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1268 	}
1269 
1270 	error = re_allocmem(dev, sc);
1271 	if (error)
1272 		goto fail;
1273 
1274 	ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1275 	if (ifp == NULL) {
1276 		device_printf(dev, "can not if_alloc()\n");
1277 		error = ENOSPC;
1278 		goto fail;
1279 	}
1280 
1281 	/* Do MII setup */
1282 	if (mii_phy_probe(dev, &sc->rl_miibus,
1283 	    re_ifmedia_upd, re_ifmedia_sts)) {
1284 		device_printf(dev, "MII without any phy!\n");
1285 		error = ENXIO;
1286 		goto fail;
1287 	}
1288 
1289 	/* Take PHY out of power down mode. */
1290 	if (sc->rl_type == RL_8169) {
1291 		uint32_t rev;
1292 
1293 		rev = CSR_READ_4(sc, RL_TXCFG);
1294 		/* HWVERID 0, 1 and 2 :  bit26-30, bit23 */
1295 		rev &= 0x7c800000;
1296 		if (rev != 0) {
1297 			/* RTL8169S single chip */
1298 			switch (rev) {
1299 			case RL_HWREV_8169_8110SB:
1300 			case RL_HWREV_8169_8110SC:
1301 			case RL_HWREV_8168_SPIN2:
1302 			case RL_HWREV_8168_SPIN3:
1303 				re_gmii_writereg(dev, 1, 0x1f, 0);
1304 				re_gmii_writereg(dev, 1, 0x0e, 0);
1305 				break;
1306 			default:
1307 				break;
1308 			}
1309 		}
1310 	}
1311 
1312 	ifp->if_softc = sc;
1313 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1314 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1315 	ifp->if_ioctl = re_ioctl;
1316 	ifp->if_start = re_start;
1317 	ifp->if_hwassist = RE_CSUM_FEATURES | CSUM_TSO;
1318 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4;
1319 	ifp->if_capenable = ifp->if_capabilities;
1320 	ifp->if_init = re_init;
1321 	IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1322 	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1323 	IFQ_SET_READY(&ifp->if_snd);
1324 
1325 	TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
1326 	TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1327 
1328 	/*
1329 	 * Call MI attach routine.
1330 	 */
1331 	ether_ifattach(ifp, eaddr);
1332 
1333 	/* VLAN capability setup */
1334 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1335 	if (ifp->if_capabilities & IFCAP_HWCSUM)
1336 		ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1337 	ifp->if_capenable = ifp->if_capabilities;
1338 #ifdef DEVICE_POLLING
1339 	ifp->if_capabilities |= IFCAP_POLLING;
1340 #endif
1341 	/*
1342 	 * Tell the upper layer(s) we support long frames.
1343 	 * Must appear after the call to ether_ifattach() because
1344 	 * ether_ifattach() sets ifi_hdrlen to the default value.
1345 	 */
1346 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1347 
1348 #ifdef RE_DIAG
1349 	/*
1350 	 * Perform hardware diagnostic on the original RTL8169.
1351 	 * Some 32-bit cards were incorrectly wired and would
1352 	 * malfunction if plugged into a 64-bit slot.
1353 	 */
1354 
1355 	if (hwrev == RL_HWREV_8169) {
1356 		error = re_diag(sc);
1357 		if (error) {
1358 			device_printf(dev,
1359 		    	"attach aborted due to hardware diag failure\n");
1360 			ether_ifdetach(ifp);
1361 			goto fail;
1362 		}
1363 	}
1364 #endif
1365 
1366 	/* Hook interrupt last to avoid having to lock softc */
1367 	if (sc->rl_msi == 0)
1368 		error = bus_setup_intr(dev, sc->rl_irq[0],
1369 		    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1370 		    &sc->rl_intrhand[0]);
1371 	else {
1372 		for (i = 0; i < RL_MSI_MESSAGES; i++) {
1373 			error = bus_setup_intr(dev, sc->rl_irq[i],
1374 			    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1375 		    	    &sc->rl_intrhand[i]);
1376 			if (error != 0)
1377 				break;
1378 		}
1379 	}
1380 	if (error) {
1381 		device_printf(dev, "couldn't set up irq\n");
1382 		ether_ifdetach(ifp);
1383 	}
1384 
1385 fail:
1386 
1387 	if (error)
1388 		re_detach(dev);
1389 
1390 	return (error);
1391 }
1392 
1393 /*
1394  * Shutdown hardware and free up resources. This can be called any
1395  * time after the mutex has been initialized. It is called in both
1396  * the error case in attach and the normal detach case so it needs
1397  * to be careful about only freeing resources that have actually been
1398  * allocated.
1399  */
1400 static int
1401 re_detach(dev)
1402 	device_t		dev;
1403 {
1404 	struct rl_softc		*sc;
1405 	struct ifnet		*ifp;
1406 	int			i, rid;
1407 
1408 	sc = device_get_softc(dev);
1409 	ifp = sc->rl_ifp;
1410 	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1411 
1412 #ifdef DEVICE_POLLING
1413 	if (ifp->if_capenable & IFCAP_POLLING)
1414 		ether_poll_deregister(ifp);
1415 #endif
1416 	/* These should only be active if attach succeeded */
1417 	if (device_is_attached(dev)) {
1418 		RL_LOCK(sc);
1419 #if 0
1420 		sc->suspended = 1;
1421 #endif
1422 		re_stop(sc);
1423 		RL_UNLOCK(sc);
1424 		callout_drain(&sc->rl_stat_callout);
1425 		taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1426 		taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
1427 		/*
1428 		 * Force off the IFF_UP flag here, in case someone
1429 		 * still had a BPF descriptor attached to this
1430 		 * interface. If they do, ether_ifdetach() will cause
1431 		 * the BPF code to try and clear the promisc mode
1432 		 * flag, which will bubble down to re_ioctl(),
1433 		 * which will try to call re_init() again. This will
1434 		 * turn the NIC back on and restart the MII ticker,
1435 		 * which will panic the system when the kernel tries
1436 		 * to invoke the re_tick() function that isn't there
1437 		 * anymore.
1438 		 */
1439 		ifp->if_flags &= ~IFF_UP;
1440 		ether_ifdetach(ifp);
1441 	}
1442 	if (sc->rl_miibus)
1443 		device_delete_child(dev, sc->rl_miibus);
1444 	bus_generic_detach(dev);
1445 
1446 	/*
1447 	 * The rest is resource deallocation, so we should already be
1448 	 * stopped here.
1449 	 */
1450 
1451 	for (i = 0; i < RL_MSI_MESSAGES; i++) {
1452 		if (sc->rl_intrhand[i] != NULL) {
1453 			bus_teardown_intr(dev, sc->rl_irq[i],
1454 			    sc->rl_intrhand[i]);
1455 			sc->rl_intrhand[i] = NULL;
1456 		}
1457 	}
1458 	if (ifp != NULL)
1459 		if_free(ifp);
1460 	if (sc->rl_msi == 0) {
1461 		if (sc->rl_irq[0] != NULL) {
1462 			bus_release_resource(dev, SYS_RES_IRQ, 0,
1463 			    sc->rl_irq[0]);
1464 			sc->rl_irq[0] = NULL;
1465 		}
1466 	} else {
1467 		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1468 			if (sc->rl_irq[i] != NULL) {
1469 				bus_release_resource(dev, SYS_RES_IRQ, rid,
1470 				    sc->rl_irq[i]);
1471 				sc->rl_irq[i] = NULL;
1472 			}
1473 		}
1474 		pci_release_msi(dev);
1475 	}
1476 	if (sc->rl_res)
1477 		bus_release_resource(dev, RL_RES, RL_RID, sc->rl_res);
1478 
1479 	/* Unload and free the RX DMA ring memory and map */
1480 
1481 	if (sc->rl_ldata.rl_rx_list_tag) {
1482 		bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1483 		    sc->rl_ldata.rl_rx_list_map);
1484 		bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1485 		    sc->rl_ldata.rl_rx_list,
1486 		    sc->rl_ldata.rl_rx_list_map);
1487 		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1488 	}
1489 
1490 	/* Unload and free the TX DMA ring memory and map */
1491 
1492 	if (sc->rl_ldata.rl_tx_list_tag) {
1493 		bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1494 		    sc->rl_ldata.rl_tx_list_map);
1495 		bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1496 		    sc->rl_ldata.rl_tx_list,
1497 		    sc->rl_ldata.rl_tx_list_map);
1498 		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1499 	}
1500 
1501 	/* Destroy all the RX and TX buffer maps */
1502 
1503 	if (sc->rl_ldata.rl_tx_mtag) {
1504 		for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1505 			bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1506 			    sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1507 		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1508 	}
1509 	if (sc->rl_ldata.rl_rx_mtag) {
1510 		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
1511 			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1512 			    sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1513 		if (sc->rl_ldata.rl_rx_sparemap)
1514 			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1515 			    sc->rl_ldata.rl_rx_sparemap);
1516 		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1517 	}
1518 
1519 	/* Unload and free the stats buffer and map */
1520 
1521 	if (sc->rl_ldata.rl_stag) {
1522 		bus_dmamap_unload(sc->rl_ldata.rl_stag,
1523 		    sc->rl_ldata.rl_rx_list_map);
1524 		bus_dmamem_free(sc->rl_ldata.rl_stag,
1525 		    sc->rl_ldata.rl_stats,
1526 		    sc->rl_ldata.rl_smap);
1527 		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1528 	}
1529 
1530 	if (sc->rl_parent_tag)
1531 		bus_dma_tag_destroy(sc->rl_parent_tag);
1532 
1533 	mtx_destroy(&sc->rl_mtx);
1534 
1535 	return (0);
1536 }
1537 
1538 static __inline void
1539 re_discard_rxbuf(sc, idx)
1540 	struct rl_softc		*sc;
1541 	int			idx;
1542 {
1543 	struct rl_desc		*desc;
1544 	struct rl_rxdesc	*rxd;
1545 	uint32_t		cmdstat;
1546 
1547 	rxd = &sc->rl_ldata.rl_rx_desc[idx];
1548 	desc = &sc->rl_ldata.rl_rx_list[idx];
1549 	desc->rl_vlanctl = 0;
1550 	cmdstat = rxd->rx_size;
1551 	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1552 		cmdstat |= RL_RDESC_CMD_EOR;
1553 	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1554 }
1555 
1556 static int
1557 re_newbuf(sc, idx)
1558 	struct rl_softc		*sc;
1559 	int			idx;
1560 {
1561 	struct mbuf		*m;
1562 	struct rl_rxdesc	*rxd;
1563 	bus_dma_segment_t	segs[1];
1564 	bus_dmamap_t		map;
1565 	struct rl_desc		*desc;
1566 	uint32_t		cmdstat;
1567 	int			error, nsegs;
1568 
1569 	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1570 	if (m == NULL)
1571 		return (ENOBUFS);
1572 
1573 	m->m_len = m->m_pkthdr.len = MCLBYTES;
1574 #ifdef RE_FIXUP_RX
1575 	/*
1576 	 * This is part of an evil trick to deal with non-x86 platforms.
1577 	 * The RealTek chip requires RX buffers to be aligned on 64-bit
1578 	 * boundaries, but that will hose non-x86 machines. To get around
1579 	 * this, we leave some empty space at the start of each buffer
1580 	 * and for non-x86 hosts, we copy the buffer back six bytes
1581 	 * to achieve word alignment. This is slightly more efficient
1582 	 * than allocating a new buffer, copying the contents, and
1583 	 * discarding the old buffer.
1584 	 */
1585 	m_adj(m, RE_ETHER_ALIGN);
1586 #endif
1587 	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1588 	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1589 	if (error != 0) {
1590 		m_freem(m);
1591 		return (ENOBUFS);
1592 	}
1593 	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
1594 
1595 	rxd = &sc->rl_ldata.rl_rx_desc[idx];
1596 	if (rxd->rx_m != NULL) {
1597 		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1598 		    BUS_DMASYNC_POSTREAD);
1599 		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1600 	}
1601 
1602 	rxd->rx_m = m;
1603 	map = rxd->rx_dmamap;
1604 	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1605 	rxd->rx_size = segs[0].ds_len;
1606 	sc->rl_ldata.rl_rx_sparemap = map;
1607 	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1608 	    BUS_DMASYNC_PREREAD);
1609 
1610 	desc = &sc->rl_ldata.rl_rx_list[idx];
1611 	desc->rl_vlanctl = 0;
1612 	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1613 	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1614 	cmdstat = segs[0].ds_len;
1615 	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1616 		cmdstat |= RL_RDESC_CMD_EOR;
1617 	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1618 
1619 	return (0);
1620 }
1621 
1622 #ifdef RE_FIXUP_RX
1623 static __inline void
1624 re_fixup_rx(m)
1625 	struct mbuf		*m;
1626 {
1627 	int                     i;
1628 	uint16_t                *src, *dst;
1629 
1630 	src = mtod(m, uint16_t *);
1631 	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;
1632 
1633 	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
1634 		*dst++ = *src++;
1635 
1636 	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
1637 
1638 	return;
1639 }
1640 #endif
1641 
1642 static int
1643 re_tx_list_init(sc)
1644 	struct rl_softc		*sc;
1645 {
1646 	struct rl_desc		*desc;
1647 	int			i;
1648 
1649 	RL_LOCK_ASSERT(sc);
1650 
1651 	bzero(sc->rl_ldata.rl_tx_list,
1652 	    sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1653 	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1654 		sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1655 	/* Set EOR. */
1656 	desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1657 	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1658 
1659 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1660 	    sc->rl_ldata.rl_tx_list_map,
1661 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1662 
1663 	sc->rl_ldata.rl_tx_prodidx = 0;
1664 	sc->rl_ldata.rl_tx_considx = 0;
1665 	sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1666 
1667 	return (0);
1668 }
1669 
1670 static int
1671 re_rx_list_init(sc)
1672 	struct rl_softc		*sc;
1673 {
1674 	int			error, i;
1675 
1676 	bzero(sc->rl_ldata.rl_rx_list,
1677 	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1678 	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1679 		sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1680 		if ((error = re_newbuf(sc, i)) != 0)
1681 			return (error);
1682 	}
1683 
1684 	/* Flush the RX descriptors */
1685 
1686 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1687 	    sc->rl_ldata.rl_rx_list_map,
1688 	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1689 
1690 	sc->rl_ldata.rl_rx_prodidx = 0;
1691 	sc->rl_head = sc->rl_tail = NULL;
1692 
1693 	return (0);
1694 }
1695 
1696 /*
1697  * RX handler for C+ and 8169. For the gigE chips, we support
1698  * the reception of jumbo frames that have been fragmented
1699  * across multiple 2K mbuf cluster buffers.
1700  */
1701 static int
1702 re_rxeof(sc)
1703 	struct rl_softc		*sc;
1704 {
1705 	struct mbuf		*m;
1706 	struct ifnet		*ifp;
1707 	int			i, total_len;
1708 	struct rl_desc		*cur_rx;
1709 	u_int32_t		rxstat, rxvlan;
1710 	int			maxpkt = 16;
1711 
1712 	RL_LOCK_ASSERT(sc);
1713 
1714 	ifp = sc->rl_ifp;
1715 
1716 	/* Invalidate the descriptor memory */
1717 
1718 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1719 	    sc->rl_ldata.rl_rx_list_map,
1720 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1721 
1722 	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1723 	    i = RL_RX_DESC_NXT(sc, i)) {
1724 		cur_rx = &sc->rl_ldata.rl_rx_list[i];
1725 		rxstat = le32toh(cur_rx->rl_cmdstat);
1726 		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1727 			break;
1728 		total_len = rxstat & sc->rl_rxlenmask;
1729 		rxvlan = le32toh(cur_rx->rl_vlanctl);
1730 		m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1731 
1732 		if (!(rxstat & RL_RDESC_STAT_EOF)) {
1733 			if (re_newbuf(sc, i) != 0) {
1734 				/*
1735 				 * If this is part of a multi-fragment packet,
1736 				 * discard all the pieces.
1737 				 */
1738 				if (sc->rl_head != NULL) {
1739 					m_freem(sc->rl_head);
1740 					sc->rl_head = sc->rl_tail = NULL;
1741 				}
1742 				re_discard_rxbuf(sc, i);
1743 				continue;
1744 			}
1745 			m->m_len = RE_RX_DESC_BUFLEN;
1746 			if (sc->rl_head == NULL)
1747 				sc->rl_head = sc->rl_tail = m;
1748 			else {
1749 				m->m_flags &= ~M_PKTHDR;
1750 				sc->rl_tail->m_next = m;
1751 				sc->rl_tail = m;
1752 			}
1753 			continue;
1754 		}
1755 
1756 		/*
1757 		 * NOTE: for the 8139C+, the frame length field
1758 		 * is always 12 bits in size, but for the gigE chips,
1759 		 * it is 13 bits (since the max RX frame length is 16K).
1760 		 * Unfortunately, all 32 bits in the status word
1761 		 * were already used, so to make room for the extra
1762 		 * length bit, RealTek took out the 'frame alignment
1763 		 * error' bit and shifted the other status bits
1764 		 * over one slot. The OWN, EOR, FS and LS bits are
1765 		 * still in the same places. We have already extracted
1766 		 * the frame length and checked the OWN bit, so rather
1767 		 * than using an alternate bit mapping, we shift the
1768 		 * status bits one space to the right so we can evaluate
1769 		 * them using the 8169 status as though it was in the
1770 		 * same format as that of the 8139C+.
1771 		 */
1772 		if (sc->rl_type == RL_8169)
1773 			rxstat >>= 1;
1774 
1775 		/*
1776 		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1777 		 * set, but if CRC is clear, it will still be a valid frame.
1778 		 */
1779 		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1780 		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1781 			ifp->if_ierrors++;
1782 			/*
1783 			 * If this is part of a multi-fragment packet,
1784 			 * discard all the pieces.
1785 			 */
1786 			if (sc->rl_head != NULL) {
1787 				m_freem(sc->rl_head);
1788 				sc->rl_head = sc->rl_tail = NULL;
1789 			}
1790 			re_discard_rxbuf(sc, i);
1791 			continue;
1792 		}
1793 
1794 		/*
1795 		 * If allocating a replacement mbuf fails,
1796 		 * reload the current one.
1797 		 */
1798 
1799 		if (re_newbuf(sc, i) != 0) {
1800 			ifp->if_iqdrops++;
1801 			if (sc->rl_head != NULL) {
1802 				m_freem(sc->rl_head);
1803 				sc->rl_head = sc->rl_tail = NULL;
1804 			}
1805 			re_discard_rxbuf(sc, i);
1806 			continue;
1807 		}
1808 
1809 		if (sc->rl_head != NULL) {
1810 			m->m_len = total_len % RE_RX_DESC_BUFLEN;
1811 			if (m->m_len == 0)
1812 				m->m_len = RE_RX_DESC_BUFLEN;
1813 			/*
1814 			 * Special case: if there's 4 bytes or less
1815 			 * in this buffer, the mbuf can be discarded:
1816 			 * the last 4 bytes is the CRC, which we don't
1817 			 * care about anyway.
1818 			 */
1819 			if (m->m_len <= ETHER_CRC_LEN) {
1820 				sc->rl_tail->m_len -=
1821 				    (ETHER_CRC_LEN - m->m_len);
1822 				m_freem(m);
1823 			} else {
1824 				m->m_len -= ETHER_CRC_LEN;
1825 				m->m_flags &= ~M_PKTHDR;
1826 				sc->rl_tail->m_next = m;
1827 			}
1828 			m = sc->rl_head;
1829 			sc->rl_head = sc->rl_tail = NULL;
1830 			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1831 		} else
1832 			m->m_pkthdr.len = m->m_len =
1833 			    (total_len - ETHER_CRC_LEN);
1834 
1835 #ifdef RE_FIXUP_RX
1836 		re_fixup_rx(m);
1837 #endif
1838 		ifp->if_ipackets++;
1839 		m->m_pkthdr.rcvif = ifp;
1840 
1841 		/* Do RX checksumming if enabled */
1842 
1843 		if (ifp->if_capenable & IFCAP_RXCSUM) {
1844 
1845 			/* Check IP header checksum */
1846 			if (rxstat & RL_RDESC_STAT_PROTOID)
1847 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
1848 			if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1849 				m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1850 
1851 			/* Check TCP/UDP checksum */
1852 			if ((RL_TCPPKT(rxstat) &&
1853 			    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1854 			    (RL_UDPPKT(rxstat) &&
1855 			    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1856 				m->m_pkthdr.csum_flags |=
1857 				    CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1858 				m->m_pkthdr.csum_data = 0xffff;
1859 			}
1860 		}
1861 		maxpkt--;
1862 		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1863 			m->m_pkthdr.ether_vtag =
1864 			    ntohs((rxvlan & RL_RDESC_VLANCTL_DATA));
1865 			m->m_flags |= M_VLANTAG;
1866 		}
1867 		RL_UNLOCK(sc);
1868 		(*ifp->if_input)(ifp, m);
1869 		RL_LOCK(sc);
1870 	}
1871 
1872 	/* Flush the RX DMA ring */
1873 
1874 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1875 	    sc->rl_ldata.rl_rx_list_map,
1876 	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1877 
1878 	sc->rl_ldata.rl_rx_prodidx = i;
1879 
1880 	if (maxpkt)
1881 		return(EAGAIN);
1882 
1883 	return(0);
1884 }
1885 
1886 static void
1887 re_txeof(sc)
1888 	struct rl_softc		*sc;
1889 {
1890 	struct ifnet		*ifp;
1891 	struct rl_txdesc	*txd;
1892 	u_int32_t		txstat;
1893 	int			cons;
1894 
1895 	cons = sc->rl_ldata.rl_tx_considx;
1896 	if (cons == sc->rl_ldata.rl_tx_prodidx)
1897 		return;
1898 
1899 	ifp = sc->rl_ifp;
1900 	/* Invalidate the TX descriptor list */
1901 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1902 	    sc->rl_ldata.rl_tx_list_map,
1903 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1904 
1905 	for (; cons != sc->rl_ldata.rl_tx_prodidx;
1906 	    cons = RL_TX_DESC_NXT(sc, cons)) {
1907 		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
1908 		if (txstat & RL_TDESC_STAT_OWN)
1909 			break;
1910 		/*
1911 		 * We only stash mbufs in the last descriptor
1912 		 * in a fragment chain, which also happens to
1913 		 * be the only place where the TX status bits
1914 		 * are valid.
1915 		 */
1916 		if (txstat & RL_TDESC_CMD_EOF) {
1917 			txd = &sc->rl_ldata.rl_tx_desc[cons];
1918 			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
1919 			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
1920 			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
1921 			    txd->tx_dmamap);
1922 			KASSERT(txd->tx_m != NULL,
1923 			    ("%s: freeing NULL mbufs!", __func__));
1924 			m_freem(txd->tx_m);
1925 			txd->tx_m = NULL;
1926 			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
1927 			    RL_TDESC_STAT_COLCNT))
1928 				ifp->if_collisions++;
1929 			if (txstat & RL_TDESC_STAT_TXERRSUM)
1930 				ifp->if_oerrors++;
1931 			else
1932 				ifp->if_opackets++;
1933 		}
1934 		sc->rl_ldata.rl_tx_free++;
1935 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1936 	}
1937 	sc->rl_ldata.rl_tx_considx = cons;
1938 
1939 	/* No changes made to the TX ring, so no flush needed */
1940 
1941 	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
1942 		/*
1943 		 * Some chips will ignore a second TX request issued
1944 		 * while an existing transmission is in progress. If
1945 		 * the transmitter goes idle but there are still
1946 		 * packets waiting to be sent, we need to restart the
1947 		 * channel here to flush them out. This only seems to
1948 		 * be required with the PCIe devices.
1949 		 */
1950 		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
1951 
1952 #ifdef RE_TX_MODERATION
1953 		/*
1954 		 * If not all descriptors have been reaped yet, reload
1955 		 * the timer so that we will eventually get another
1956 		 * interrupt that will cause us to re-enter this routine.
1957 		 * This is done in case the transmitter has gone idle.
1958 		 */
1959 		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
1960 #endif
1961 	} else
1962 		sc->rl_watchdog_timer = 0;
1963 }
1964 
1965 static void
1966 re_tick(xsc)
1967 	void			*xsc;
1968 {
1969 	struct rl_softc		*sc;
1970 	struct mii_data		*mii;
1971 	struct ifnet		*ifp;
1972 
1973 	sc = xsc;
1974 	ifp = sc->rl_ifp;
1975 
1976 	RL_LOCK_ASSERT(sc);
1977 
1978 	re_watchdog(sc);
1979 
1980 	mii = device_get_softc(sc->rl_miibus);
1981 	mii_tick(mii);
1982 	if (sc->rl_link) {
1983 		if (!(mii->mii_media_status & IFM_ACTIVE))
1984 			sc->rl_link = 0;
1985 	} else {
1986 		if (mii->mii_media_status & IFM_ACTIVE &&
1987 		    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
1988 			sc->rl_link = 1;
1989 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1990 				taskqueue_enqueue_fast(taskqueue_fast,
1991 				    &sc->rl_txtask);
1992 		}
1993 	}
1994 
1995 	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
1996 }
1997 
1998 #ifdef DEVICE_POLLING
1999 static void
2000 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2001 {
2002 	struct rl_softc *sc = ifp->if_softc;
2003 
2004 	RL_LOCK(sc);
2005 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2006 		re_poll_locked(ifp, cmd, count);
2007 	RL_UNLOCK(sc);
2008 }
2009 
2010 static void
2011 re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2012 {
2013 	struct rl_softc *sc = ifp->if_softc;
2014 
2015 	RL_LOCK_ASSERT(sc);
2016 
2017 	sc->rxcycles = count;
2018 	re_rxeof(sc);
2019 	re_txeof(sc);
2020 
2021 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2022 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2023 
2024 	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2025 		u_int16_t       status;
2026 
2027 		status = CSR_READ_2(sc, RL_ISR);
2028 		if (status == 0xffff)
2029 			return;
2030 		if (status)
2031 			CSR_WRITE_2(sc, RL_ISR, status);
2032 
2033 		/*
2034 		 * XXX check behaviour on receiver stalls.
2035 		 */
2036 
2037 		if (status & RL_ISR_SYSTEM_ERR) {
2038 			re_reset(sc);
2039 			re_init_locked(sc);
2040 		}
2041 	}
2042 }
2043 #endif /* DEVICE_POLLING */
2044 
2045 static int
2046 re_intr(arg)
2047 	void			*arg;
2048 {
2049 	struct rl_softc		*sc;
2050 	uint16_t		status;
2051 
2052 	sc = arg;
2053 
2054 	status = CSR_READ_2(sc, RL_ISR);
2055 	if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2056                 return (FILTER_STRAY);
2057 	CSR_WRITE_2(sc, RL_IMR, 0);
2058 
2059 	taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2060 
2061 	return (FILTER_HANDLED);
2062 }
2063 
2064 static void
2065 re_int_task(arg, npending)
2066 	void			*arg;
2067 	int			npending;
2068 {
2069 	struct rl_softc		*sc;
2070 	struct ifnet		*ifp;
2071 	u_int16_t		status;
2072 	int			rval = 0;
2073 
2074 	sc = arg;
2075 	ifp = sc->rl_ifp;
2076 
2077 	RL_LOCK(sc);
2078 
2079 	status = CSR_READ_2(sc, RL_ISR);
2080         CSR_WRITE_2(sc, RL_ISR, status);
2081 
2082 	if (sc->suspended ||
2083 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2084 		RL_UNLOCK(sc);
2085 		return;
2086 	}
2087 
2088 #ifdef DEVICE_POLLING
2089 	if  (ifp->if_capenable & IFCAP_POLLING) {
2090 		RL_UNLOCK(sc);
2091 		return;
2092 	}
2093 #endif
2094 
2095 	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2096 		rval = re_rxeof(sc);
2097 
2098 #ifdef RE_TX_MODERATION
2099 	if (status & (RL_ISR_TIMEOUT_EXPIRED|
2100 #else
2101 	if (status & (RL_ISR_TX_OK|
2102 #endif
2103 	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2104 		re_txeof(sc);
2105 
2106 	if (status & RL_ISR_SYSTEM_ERR) {
2107 		re_reset(sc);
2108 		re_init_locked(sc);
2109 	}
2110 
2111 	if (status & RL_ISR_LINKCHG) {
2112 		callout_stop(&sc->rl_stat_callout);
2113 		re_tick(sc);
2114 	}
2115 
2116 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2117 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2118 
2119 	RL_UNLOCK(sc);
2120 
2121         if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2122 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2123 		return;
2124 	}
2125 
2126 	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2127 
2128 	return;
2129 }
2130 
2131 static int
2132 re_encap(sc, m_head)
2133 	struct rl_softc		*sc;
2134 	struct mbuf		**m_head;
2135 {
2136 	struct rl_txdesc	*txd, *txd_last;
2137 	bus_dma_segment_t	segs[RL_NTXSEGS];
2138 	bus_dmamap_t		map;
2139 	struct mbuf		*m_new;
2140 	struct rl_desc		*desc;
2141 	int			nsegs, prod;
2142 	int			i, error, ei, si;
2143 	int			padlen;
2144 	uint32_t		cmdstat, csum_flags;
2145 
2146 	RL_LOCK_ASSERT(sc);
2147 	M_ASSERTPKTHDR((*m_head));
2148 
2149 	/*
2150 	 * With some of the RealTek chips, using the checksum offload
2151 	 * support in conjunction with the autopadding feature results
2152 	 * in the transmission of corrupt frames. For example, if we
2153 	 * need to send a really small IP fragment that's less than 60
2154 	 * bytes in size, and IP header checksumming is enabled, the
2155 	 * resulting ethernet frame that appears on the wire will
2156 	 * have garbled payload. To work around this, if TX checksum
2157 	 * offload is enabled, we always manually pad short frames out
2158 	 * to the minimum ethernet frame size.
2159 	 *
2160 	 * Note: this appears unnecessary for TCP, and doing it for TCP
2161 	 * with PCIe adapters seems to result in bad checksums.
2162 	 */
2163 	if ((*m_head)->m_pkthdr.csum_flags & (CSUM_IP | CSUM_UDP) &&
2164 	    ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) == 0 &&
2165             (*m_head)->m_pkthdr.len < RL_MIN_FRAMELEN) {
2166 		padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2167 		if (M_WRITABLE(*m_head) == 0) {
2168 			/* Get a writable copy. */
2169 			m_new = m_dup(*m_head, M_DONTWAIT);
2170 			m_freem(*m_head);
2171 			if (m_new == NULL) {
2172 				*m_head = NULL;
2173 				return (ENOBUFS);
2174 			}
2175 			*m_head = m_new;
2176 		}
2177 		if ((*m_head)->m_next != NULL ||
2178 		    M_TRAILINGSPACE(*m_head) < padlen) {
2179 			m_new = m_defrag(*m_head, M_DONTWAIT);
2180 			if (m_new == NULL) {
2181 				m_freem(*m_head);
2182 				*m_head = NULL;
2183 				return (ENOBUFS);
2184 			}
2185 		} else
2186 			m_new = *m_head;
2187 
2188 		/*
2189 		 * Manually pad short frames, and zero the pad space
2190 		 * to avoid leaking data.
2191 		 */
2192 		bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2193 		m_new->m_pkthdr.len += padlen;
2194 		m_new->m_len = m_new->m_pkthdr.len;
2195 		*m_head = m_new;
2196 	}
2197 
2198 	prod = sc->rl_ldata.rl_tx_prodidx;
2199 	txd = &sc->rl_ldata.rl_tx_desc[prod];
2200 	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2201 	    *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2202 	if (error == EFBIG) {
2203 		m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2204 		if (m_new == NULL) {
2205 			m_freem(*m_head);
2206 			*m_head = NULL;
2207 			return (ENOBUFS);
2208 		}
2209 		*m_head = m_new;
2210 		error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2211 		    txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2212 		if (error != 0) {
2213 			m_freem(*m_head);
2214 			*m_head = NULL;
2215 			return (error);
2216 		}
2217 	} else if (error != 0)
2218 		return (error);
2219 	if (nsegs == 0) {
2220 		m_freem(*m_head);
2221 		*m_head = NULL;
2222 		return (EIO);
2223 	}
2224 
2225 	/* Check for number of available descriptors. */
2226 	if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2227 		bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2228 		return (ENOBUFS);
2229 	}
2230 
2231 	bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2232 	    BUS_DMASYNC_PREWRITE);
2233 
2234 	/*
2235 	 * Set up checksum offload. Note: checksum offload bits must
2236 	 * appear in all descriptors of a multi-descriptor transmit
2237 	 * attempt. This is according to testing done with an 8169
2238 	 * chip. This is a requirement.
2239 	 */
2240 	csum_flags = 0;
2241 	if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2242 		csum_flags = RL_TDESC_CMD_LGSEND |
2243 		    ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2244 		    RL_TDESC_CMD_MSSVAL_SHIFT);
2245 	else {
2246 		if ((*m_head)->m_pkthdr.csum_flags & CSUM_IP)
2247 			csum_flags |= RL_TDESC_CMD_IPCSUM;
2248 		if ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP)
2249 			csum_flags |= RL_TDESC_CMD_TCPCSUM;
2250 		if ((*m_head)->m_pkthdr.csum_flags & CSUM_UDP)
2251 			csum_flags |= RL_TDESC_CMD_UDPCSUM;
2252 	}
2253 
2254 	si = prod;
2255 	for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2256 		desc = &sc->rl_ldata.rl_tx_list[prod];
2257 		desc->rl_vlanctl = 0;
2258 		desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2259 		desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2260 		cmdstat = segs[i].ds_len;
2261 		if (i != 0)
2262 			cmdstat |= RL_TDESC_CMD_OWN;
2263 		if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2264 			cmdstat |= RL_TDESC_CMD_EOR;
2265 		desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2266 		sc->rl_ldata.rl_tx_free--;
2267 	}
2268 	/* Update producer index. */
2269 	sc->rl_ldata.rl_tx_prodidx = prod;
2270 
2271 	/* Set EOF on the last descriptor. */
2272 	ei = RL_TX_DESC_PRV(sc, prod);
2273 	desc = &sc->rl_ldata.rl_tx_list[ei];
2274 	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2275 
2276 	desc = &sc->rl_ldata.rl_tx_list[si];
2277 	/*
2278 	 * Set up hardware VLAN tagging. Note: vlan tag info must
2279 	 * appear in the first descriptor of a multi-descriptor
2280 	 * transmission attempt.
2281 	 */
2282 	if ((*m_head)->m_flags & M_VLANTAG)
2283 		desc->rl_vlanctl =
2284 		    htole32(htons((*m_head)->m_pkthdr.ether_vtag) |
2285 		    RL_TDESC_VLANCTL_TAG);
2286 	/* Set SOF and transfer ownership of packet to the chip. */
2287 	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2288 
2289 	/*
2290 	 * Insure that the map for this transmission
2291 	 * is placed at the array index of the last descriptor
2292 	 * in this chain.  (Swap last and first dmamaps.)
2293 	 */
2294 	txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2295 	map = txd->tx_dmamap;
2296 	txd->tx_dmamap = txd_last->tx_dmamap;
2297 	txd_last->tx_dmamap = map;
2298 	txd_last->tx_m = *m_head;
2299 
2300 	return (0);
2301 }
2302 
2303 static void
2304 re_tx_task(arg, npending)
2305 	void			*arg;
2306 	int			npending;
2307 {
2308 	struct ifnet		*ifp;
2309 
2310 	ifp = arg;
2311 	re_start(ifp);
2312 
2313 	return;
2314 }
2315 
2316 /*
2317  * Main transmit routine for C+ and gigE NICs.
2318  */
2319 static void
2320 re_start(ifp)
2321 	struct ifnet		*ifp;
2322 {
2323 	struct rl_softc		*sc;
2324 	struct mbuf		*m_head;
2325 	int			queued;
2326 
2327 	sc = ifp->if_softc;
2328 
2329 	RL_LOCK(sc);
2330 
2331 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2332 	    IFF_DRV_RUNNING || sc->rl_link == 0) {
2333 		RL_UNLOCK(sc);
2334 		return;
2335 	}
2336 
2337 	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2338 	    sc->rl_ldata.rl_tx_free > 1;) {
2339 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2340 		if (m_head == NULL)
2341 			break;
2342 
2343 		if (re_encap(sc, &m_head) != 0) {
2344 			if (m_head == NULL)
2345 				break;
2346 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2347 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2348 			break;
2349 		}
2350 
2351 		/*
2352 		 * If there's a BPF listener, bounce a copy of this frame
2353 		 * to him.
2354 		 */
2355 		ETHER_BPF_MTAP(ifp, m_head);
2356 
2357 		queued++;
2358 	}
2359 
2360 	if (queued == 0) {
2361 #ifdef RE_TX_MODERATION
2362 		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2363 			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2364 #endif
2365 		RL_UNLOCK(sc);
2366 		return;
2367 	}
2368 
2369 	/* Flush the TX descriptors */
2370 
2371 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2372 	    sc->rl_ldata.rl_tx_list_map,
2373 	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2374 
2375 	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2376 
2377 #ifdef RE_TX_MODERATION
2378 	/*
2379 	 * Use the countdown timer for interrupt moderation.
2380 	 * 'TX done' interrupts are disabled. Instead, we reset the
2381 	 * countdown timer, which will begin counting until it hits
2382 	 * the value in the TIMERINT register, and then trigger an
2383 	 * interrupt. Each time we write to the TIMERCNT register,
2384 	 * the timer count is reset to 0.
2385 	 */
2386 	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2387 #endif
2388 
2389 	/*
2390 	 * Set a timeout in case the chip goes out to lunch.
2391 	 */
2392 	sc->rl_watchdog_timer = 5;
2393 
2394 	RL_UNLOCK(sc);
2395 
2396 	return;
2397 }
2398 
2399 static void
2400 re_init(xsc)
2401 	void			*xsc;
2402 {
2403 	struct rl_softc		*sc = xsc;
2404 
2405 	RL_LOCK(sc);
2406 	re_init_locked(sc);
2407 	RL_UNLOCK(sc);
2408 }
2409 
2410 static void
2411 re_init_locked(sc)
2412 	struct rl_softc		*sc;
2413 {
2414 	struct ifnet		*ifp = sc->rl_ifp;
2415 	struct mii_data		*mii;
2416 	u_int32_t		rxcfg = 0;
2417 	union {
2418 		uint32_t align_dummy;
2419 		u_char eaddr[ETHER_ADDR_LEN];
2420         } eaddr;
2421 
2422 	RL_LOCK_ASSERT(sc);
2423 
2424 	mii = device_get_softc(sc->rl_miibus);
2425 
2426 	/*
2427 	 * Cancel pending I/O and free all RX/TX buffers.
2428 	 */
2429 	re_stop(sc);
2430 
2431 	/*
2432 	 * Enable C+ RX and TX mode, as well as VLAN stripping and
2433 	 * RX checksum offload. We must configure the C+ register
2434 	 * before all others.
2435 	 */
2436 	CSR_WRITE_2(sc, RL_CPLUS_CMD, RL_CPLUSCMD_RXENB|
2437 	    RL_CPLUSCMD_TXENB|RL_CPLUSCMD_PCI_MRW|
2438 	    RL_CPLUSCMD_VLANSTRIP|RL_CPLUSCMD_RXCSUM_ENB);
2439 
2440 	/*
2441 	 * Init our MAC address.  Even though the chipset
2442 	 * documentation doesn't mention it, we need to enter "Config
2443 	 * register write enable" mode to modify the ID registers.
2444 	 */
2445 	/* Copy MAC address on stack to align. */
2446 	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2447 	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2448 	CSR_WRITE_4(sc, RL_IDR0,
2449 	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2450 	CSR_WRITE_4(sc, RL_IDR4,
2451 	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2452 	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2453 
2454 	/*
2455 	 * For C+ mode, initialize the RX descriptors and mbufs.
2456 	 */
2457 	re_rx_list_init(sc);
2458 	re_tx_list_init(sc);
2459 
2460 	/*
2461 	 * Load the addresses of the RX and TX lists into the chip.
2462 	 */
2463 
2464 	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2465 	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2466 	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2467 	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2468 
2469 	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2470 	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2471 	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2472 	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2473 
2474 	/*
2475 	 * Enable transmit and receive.
2476 	 */
2477 	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2478 
2479 	/*
2480 	 * Set the initial TX and RX configuration.
2481 	 */
2482 	if (sc->rl_testmode) {
2483 		if (sc->rl_type == RL_8169)
2484 			CSR_WRITE_4(sc, RL_TXCFG,
2485 			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2486 		else
2487 			CSR_WRITE_4(sc, RL_TXCFG,
2488 			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2489 	} else
2490 		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2491 
2492 	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2493 
2494 	CSR_WRITE_4(sc, RL_RXCFG, RL_RXCFG_CONFIG);
2495 
2496 	/* Set the individual bit to receive frames for this host only. */
2497 	rxcfg = CSR_READ_4(sc, RL_RXCFG);
2498 	rxcfg |= RL_RXCFG_RX_INDIV;
2499 
2500 	/* If we want promiscuous mode, set the allframes bit. */
2501 	if (ifp->if_flags & IFF_PROMISC)
2502 		rxcfg |= RL_RXCFG_RX_ALLPHYS;
2503 	else
2504 		rxcfg &= ~RL_RXCFG_RX_ALLPHYS;
2505 	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2506 
2507 	/*
2508 	 * Set capture broadcast bit to capture broadcast frames.
2509 	 */
2510 	if (ifp->if_flags & IFF_BROADCAST)
2511 		rxcfg |= RL_RXCFG_RX_BROAD;
2512 	else
2513 		rxcfg &= ~RL_RXCFG_RX_BROAD;
2514 	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2515 
2516 	/*
2517 	 * Program the multicast filter, if necessary.
2518 	 */
2519 	re_setmulti(sc);
2520 
2521 #ifdef DEVICE_POLLING
2522 	/*
2523 	 * Disable interrupts if we are polling.
2524 	 */
2525 	if (ifp->if_capenable & IFCAP_POLLING)
2526 		CSR_WRITE_2(sc, RL_IMR, 0);
2527 	else	/* otherwise ... */
2528 #endif
2529 
2530 	/*
2531 	 * Enable interrupts.
2532 	 */
2533 	if (sc->rl_testmode)
2534 		CSR_WRITE_2(sc, RL_IMR, 0);
2535 	else
2536 		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2537 	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2538 
2539 	/* Set initial TX threshold */
2540 	sc->rl_txthresh = RL_TX_THRESH_INIT;
2541 
2542 	/* Start RX/TX process. */
2543 	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2544 #ifdef notdef
2545 	/* Enable receiver and transmitter. */
2546 	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2547 #endif
2548 
2549 #ifdef RE_TX_MODERATION
2550 	/*
2551 	 * Initialize the timer interrupt register so that
2552 	 * a timer interrupt will be generated once the timer
2553 	 * reaches a certain number of ticks. The timer is
2554 	 * reloaded on each transmit. This gives us TX interrupt
2555 	 * moderation, which dramatically improves TX frame rate.
2556 	 */
2557 	if (sc->rl_type == RL_8169)
2558 		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2559 	else
2560 		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2561 #endif
2562 
2563 	/*
2564 	 * For 8169 gigE NICs, set the max allowed RX packet
2565 	 * size so we can receive jumbo frames.
2566 	 */
2567 	if (sc->rl_type == RL_8169)
2568 		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2569 
2570 	if (sc->rl_testmode)
2571 		return;
2572 
2573 	mii_mediachg(mii);
2574 
2575 	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2576 
2577 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2578 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2579 
2580 	sc->rl_link = 0;
2581 	sc->rl_watchdog_timer = 0;
2582 	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2583 }
2584 
2585 /*
2586  * Set media options.
2587  */
2588 static int
2589 re_ifmedia_upd(ifp)
2590 	struct ifnet		*ifp;
2591 {
2592 	struct rl_softc		*sc;
2593 	struct mii_data		*mii;
2594 
2595 	sc = ifp->if_softc;
2596 	mii = device_get_softc(sc->rl_miibus);
2597 	RL_LOCK(sc);
2598 	mii_mediachg(mii);
2599 	RL_UNLOCK(sc);
2600 
2601 	return (0);
2602 }
2603 
2604 /*
2605  * Report current media status.
2606  */
2607 static void
2608 re_ifmedia_sts(ifp, ifmr)
2609 	struct ifnet		*ifp;
2610 	struct ifmediareq	*ifmr;
2611 {
2612 	struct rl_softc		*sc;
2613 	struct mii_data		*mii;
2614 
2615 	sc = ifp->if_softc;
2616 	mii = device_get_softc(sc->rl_miibus);
2617 
2618 	RL_LOCK(sc);
2619 	mii_pollstat(mii);
2620 	RL_UNLOCK(sc);
2621 	ifmr->ifm_active = mii->mii_media_active;
2622 	ifmr->ifm_status = mii->mii_media_status;
2623 }
2624 
2625 static int
2626 re_ioctl(ifp, command, data)
2627 	struct ifnet		*ifp;
2628 	u_long			command;
2629 	caddr_t			data;
2630 {
2631 	struct rl_softc		*sc = ifp->if_softc;
2632 	struct ifreq		*ifr = (struct ifreq *) data;
2633 	struct mii_data		*mii;
2634 	int			error = 0;
2635 
2636 	switch (command) {
2637 	case SIOCSIFMTU:
2638 		RL_LOCK(sc);
2639 		if (ifr->ifr_mtu > RL_JUMBO_MTU)
2640 			error = EINVAL;
2641 		ifp->if_mtu = ifr->ifr_mtu;
2642 		RL_UNLOCK(sc);
2643 		break;
2644 	case SIOCSIFFLAGS:
2645 		RL_LOCK(sc);
2646 		if ((ifp->if_flags & IFF_UP) != 0) {
2647 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2648 				if (((ifp->if_flags ^ sc->rl_if_flags)
2649 				    & IFF_PROMISC) != 0)
2650 					re_setmulti(sc);
2651 			} else
2652 				re_init_locked(sc);
2653 		} else {
2654 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2655 				re_stop(sc);
2656 		}
2657 		sc->rl_if_flags = ifp->if_flags;
2658 		RL_UNLOCK(sc);
2659 		break;
2660 	case SIOCADDMULTI:
2661 	case SIOCDELMULTI:
2662 		RL_LOCK(sc);
2663 		re_setmulti(sc);
2664 		RL_UNLOCK(sc);
2665 		break;
2666 	case SIOCGIFMEDIA:
2667 	case SIOCSIFMEDIA:
2668 		mii = device_get_softc(sc->rl_miibus);
2669 		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2670 		break;
2671 	case SIOCSIFCAP:
2672 	    {
2673 		int mask, reinit;
2674 
2675 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2676 		reinit = 0;
2677 #ifdef DEVICE_POLLING
2678 		if (mask & IFCAP_POLLING) {
2679 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
2680 				error = ether_poll_register(re_poll, ifp);
2681 				if (error)
2682 					return(error);
2683 				RL_LOCK(sc);
2684 				/* Disable interrupts */
2685 				CSR_WRITE_2(sc, RL_IMR, 0x0000);
2686 				ifp->if_capenable |= IFCAP_POLLING;
2687 				RL_UNLOCK(sc);
2688 			} else {
2689 				error = ether_poll_deregister(ifp);
2690 				/* Enable interrupts. */
2691 				RL_LOCK(sc);
2692 				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2693 				ifp->if_capenable &= ~IFCAP_POLLING;
2694 				RL_UNLOCK(sc);
2695 			}
2696 		}
2697 #endif /* DEVICE_POLLING */
2698 		if (mask & IFCAP_HWCSUM) {
2699 			ifp->if_capenable ^= IFCAP_HWCSUM;
2700 			if (ifp->if_capenable & IFCAP_TXCSUM)
2701 				ifp->if_hwassist |= RE_CSUM_FEATURES;
2702 			else
2703 				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
2704 			reinit = 1;
2705 		}
2706 		if (mask & IFCAP_VLAN_HWTAGGING) {
2707 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2708 			reinit = 1;
2709 		}
2710 		if (mask & IFCAP_TSO4) {
2711 			ifp->if_capenable ^= IFCAP_TSO4;
2712 			if ((IFCAP_TSO4 & ifp->if_capenable) &&
2713 			    (IFCAP_TSO4 & ifp->if_capabilities))
2714 				ifp->if_hwassist |= CSUM_TSO;
2715 			else
2716 				ifp->if_hwassist &= ~CSUM_TSO;
2717 		}
2718 		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
2719 			re_init(sc);
2720 		VLAN_CAPABILITIES(ifp);
2721 	    }
2722 		break;
2723 	default:
2724 		error = ether_ioctl(ifp, command, data);
2725 		break;
2726 	}
2727 
2728 	return (error);
2729 }
2730 
2731 static void
2732 re_watchdog(sc)
2733 	struct rl_softc		*sc;
2734 {
2735 
2736 	RL_LOCK_ASSERT(sc);
2737 
2738 	if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
2739 		return;
2740 
2741 	device_printf(sc->rl_dev, "watchdog timeout\n");
2742 	sc->rl_ifp->if_oerrors++;
2743 
2744 	re_txeof(sc);
2745 	re_rxeof(sc);
2746 	re_init_locked(sc);
2747 }
2748 
2749 /*
2750  * Stop the adapter and free any mbufs allocated to the
2751  * RX and TX lists.
2752  */
2753 static void
2754 re_stop(sc)
2755 	struct rl_softc		*sc;
2756 {
2757 	register int		i;
2758 	struct ifnet		*ifp;
2759 	struct rl_txdesc	*txd;
2760 	struct rl_rxdesc	*rxd;
2761 
2762 	RL_LOCK_ASSERT(sc);
2763 
2764 	ifp = sc->rl_ifp;
2765 
2766 	sc->rl_watchdog_timer = 0;
2767 	callout_stop(&sc->rl_stat_callout);
2768 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2769 
2770 	CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2771 	CSR_WRITE_2(sc, RL_IMR, 0x0000);
2772 	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2773 
2774 	if (sc->rl_head != NULL) {
2775 		m_freem(sc->rl_head);
2776 		sc->rl_head = sc->rl_tail = NULL;
2777 	}
2778 
2779 	/* Free the TX list buffers. */
2780 
2781 	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2782 		txd = &sc->rl_ldata.rl_tx_desc[i];
2783 		if (txd->tx_m != NULL) {
2784 			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2785 			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2786 			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2787 			    txd->tx_dmamap);
2788 			m_freem(txd->tx_m);
2789 			txd->tx_m = NULL;
2790 		}
2791 	}
2792 
2793 	/* Free the RX list buffers. */
2794 
2795 	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2796 		rxd = &sc->rl_ldata.rl_rx_desc[i];
2797 		if (rxd->rx_m != NULL) {
2798 			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2799 			    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2800 			bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2801 			    rxd->rx_dmamap);
2802 			m_freem(rxd->rx_m);
2803 			rxd->rx_m = NULL;
2804 		}
2805 	}
2806 }
2807 
2808 /*
2809  * Device suspend routine.  Stop the interface and save some PCI
2810  * settings in case the BIOS doesn't restore them properly on
2811  * resume.
2812  */
2813 static int
2814 re_suspend(dev)
2815 	device_t		dev;
2816 {
2817 	struct rl_softc		*sc;
2818 
2819 	sc = device_get_softc(dev);
2820 
2821 	RL_LOCK(sc);
2822 	re_stop(sc);
2823 	sc->suspended = 1;
2824 	RL_UNLOCK(sc);
2825 
2826 	return (0);
2827 }
2828 
2829 /*
2830  * Device resume routine.  Restore some PCI settings in case the BIOS
2831  * doesn't, re-enable busmastering, and restart the interface if
2832  * appropriate.
2833  */
2834 static int
2835 re_resume(dev)
2836 	device_t		dev;
2837 {
2838 	struct rl_softc		*sc;
2839 	struct ifnet		*ifp;
2840 
2841 	sc = device_get_softc(dev);
2842 
2843 	RL_LOCK(sc);
2844 
2845 	ifp = sc->rl_ifp;
2846 
2847 	/* reinitialize interface if necessary */
2848 	if (ifp->if_flags & IFF_UP)
2849 		re_init_locked(sc);
2850 
2851 	sc->suspended = 0;
2852 	RL_UNLOCK(sc);
2853 
2854 	return (0);
2855 }
2856 
2857 /*
2858  * Stop all chip I/O so that the kernel's probe routines don't
2859  * get confused by errant DMAs when rebooting.
2860  */
2861 static int
2862 re_shutdown(dev)
2863 	device_t		dev;
2864 {
2865 	struct rl_softc		*sc;
2866 
2867 	sc = device_get_softc(dev);
2868 
2869 	RL_LOCK(sc);
2870 	re_stop(sc);
2871 	/*
2872 	 * Mark interface as down since otherwise we will panic if
2873 	 * interrupt comes in later on, which can happen in some
2874 	 * cases.
2875 	 */
2876 	sc->rl_ifp->if_flags &= ~IFF_UP;
2877 	RL_UNLOCK(sc);
2878 
2879 	return (0);
2880 }
2881