xref: /freebsd/sys/dev/re/if_re.c (revision 54ebdd631db8c0bba2baab0155f603a8b5cf014a)
/*-
 * Copyright (c) 1997, 1998-2003
 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Bill Paul.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
 *
 * Written by Bill Paul <wpaul@windriver.com>
 * Senior Networking Software Engineer
 * Wind River Systems
 */

/*
 * This driver is designed to support RealTek's next generation of
 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
 * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
 * the RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
 *
 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
 * with the older 8139 family, however it also supports a special
 * C+ mode of operation that provides several new performance-enhancing
 * features. These include:
 *
 *	o Descriptor based DMA mechanism. Each descriptor represents
 *	  a single packet fragment. Data buffers may be aligned on
 *	  any byte boundary.
 *
 *	o 64-bit DMA
 *
 *	o TCP/IP checksum offload for both RX and TX
 *
 *	o High and normal priority transmit DMA rings
 *
 *	o VLAN tag insertion and extraction
 *
 *	o TCP large send (segmentation offload)
 *
 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
 * programming API is fairly straightforward. The RX filtering, EEPROM
 * access and PHY access are the same as on the older 8139 series
 * chips.
 *
 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
 * same programming API and feature set as the 8139C+ with the following
 * differences and additions:
 *
 *	o 1000Mbps mode
 *
 *	o Jumbo frames
 *
 *	o GMII and TBI ports/registers for interfacing with copper
 *	  or fiber PHYs
 *
 *	o RX and TX DMA rings can have up to 1024 descriptors
 *	  (the 8139C+ allows a maximum of 64)
 *
 *	o Slight differences in register layout from the 8139C+
 *
 * The TX start and timer interrupt registers are at different locations
 * on the 8169 than they are on the 8139C+. Also, the status word in the
 * RX descriptor has a slightly different bit layout. The 8169 does not
 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
 * copper gigE PHY.
 *
 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
 * (the 'S' stands for 'single-chip'). These devices have the same
 * programming API as the older 8169, but also have some vendor-specific
 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
 *
 * This driver takes advantage of the RX and TX checksum offload and
 * VLAN tag insertion/extraction features. It also implements TX
 * interrupt moderation using the timer interrupt registers, which
 * significantly reduces TX interrupt load. There is also support
 * for jumbo frames; however, the 8169/8169S/8110S cannot transmit
 * jumbo frames larger than 7440 bytes, so the maximum MTU possible
 * with this driver is 7422 bytes.
 */
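
/*
 * A quick sketch of the MTU arithmetic above (our own note, not from the
 * RealTek documentation): the 7440-byte hardware TX limit, less the
 * 14-byte Ethernet header (ETHER_HDR_LEN) and the 4-byte CRC
 * (ETHER_CRC_LEN), leaves 7440 - 14 - 4 = 7422 bytes of payload, hence
 * the 7422-byte maximum MTU.
 */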

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <pci/if_rlreg.h>

MODULE_DEPEND(re, pci, 1, 1, 1);
MODULE_DEPEND(re, ether, 1, 1, 1);
MODULE_DEPEND(re, miibus, 1, 1, 1);

/* "device miibus" required.  See GENERIC if you get errors here. */
#include "miibus_if.h"

/* Tunables. */
static int msi_disable = 1;
TUNABLE_INT("hw.re.msi_disable", &msi_disable);

#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)

/*
 * Various supported device vendors/types and their names.
 */
static struct rl_type re_devs[] = {
	{ DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
	    "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
	{ RT_VENDORID, RT_DEVICEID_8139, 0,
	    "RealTek 8139C+ 10/100BaseTX" },
	{ RT_VENDORID, RT_DEVICEID_8101E, 0,
	    "RealTek 8101E/8102E/8102EL PCIe 10/100baseTX" },
	{ RT_VENDORID, RT_DEVICEID_8168, 0,
	    "RealTek 8168/8168B/8168C/8168CP/8168D/8111B/8111C/8111CP PCIe "
	    "Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, 0,
	    "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169SC, 0,
	    "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
	    "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
	    "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
	{ USR_VENDORID, USR_DEVICEID_997902, 0,
	    "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
};

static struct rl_hwrev re_hwrevs[] = {
	{ RL_HWREV_8139, RL_8139,  "" },
	{ RL_HWREV_8139A, RL_8139, "A" },
	{ RL_HWREV_8139AG, RL_8139, "A-G" },
	{ RL_HWREV_8139B, RL_8139, "B" },
	{ RL_HWREV_8130, RL_8139, "8130" },
	{ RL_HWREV_8139C, RL_8139, "C" },
	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
	{ RL_HWREV_8168_SPIN1, RL_8169, "8168"},
	{ RL_HWREV_8169, RL_8169, "8169"},
	{ RL_HWREV_8169S, RL_8169, "8169S"},
	{ RL_HWREV_8110S, RL_8169, "8110S"},
	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB"},
	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC"},
	{ RL_HWREV_8169_8110SBL, RL_8169, "8169SBL"},
	{ RL_HWREV_8100, RL_8139, "8100"},
	{ RL_HWREV_8101, RL_8139, "8101"},
	{ RL_HWREV_8100E, RL_8169, "8100E"},
	{ RL_HWREV_8101E, RL_8169, "8101E"},
	{ RL_HWREV_8102E, RL_8169, "8102E"},
	{ RL_HWREV_8102EL, RL_8169, "8102EL"},
	{ RL_HWREV_8168_SPIN2, RL_8169, "8168"},
	{ RL_HWREV_8168_SPIN3, RL_8169, "8168"},
	{ RL_HWREV_8168C, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
	{ RL_HWREV_8168D, RL_8169, "8168D"},
	{ 0, 0, NULL }
};

static int re_probe		(device_t);
static int re_attach		(device_t);
static int re_detach		(device_t);

static int re_encap		(struct rl_softc *, struct mbuf **);

static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
static int re_allocmem		(device_t, struct rl_softc *);
static __inline void re_discard_rxbuf
				(struct rl_softc *, int);
static int re_newbuf		(struct rl_softc *, int);
static int re_rx_list_init	(struct rl_softc *);
static int re_tx_list_init	(struct rl_softc *);
#ifdef RE_FIXUP_RX
static __inline void re_fixup_rx
				(struct mbuf *);
#endif
static int re_rxeof		(struct rl_softc *);
static void re_txeof		(struct rl_softc *);
#ifdef DEVICE_POLLING
static void re_poll		(struct ifnet *, enum poll_cmd, int);
static void re_poll_locked	(struct ifnet *, enum poll_cmd, int);
#endif
static int re_intr		(void *);
static void re_tick		(void *);
static void re_tx_task		(void *, int);
static void re_int_task		(void *, int);
static void re_start		(struct ifnet *);
static int re_ioctl		(struct ifnet *, u_long, caddr_t);
static void re_init		(void *);
static void re_init_locked	(struct rl_softc *);
static void re_stop		(struct rl_softc *);
static void re_watchdog		(struct rl_softc *);
static int re_suspend		(device_t);
static int re_resume		(device_t);
static int re_shutdown		(device_t);
static int re_ifmedia_upd	(struct ifnet *);
static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);

static void re_eeprom_putbyte	(struct rl_softc *, int);
static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
static int re_gmii_readreg	(device_t, int, int);
static int re_gmii_writereg	(device_t, int, int, int);

static int re_miibus_readreg	(device_t, int, int);
static int re_miibus_writereg	(device_t, int, int, int);
static void re_miibus_statchg	(device_t);

static void re_setmulti		(struct rl_softc *);
static void re_reset		(struct rl_softc *);
static void re_setwol		(struct rl_softc *);
static void re_clrwol		(struct rl_softc *);

#ifdef RE_DIAG
static int re_diag		(struct rl_softc *);
#endif

static device_method_t re_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		re_probe),
	DEVMETHOD(device_attach,	re_attach),
	DEVMETHOD(device_detach,	re_detach),
	DEVMETHOD(device_suspend,	re_suspend),
	DEVMETHOD(device_resume,	re_resume),
	DEVMETHOD(device_shutdown,	re_shutdown),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	/* MII interface */
	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
	DEVMETHOD(miibus_statchg,	re_miibus_statchg),

	{ 0, 0 }
};

static driver_t re_driver = {
	"re",
	re_methods,
	sizeof(struct rl_softc)
};

static devclass_t re_devclass;

DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(re, cardbus, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);

#define EE_SET(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) | x)

#define EE_CLR(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) & ~x)

/*
 * Send a read command and address to the EEPROM, check for ACK.
 */
static void
re_eeprom_putbyte(struct rl_softc *sc, int addr)
{
	int			d, i;

	d = addr | (RL_9346_READ << sc->rl_eewidth);
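	/*
	 * Sketch of the command word 'd' built above (inferred from this
	 * code, not from an EEPROM datasheet): the RL_9346_READ opcode
	 * sits immediately above the rl_eewidth address bits, so with a
	 * 6-bit address the loop below clocks out the 10 bits
	 * (1 << 9 down to 1 << 0) of opcode plus address, MSB first.
	 */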

	/*
	 * Feed in each bit and strobe the clock.
	 */

	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
		if (d & i) {
			EE_SET(RL_EE_DATAIN);
		} else {
			EE_CLR(RL_EE_DATAIN);
		}
		DELAY(100);
		EE_SET(RL_EE_CLK);
		DELAY(150);
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}
}

/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 */
static void
re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
{
	int			i;
	u_int16_t		word = 0;

	/*
	 * Send address of word we want to read.
	 */
	re_eeprom_putbyte(sc, addr);

	/*
	 * Start reading bits from EEPROM.
	 */
	for (i = 0x8000; i; i >>= 1) {
		EE_SET(RL_EE_CLK);
		DELAY(100);
		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
			word |= i;
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}

	*dest = word;
}

/*
 * Read a sequence of words from the EEPROM.
 */
static void
re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
{
	int			i;
	u_int16_t		word = 0, *ptr;

	CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);

	DELAY(100);

	for (i = 0; i < cnt; i++) {
		CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
		re_eeprom_getword(sc, off + i, &word);
		CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
		ptr = (u_int16_t *)(dest + (i * 2));
		*ptr = word;
	}

	CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
}

static int
re_gmii_readreg(device_t dev, int phy, int reg)
{
	struct rl_softc		*sc;
	u_int32_t		rval;
	int			i;

	if (phy != 1)
		return (0);

	sc = device_get_softc(dev);

	/* Let the rgephy driver read the GMEDIASTAT register */

	if (reg == RL_GMEDIASTAT) {
		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
		return (rval);
	}

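	/*
	 * Note the asymmetry in the BUSY polling (as written in this
	 * function and in re_gmii_writereg() below): after the register
	 * number is written to RL_PHYAR, a read waits for RL_PHYAR_BUSY
	 * to become set (read data valid), while a write waits for it
	 * to clear (write completed).
	 */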
	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);

	for (i = 0; i < RL_TIMEOUT; i++) {
		rval = CSR_READ_4(sc, RL_PHYAR);
		if (rval & RL_PHYAR_BUSY)
			break;
		DELAY(100);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev, "PHY read failed\n");
		return (0);
	}

	return (rval & RL_PHYAR_PHYDATA);
}

static int
re_gmii_writereg(device_t dev, int phy, int reg, int data)
{
	struct rl_softc		*sc;
	u_int32_t		rval;
	int			i;

	sc = device_get_softc(dev);

	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);

	for (i = 0; i < RL_TIMEOUT; i++) {
		rval = CSR_READ_4(sc, RL_PHYAR);
		if (!(rval & RL_PHYAR_BUSY))
			break;
		DELAY(100);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev, "PHY write failed\n");
		return (0);
	}

	return (0);
}

static int
re_miibus_readreg(device_t dev, int phy, int reg)
{
	struct rl_softc		*sc;
	u_int16_t		rval = 0;
	u_int16_t		re8139_reg = 0;

	sc = device_get_softc(dev);

	if (sc->rl_type == RL_8169) {
		rval = re_gmii_readreg(dev, phy, reg);
		return (rval);
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy) {
		return (0);
	}
	switch (reg) {
	case MII_BMCR:
		re8139_reg = RL_BMCR;
		break;
	case MII_BMSR:
		re8139_reg = RL_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RL_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RL_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RL_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		return (0);
	/*
	 * Allow the rlphy driver to read the media status
	 * register. If we have a link partner which does not
	 * support NWAY, this is the register which will tell
	 * us the results of parallel detection.
	 */
	case RL_MEDIASTAT:
		rval = CSR_READ_1(sc, RL_MEDIASTAT);
		return (rval);
	default:
		device_printf(sc->rl_dev, "bad phy register\n");
		return (0);
	}
	rval = CSR_READ_2(sc, re8139_reg);
	if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
		/* 8139C+ has different bit layout. */
		rval &= ~(BMCR_LOOP | BMCR_ISO);
	}
	return (rval);
}

static int
re_miibus_writereg(device_t dev, int phy, int reg, int data)
{
	struct rl_softc		*sc;
	u_int16_t		re8139_reg = 0;
	int			rval = 0;

	sc = device_get_softc(dev);

	if (sc->rl_type == RL_8169) {
		rval = re_gmii_writereg(dev, phy, reg, data);
		return (rval);
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy)
		return (0);

	switch (reg) {
	case MII_BMCR:
		re8139_reg = RL_BMCR;
		if (sc->rl_type == RL_8139CPLUS) {
			/* 8139C+ has different bit layout. */
			data &= ~(BMCR_LOOP | BMCR_ISO);
		}
		break;
	case MII_BMSR:
		re8139_reg = RL_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RL_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RL_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RL_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		return (0);
	default:
		device_printf(sc->rl_dev, "bad phy register\n");
		return (0);
	}
	CSR_WRITE_2(sc, re8139_reg, data);
	return (0);
}

static void
re_miibus_statchg(device_t dev)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	struct mii_data		*mii;

	sc = device_get_softc(dev);
	mii = device_get_softc(sc->rl_miibus);
	ifp = sc->rl_ifp;
	if (mii == NULL || ifp == NULL ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	sc->rl_flags &= ~RL_FLAG_LINK;
	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
	    (IFM_ACTIVE | IFM_AVALID)) {
		switch (IFM_SUBTYPE(mii->mii_media_active)) {
		case IFM_10_T:
		case IFM_100_TX:
			sc->rl_flags |= RL_FLAG_LINK;
			break;
		case IFM_1000_T:
			if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
				break;
			sc->rl_flags |= RL_FLAG_LINK;
			break;
		default:
			break;
		}
	}
	/*
	 * RealTek controllers do not provide any interface to the
	 * Tx/Rx MACs for the resolved speed, duplex and flow-control
	 * parameters.
	 */
}

/*
 * Program the 64-bit multicast hash filter.
 */
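/*
 * A sketch of what the code below computes (mirroring the code, not chip
 * documentation): the upper 6 bits of the big-endian CRC32 of each
 * multicast address select one of 64 filter bits spread across the two
 * 32-bit registers RL_MAR0 and RL_MAR4:
 *
 *	h = ether_crc32_be(lladdr, ETHER_ADDR_LEN) >> 26;
 *	if (h < 32)
 *		hashes[0] |= 1 << h;		(programmed into RL_MAR0)
 *	else
 *		hashes[1] |= 1 << (h - 32);	(programmed into RL_MAR4)
 */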
static void
re_setmulti(struct rl_softc *sc)
{
	struct ifnet		*ifp;
	int			h = 0;
	u_int32_t		hashes[2] = { 0, 0 };
	struct ifmultiaddr	*ifma;
	u_int32_t		rxfilt;
	int			mcnt = 0;

	RL_LOCK_ASSERT(sc);

	ifp = sc->rl_ifp;

	rxfilt = CSR_READ_4(sc, RL_RXCFG);
	rxfilt &= ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_MULTI);
	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
		if (ifp->if_flags & IFF_PROMISC)
			rxfilt |= RL_RXCFG_RX_ALLPHYS;
		/*
		 * Unlike other hardware, we have to explicitly set
		 * RL_RXCFG_RX_MULTI to receive multicast frames in
		 * promiscuous mode.
		 */
		rxfilt |= RL_RXCFG_RX_MULTI;
		CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
		CSR_WRITE_4(sc, RL_MAR0, 0xFFFFFFFF);
		CSR_WRITE_4(sc, RL_MAR4, 0xFFFFFFFF);
		return;
	}

	/* first, zot all the existing hash bits */
	CSR_WRITE_4(sc, RL_MAR0, 0);
	CSR_WRITE_4(sc, RL_MAR4, 0);

	/* now program new ones */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
		if (h < 32)
			hashes[0] |= (1 << h);
		else
			hashes[1] |= (1 << (h - 32));
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	if (mcnt)
		rxfilt |= RL_RXCFG_RX_MULTI;
	else
		rxfilt &= ~RL_RXCFG_RX_MULTI;

	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);

	/*
	 * For some unfathomable reason, RealTek decided to reverse
	 * the order of the multicast hash registers in the PCI Express
	 * parts. This means we have to write the hash pattern in reverse
	 * order for those devices.
	 */

	if ((sc->rl_flags & RL_FLAG_INVMAR) != 0) {
		CSR_WRITE_4(sc, RL_MAR0, bswap32(hashes[1]));
		CSR_WRITE_4(sc, RL_MAR4, bswap32(hashes[0]));
	} else {
		CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
		CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
	}
}

static void
re_reset(struct rl_softc *sc)
{
	int			i;

	RL_LOCK_ASSERT(sc);

	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);

	for (i = 0; i < RL_TIMEOUT; i++) {
		DELAY(10);
		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
			break;
	}
	if (i == RL_TIMEOUT)
		device_printf(sc->rl_dev, "reset never completed!\n");

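	/*
	 * The post-reset fixups below appear to be magic numbers
	 * inherited from RealTek's reference driver; register 0x82 is
	 * not otherwise documented here.
	 */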
	if ((sc->rl_flags & RL_FLAG_PHY8169) != 0)
		CSR_WRITE_1(sc, 0x82, 1);
	if ((sc->rl_flags & RL_FLAG_PHY8110S) != 0) {
		CSR_WRITE_1(sc, 0x82, 1);
		re_gmii_writereg(sc->rl_dev, 1, 0x0B, 0);
	}
}

#ifdef RE_DIAG

/*
 * The following routine is designed to test for a defect on some
 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
 * lines connected to the bus, however for a 32-bit only card, they
 * should be pulled high. The result of this defect is that the
 * NIC will not work right if you plug it into a 64-bit slot: DMA
 * operations will be done with 64-bit transfers, which will fail
 * because the 64-bit data lines aren't connected.
 *
 * There's no way to work around this (short of taking a soldering
 * iron to the board), however we can detect it. The method we use
 * here is to put the NIC into digital loopback mode, set the receiver
 * to promiscuous mode, and then try to send a frame. We then compare
 * the frame data we sent to what was received. If the data matches,
 * then the NIC is working correctly, otherwise we know the user has
 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
 * slot. In the latter case, there's no way the NIC can work correctly,
 * so we print out a message on the console and abort the device attach.
 */

static int
re_diag(struct rl_softc *sc)
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mbuf		*m0;
	struct ether_header	*eh;
	struct rl_desc		*cur_rx;
	u_int16_t		status;
	u_int32_t		rxstat;
	int			total_len, i, error = 0, phyaddr;
	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

	/* Allocate a single mbuf */
	MGETHDR(m0, M_DONTWAIT, MT_DATA);
	if (m0 == NULL)
		return (ENOBUFS);

	RL_LOCK(sc);

	/*
	 * Initialize the NIC in test mode. This sets the chip up
	 * so that it can send and receive frames, but performs the
	 * following special functions:
	 * - Puts receiver in promiscuous mode
	 * - Enables digital loopback mode
	 * - Leaves interrupts turned off
	 */

	ifp->if_flags |= IFF_PROMISC;
	sc->rl_testmode = 1;
	re_init_locked(sc);
	sc->rl_flags |= RL_FLAG_LINK;
	if (sc->rl_type == RL_8169)
		phyaddr = 1;
	else
		phyaddr = 0;

	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
		if (!(status & BMCR_RESET))
			break;
	}

	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);

	DELAY(100000);

	/* Put some data in the mbuf */

	eh = mtod(m0, struct ether_header *);
	bcopy((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
	bcopy((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
	eh->ether_type = htons(ETHERTYPE_IP);
	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

	/*
	 * Queue the packet, start transmission.
	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
	 */

	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
	RL_UNLOCK(sc);
	/* XXX: re_diag must not be called when in ALTQ mode */
	IF_HANDOFF(&ifp->if_snd, m0, ifp);
	RL_LOCK(sc);
	m0 = NULL;

	/* Wait for it to propagate through the chip */

	DELAY(100000);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = CSR_READ_2(sc, RL_ISR);
		CSR_WRITE_2(sc, RL_ISR, status);
		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
			break;
		DELAY(10);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev,
		    "diagnostic failed: no packet received in"
		    " loopback mode\n");
		error = EIO;
		goto done;
	}

	/*
	 * The packet should have been dumped into the first
	 * entry in the RX DMA ring. Grab it from there.
	 */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);

	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
	eh = mtod(m0, struct ether_header *);

	cur_rx = &sc->rl_ldata.rl_rx_list[0];
	total_len = RL_RXBYTES(cur_rx);
	rxstat = le32toh(cur_rx->rl_cmdstat);

	if (total_len != ETHER_MIN_LEN) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, received short packet\n");
		error = EIO;
		goto done;
	}

	/* Test that the received packet data matches what we sent. */

	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
		    dst, ":", src, ":", ETHERTYPE_IP);
		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
		    eh->ether_dhost, ":", eh->ether_shost, ":",
		    ntohs(eh->ether_type));
		device_printf(sc->rl_dev, "You may have a defective 32-bit "
		    "NIC plugged into a 64-bit PCI slot.\n");
		device_printf(sc->rl_dev, "Please re-install the NIC in a "
		    "32-bit slot for proper operation.\n");
		device_printf(sc->rl_dev, "Read the re(4) man page for more "
		    "details.\n");
		error = EIO;
	}

done:
	/* Turn interface off, release resources */

	sc->rl_testmode = 0;
	sc->rl_flags &= ~RL_FLAG_LINK;
	ifp->if_flags &= ~IFF_PROMISC;
	re_stop(sc);
	if (m0 != NULL)
		m_freem(m0);

	RL_UNLOCK(sc);

	return (error);
}

#endif

/*
 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
 * IDs against our list and return a device name if we find a match.
 */
static int
re_probe(device_t dev)
{
	struct rl_type		*t;
	uint16_t		devid, vendor;
	uint16_t		revid, sdevid;
	int			i;

	vendor = pci_get_vendor(dev);
	devid = pci_get_device(dev);
	revid = pci_get_revid(dev);
	sdevid = pci_get_subdevice(dev);

	if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
		if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
			/*
			 * Only attach to rev. 3 of the Linksys EG1032 adapter.
			 * Rev. 2 is supported by sk(4).
			 */
			return (ENXIO);
		}
	}

	if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
		if (revid != 0x20) {
			/* 8139, let rl(4) take care of this device. */
			return (ENXIO);
		}
	}

	t = re_devs;
	for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
		if (vendor == t->rl_vid && devid == t->rl_did) {
			device_set_desc(dev, t->rl_name);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

/*
 * Map a single buffer address.
 */

static void
re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t		*addr;

	if (error)
		return;

	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
	addr = arg;
	*addr = segs->ds_addr;
}

static int
re_allocmem(device_t dev, struct rl_softc *sc)
{
	bus_size_t		rx_list_size, tx_list_size;
	int			error;
	int			i;

	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);

	/*
	 * Allocate the parent bus DMA tag appropriate for PCI.
	 * In order to use DAC, the RL_CPLUSCMD_PCI_DAC bit of the
	 * RL_CPLUS_CMD register should be set. However, some RealTek
	 * chips are known to be buggy in their DAC handling, so disable
	 * DAC by limiting the DMA address space to 32 bits. PCIe variants
	 * of the RealTek chips may not have this limitation, but I took
	 * the safer path.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    NULL, NULL, &sc->rl_parent_tag);
	if (error) {
		device_printf(dev, "could not allocate parent DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX mbufs.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for RX mbufs.
	 */

	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
	if (error) {
		device_printf(dev, "could not allocate RX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, tx_list_size, 1, tx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the TX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
	    (void **)&sc->rl_ldata.rl_tx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_tx_list_map);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring\n");
		return (error);
	}

	/* Load the map for the TX ring. */

	sc->rl_ldata.rl_tx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
	     tx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
		device_printf(dev, "could not load TX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for TX buffers */

	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for TX\n");
			return (error);
		}
	}

	/*
	 * Allocate map for RX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, rx_list_size, 1, rx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
	if (error) {
		device_printf(dev, "could not create RX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the RX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
	    (void **)&sc->rl_ldata.rl_rx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_rx_list_map);
	if (error) {
		device_printf(dev, "could not allocate RX DMA ring\n");
		return (error);
	}

	/* Load the map for the RX ring. */

	sc->rl_ldata.rl_rx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
	     rx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
		device_printf(dev, "could not load RX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for RX buffers */

	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
	    &sc->rl_ldata.rl_rx_sparemap);
	if (error) {
		device_printf(dev, "could not create spare DMA map for RX\n");
		return (error);
	}
	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for RX\n");
			return (error);
		}
	}

	return (0);
}

/*
 * Attach the interface. Allocate softc structures, do ifmedia
 * setup and ethernet/BPF attach.
 */
static int
re_attach(device_t dev)
{
	u_char			eaddr[ETHER_ADDR_LEN];
	u_int16_t		as[ETHER_ADDR_LEN / 2];
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	struct rl_hwrev		*hw_rev;
	int			hwrev;
	u_int16_t		devid, re_did = 0;
	int			error = 0, rid, i;
	int			msic, reg;
	uint8_t			cfg;

	sc = device_get_softc(dev);
	sc->rl_dev = dev;

	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
	    MTX_DEF);
	callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);

	/*
	 * Map control/status registers.
	 */
	pci_enable_busmaster(dev);

	devid = pci_get_device(dev);
	/* Prefer memory space register mapping over IO space. */
	sc->rl_res_id = PCIR_BAR(1);
	sc->rl_res_type = SYS_RES_MEMORY;
	/* The RTL8168/8101E seem to use different BARs. */
	if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
		sc->rl_res_id = PCIR_BAR(2);
	sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
	    &sc->rl_res_id, RF_ACTIVE);

	if (sc->rl_res == NULL) {
		sc->rl_res_id = PCIR_BAR(0);
		sc->rl_res_type = SYS_RES_IOPORT;
		sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
		    &sc->rl_res_id, RF_ACTIVE);
		if (sc->rl_res == NULL) {
			device_printf(dev, "couldn't map ports/memory\n");
			error = ENXIO;
			goto fail;
		}
	}

	sc->rl_btag = rman_get_bustag(sc->rl_res);
	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);

	msic = 0;
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		msic = pci_msi_count(dev);
		if (bootverbose)
			device_printf(dev, "MSI count: %d\n", msic);
	}
	if (msic == RL_MSI_MESSAGES && msi_disable == 0) {
		if (pci_alloc_msi(dev, &msic) == 0) {
			if (msic == RL_MSI_MESSAGES) {
				device_printf(dev, "Using %d MSI messages\n",
				    msic);
				sc->rl_flags |= RL_FLAG_MSI;
				/* Explicitly set MSI enable bit. */
				CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
				cfg = CSR_READ_1(sc, RL_CFG2);
				cfg |= RL_CFG2_MSI;
				CSR_WRITE_1(sc, RL_CFG2, cfg);
				CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
			} else
				pci_release_msi(dev);
		}
	}

	/* Allocate interrupt */
	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
		rid = 0;
		sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (sc->rl_irq[0] == NULL) {
			device_printf(dev, "couldn't allocate IRQ resources\n");
			error = ENXIO;
			goto fail;
		}
	} else {
		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
			sc->rl_irq[i] = bus_alloc_resource_any(dev,
			    SYS_RES_IRQ, &rid, RF_ACTIVE);
			if (sc->rl_irq[i] == NULL) {
				device_printf(dev,
				    "couldn't allocate IRQ resources for "
				    "message %d\n", rid);
				error = ENXIO;
				goto fail;
			}
		}
	}

	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
		CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
		cfg = CSR_READ_1(sc, RL_CFG2);
		if ((cfg & RL_CFG2_MSI) != 0) {
			device_printf(dev, "turning off MSI enable bit.\n");
			cfg &= ~RL_CFG2_MSI;
			CSR_WRITE_1(sc, RL_CFG2, cfg);
		}
		CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
	}

	/* Reset the adapter. */
	RL_LOCK(sc);
	re_reset(sc);
	RL_UNLOCK(sc);

	hw_rev = re_hwrevs;
	hwrev = CSR_READ_4(sc, RL_TXCFG);
	device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
	device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
	hwrev &= RL_TXCFG_HWREV;
	while (hw_rev->rl_desc != NULL) {
		if (hw_rev->rl_rev == hwrev) {
			sc->rl_type = hw_rev->rl_type;
			break;
		}
		hw_rev++;
	}
	if (hw_rev->rl_desc == NULL) {
		device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
		error = ENXIO;
		goto fail;
	}

	switch (hw_rev->rl_rev) {
	case RL_HWREV_8139CPLUS:
		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER;
		break;
	case RL_HWREV_8110S:
		sc->rl_flags |= RL_FLAG_PHY8110S;
		break;
	case RL_HWREV_8100E:
	case RL_HWREV_8101E:
		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_INVMAR |
		    RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER;
		break;
	case RL_HWREV_8102E:
	case RL_HWREV_8102EL:
		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_INVMAR |
		    RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 |
		    RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP;
		break;
	case RL_HWREV_8168_SPIN1:
	case RL_HWREV_8168_SPIN2:
		sc->rl_flags |= RL_FLAG_WOLRXENB;
		/* FALLTHROUGH */
	case RL_HWREV_8168_SPIN3:
		sc->rl_flags |= RL_FLAG_INVMAR | RL_FLAG_PHYWAKE |
		    RL_FLAG_MACSTAT;
		break;
	case RL_HWREV_8168C:
	case RL_HWREV_8168C_SPIN2:
	case RL_HWREV_8168CP:
	case RL_HWREV_8168D:
		sc->rl_flags |= RL_FLAG_INVMAR | RL_FLAG_PHYWAKE |
		    RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
		    RL_FLAG_CMDSTOP;
		/*
		 * These controllers support jumbo frames, but it seems
		 * that enabling them requires touching additional magic
		 * registers. Depending on the MAC revision, some
		 * controllers also need to disable checksum offload. So
		 * disable jumbo frames until I have a better idea of what
		 * is really required to support them.
		 * RTL8168C/CP : supports up to 6KB jumbo frame.
		 * RTL8111C/CP : supports up to 9KB jumbo frame.
		 */
		sc->rl_flags |= RL_FLAG_NOJUMBO;
		break;
	case RL_HWREV_8169:
	case RL_HWREV_8169S:
		sc->rl_flags |= RL_FLAG_PHY8169;
		break;
	case RL_HWREV_8169_8110SB:
	case RL_HWREV_8169_8110SC:
	case RL_HWREV_8169_8110SBL:
		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHY8169;
		break;
	default:
		break;
	}

	/* Enable PME. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
	cfg = CSR_READ_1(sc, RL_CFG1);
	cfg |= RL_CFG1_PME;
	CSR_WRITE_1(sc, RL_CFG1, cfg);
	cfg = CSR_READ_1(sc, RL_CFG5);
	cfg &= RL_CFG5_PME_STS;
	CSR_WRITE_1(sc, RL_CFG5, cfg);
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
		/*
		 * XXX Should have a better way to extract station
		 * address from EEPROM.
		 */
		for (i = 0; i < ETHER_ADDR_LEN; i++)
			eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
	} else {
		sc->rl_eewidth = RL_9356_ADDR_LEN;
		re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
		if (re_did != 0x8129)
			sc->rl_eewidth = RL_9346_ADDR_LEN;

		/*
		 * Get station address from the EEPROM.
		 */
		re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
		for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
			as[i] = le16toh(as[i]);
		bcopy(as, eaddr, sizeof(eaddr));
	}

	if (sc->rl_type == RL_8169) {
		/* Set RX length mask and number of descriptors. */
		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
		sc->rl_txstart = RL_GTXSTART;
		sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
		sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
	} else {
		/* Set RX length mask and number of descriptors. */
		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
		sc->rl_txstart = RL_TXSTART;
		sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
		sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
	}

	error = re_allocmem(dev, sc);
	if (error)
		goto fail;

	ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot if_alloc()\n");
		error = ENOSPC;
		goto fail;
	}

	/* Take PHY out of power down mode. */
	if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
		re_gmii_writereg(dev, 1, 0x1f, 0);
		re_gmii_writereg(dev, 1, 0x0e, 0);
	}

	/* Do MII setup */
	if (mii_phy_probe(dev, &sc->rl_miibus,
	    re_ifmedia_upd, re_ifmedia_sts)) {
		device_printf(dev, "MII without any phy!\n");
		error = ENXIO;
		goto fail;
	}

	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = re_ioctl;
	ifp->if_start = re_start;
	ifp->if_hwassist = RE_CSUM_FEATURES;
	ifp->if_capabilities = IFCAP_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_init = re_init;
	IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
	IFQ_SET_READY(&ifp->if_snd);

	TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
	TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);

	/*
	 * XXX
	 * Still have no idea how to make TSO work on 8168C, 8168CP,
	 * 8111C and 8111CP.
	 */
	if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
		ifp->if_hwassist |= CSUM_TSO;
		ifp->if_capabilities |= IFCAP_TSO4;
	}

	/*
	 * Call MI attach routine.
	 */
	ether_ifattach(ifp, eaddr);

	/* VLAN capability setup */
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
	if (ifp->if_capabilities & IFCAP_HWCSUM)
		ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
	/* Enable WOL if PM is supported. */
	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
		ifp->if_capabilities |= IFCAP_WOL;
	ifp->if_capenable = ifp->if_capabilities;
	/*
	 * Don't enable TSO by default. Under certain
	 * circumstances the controller generated corrupted
	 * TSO packets.
	 */
	ifp->if_hwassist &= ~CSUM_TSO;
	ifp->if_capenable &= ~IFCAP_TSO4;
#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif
	/*
	 * Tell the upper layer(s) we support long frames.
	 * Must appear after the call to ether_ifattach() because
	 * ether_ifattach() sets ifi_hdrlen to the default value.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

#ifdef RE_DIAG
	/*
	 * Perform hardware diagnostic on the original RTL8169.
	 * Some 32-bit cards were incorrectly wired and would
	 * malfunction if plugged into a 64-bit slot.
	 */

	if (hwrev == RL_HWREV_8169) {
		error = re_diag(sc);
		if (error) {
			device_printf(dev,
			    "attach aborted due to hardware diag failure\n");
			ether_ifdetach(ifp);
			goto fail;
		}
	}
#endif

	/* Hook interrupt last to avoid having to lock softc */
	if ((sc->rl_flags & RL_FLAG_MSI) == 0)
		error = bus_setup_intr(dev, sc->rl_irq[0],
		    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
		    &sc->rl_intrhand[0]);
	else {
		for (i = 0; i < RL_MSI_MESSAGES; i++) {
			error = bus_setup_intr(dev, sc->rl_irq[i],
			    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
			    &sc->rl_intrhand[i]);
			if (error != 0)
				break;
		}
	}
	if (error) {
		device_printf(dev, "couldn't set up irq\n");
		ether_ifdetach(ifp);
	}

fail:

	if (error)
		re_detach(dev);

	return (error);
}

/*
 * Shutdown hardware and free up resources. This can be called any
 * time after the mutex has been initialized. It is called in both
 * the error case in attach and the normal detach case so it needs
 * to be careful about only freeing resources that have actually been
 * allocated.
 */
static int
re_detach(device_t dev)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	int			i, rid;

	sc = device_get_softc(dev);
	ifp = sc->rl_ifp;
	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));

	/* These should only be active if attach succeeded */
	if (device_is_attached(dev)) {
#ifdef DEVICE_POLLING
		if (ifp->if_capenable & IFCAP_POLLING)
			ether_poll_deregister(ifp);
#endif
		RL_LOCK(sc);
#if 0
		sc->suspended = 1;
#endif
		re_stop(sc);
		RL_UNLOCK(sc);
		callout_drain(&sc->rl_stat_callout);
		taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
		taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
		/*
		 * Force off the IFF_UP flag here, in case someone
		 * still had a BPF descriptor attached to this
		 * interface. If they do, ether_ifdetach() will cause
		 * the BPF code to try and clear the promisc mode
		 * flag, which will bubble down to re_ioctl(),
		 * which will try to call re_init() again. This will
		 * turn the NIC back on and restart the MII ticker,
		 * which will panic the system when the kernel tries
		 * to invoke the re_tick() function that isn't there
		 * anymore.
		 */
		ifp->if_flags &= ~IFF_UP;
		ether_ifdetach(ifp);
	}
	if (sc->rl_miibus)
		device_delete_child(dev, sc->rl_miibus);
	bus_generic_detach(dev);

	/*
	 * The rest is resource deallocation, so we should already be
	 * stopped here.
	 */

	for (i = 0; i < RL_MSI_MESSAGES; i++) {
		if (sc->rl_intrhand[i] != NULL) {
			bus_teardown_intr(dev, sc->rl_irq[i],
			    sc->rl_intrhand[i]);
			sc->rl_intrhand[i] = NULL;
		}
	}
	if (ifp != NULL)
		if_free(ifp);
	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
		if (sc->rl_irq[0] != NULL) {
			bus_release_resource(dev, SYS_RES_IRQ, 0,
			    sc->rl_irq[0]);
			sc->rl_irq[0] = NULL;
		}
	} else {
		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
			if (sc->rl_irq[i] != NULL) {
				bus_release_resource(dev, SYS_RES_IRQ, rid,
				    sc->rl_irq[i]);
				sc->rl_irq[i] = NULL;
			}
		}
		pci_release_msi(dev);
	}
	if (sc->rl_res)
		bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
		    sc->rl_res);

	/* Unload and free the RX DMA ring memory and map */

	if (sc->rl_ldata.rl_rx_list_tag) {
		bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list_map);
		bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list,
		    sc->rl_ldata.rl_rx_list_map);
		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
	}

	/* Unload and free the TX DMA ring memory and map */

	if (sc->rl_ldata.rl_tx_list_tag) {
		bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
		    sc->rl_ldata.rl_tx_list_map);
		bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
		    sc->rl_ldata.rl_tx_list,
		    sc->rl_ldata.rl_tx_list_map);
		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
	}

	/* Destroy all the RX and TX buffer maps */

	if (sc->rl_ldata.rl_tx_mtag) {
		for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
			bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
			    sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
	}
	if (sc->rl_ldata.rl_rx_mtag) {
		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
			    sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (sc->rl_ldata.rl_rx_sparemap)
			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
			    sc->rl_ldata.rl_rx_sparemap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
	}

	/* Unload and free the stats buffer and map */

	if (sc->rl_ldata.rl_stag) {
		bus_dmamap_unload(sc->rl_ldata.rl_stag,
		    sc->rl_ldata.rl_smap);
		bus_dmamem_free(sc->rl_ldata.rl_stag,
		    sc->rl_ldata.rl_stats,
		    sc->rl_ldata.rl_smap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
	}

	if (sc->rl_parent_tag)
		bus_dma_tag_destroy(sc->rl_parent_tag);

	mtx_destroy(&sc->rl_mtx);

	return (0);
}

static __inline void
re_discard_rxbuf(struct rl_softc *sc, int idx)
{
	struct rl_desc		*desc;
	struct rl_rxdesc	*rxd;
	uint32_t		cmdstat;

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	cmdstat = rxd->rx_size;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
}

static int
re_newbuf(struct rl_softc *sc, int idx)
{
	struct mbuf		*m;
	struct rl_rxdesc	*rxd;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct rl_desc		*desc;
	uint32_t		cmdstat;
	int			error, nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);

	m->m_len = m->m_pkthdr.len = MCLBYTES;
#ifdef RE_FIXUP_RX
	/*
	 * This is part of an evil trick to deal with non-x86 platforms.
	 * The RealTek chip requires RX buffers to be aligned on 64-bit
	 * boundaries, but that will hose non-x86 machines. To get around
	 * this, we leave some empty space at the start of each buffer
	 * and for non-x86 hosts, we copy the buffer back six bytes
	 * to achieve word alignment. This is slightly more efficient
	 * than allocating a new buffer, copying the contents, and
	 * discarding the old buffer.
	 */
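	/*
	 * Working through the numbers (derived from re_fixup_rx() below,
	 * where the copy-back distance is RE_ETHER_ALIGN - ETHER_ALIGN,
	 * i.e. six bytes): the buffer start is advanced RE_ETHER_ALIGN
	 * bytes here, and re_fixup_rx() later slides the frame back six
	 * bytes, leaving the payload at offset ETHER_ALIGN (2), so the
	 * IP header lands 4-byte aligned after the 14-byte Ethernet
	 * header.
	 */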
1657 	m_adj(m, RE_ETHER_ALIGN);
1658 #endif
1659 	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1660 	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1661 	if (error != 0) {
1662 		m_freem(m);
1663 		return (ENOBUFS);
1664 	}
1665 	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
1666 
1667 	rxd = &sc->rl_ldata.rl_rx_desc[idx];
1668 	if (rxd->rx_m != NULL) {
1669 		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1670 		    BUS_DMASYNC_POSTREAD);
1671 		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1672 	}
1673 
1674 	rxd->rx_m = m;
1675 	map = rxd->rx_dmamap;
1676 	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1677 	rxd->rx_size = segs[0].ds_len;
1678 	sc->rl_ldata.rl_rx_sparemap = map;
1679 	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1680 	    BUS_DMASYNC_PREREAD);
1681 
1682 	desc = &sc->rl_ldata.rl_rx_list[idx];
1683 	desc->rl_vlanctl = 0;
1684 	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1685 	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1686 	cmdstat = segs[0].ds_len;
1687 	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1688 		cmdstat |= RL_RDESC_CMD_EOR;
1689 	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1690 
1691 	return (0);
1692 }
1693 
1694 #ifdef RE_FIXUP_RX
1695 static __inline void
1696 re_fixup_rx(struct mbuf *m)
1697 {
1698 	int                     i;
1699 	uint16_t                *src, *dst;
1700 
1701 	src = mtod(m, uint16_t *);
1702 	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;
1703 
1704 	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
1705 		*dst++ = *src++;
1706 
1707 	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
1708 }
1709 #endif
1710 
1711 static int
1712 re_tx_list_init(struct rl_softc *sc)
1713 {
1714 	struct rl_desc		*desc;
1715 	int			i;
1716 
1717 	RL_LOCK_ASSERT(sc);
1718 
1719 	bzero(sc->rl_ldata.rl_tx_list,
1720 	    sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1721 	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1722 		sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1723 	/* Set EOR. */
1724 	desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1725 	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1726 
1727 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1728 	    sc->rl_ldata.rl_tx_list_map,
1729 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1730 
1731 	sc->rl_ldata.rl_tx_prodidx = 0;
1732 	sc->rl_ldata.rl_tx_considx = 0;
1733 	sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1734 
1735 	return (0);
1736 }
1737 
1738 static int
1739 re_rx_list_init(struct rl_softc *sc)
1740 {
1741 	int			error, i;
1742 
1743 	bzero(sc->rl_ldata.rl_rx_list,
1744 	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1745 	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1746 		sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1747 		if ((error = re_newbuf(sc, i)) != 0)
1748 			return (error);
1749 	}
1750 
1751 	/* Flush the RX descriptors */
1752 
1753 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1754 	    sc->rl_ldata.rl_rx_list_map,
1755 	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1756 
1757 	sc->rl_ldata.rl_rx_prodidx = 0;
1758 	sc->rl_head = sc->rl_tail = NULL;
1759 
1760 	return (0);
1761 }
1762 
1763 /*
1764  * RX handler for C+ and 8169. For the gigE chips, we support
1765  * the reception of jumbo frames that have been fragmented
1766  * across multiple 2K mbuf cluster buffers.
1767  */
1768 static int
1769 re_rxeof(struct rl_softc *sc)
1770 {
1771 	struct mbuf		*m;
1772 	struct ifnet		*ifp;
1773 	int			i, total_len;
1774 	struct rl_desc		*cur_rx;
1775 	u_int32_t		rxstat, rxvlan;
1776 	int			maxpkt = 16;
1777 
1778 	RL_LOCK_ASSERT(sc);
1779 
1780 	ifp = sc->rl_ifp;
1781 
1782 	/* Invalidate the descriptor memory */
1783 
1784 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1785 	    sc->rl_ldata.rl_rx_list_map,
1786 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1787 
1788 	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1789 	    i = RL_RX_DESC_NXT(sc, i)) {
1790 		cur_rx = &sc->rl_ldata.rl_rx_list[i];
1791 		rxstat = le32toh(cur_rx->rl_cmdstat);
1792 		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1793 			break;
1794 		total_len = rxstat & sc->rl_rxlenmask;
1795 		rxvlan = le32toh(cur_rx->rl_vlanctl);
1796 		m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1797 
1798 		if (!(rxstat & RL_RDESC_STAT_EOF)) {
1799 			if (re_newbuf(sc, i) != 0) {
1800 				/*
1801 				 * If this is part of a multi-fragment packet,
1802 				 * discard all the pieces.
1803 				 */
1804 				if (sc->rl_head != NULL) {
1805 					m_freem(sc->rl_head);
1806 					sc->rl_head = sc->rl_tail = NULL;
1807 				}
1808 				re_discard_rxbuf(sc, i);
1809 				continue;
1810 			}
1811 			m->m_len = RE_RX_DESC_BUFLEN;
1812 			if (sc->rl_head == NULL)
1813 				sc->rl_head = sc->rl_tail = m;
1814 			else {
1815 				m->m_flags &= ~M_PKTHDR;
1816 				sc->rl_tail->m_next = m;
1817 				sc->rl_tail = m;
1818 			}
1819 			continue;
1820 		}
1821 
1822 		/*
1823 		 * NOTE: for the 8139C+, the frame length field
1824 		 * is always 12 bits in size, but for the gigE chips,
1825 		 * it is 13 bits (since the max RX frame length is 16K).
1826 		 * Unfortunately, all 32 bits in the status word
1827 		 * were already used, so to make room for the extra
1828 		 * length bit, RealTek took out the 'frame alignment
1829 		 * error' bit and shifted the other status bits
1830 		 * over one slot. The OWN, EOR, FS and LS bits are
1831 		 * still in the same places. We have already extracted
1832 		 * the frame length and checked the OWN bit, so rather
1833 		 * than using an alternate bit mapping, we shift the
1834 		 * status bits one space to the right so we can evaluate
1835 		 * them using the 8169 status as though it was in the
1836 		 * same format as that of the 8139C+.
1837 		 */
1838 		if (sc->rl_type == RL_8169)
1839 			rxstat >>= 1;
1840 
1841 		/*
1842 		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1843 		 * set; but if the CRC error bit is clear, the frame is still valid.
1844 		 */
1845 		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1846 		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1847 			ifp->if_ierrors++;
1848 			/*
1849 			 * If this is part of a multi-fragment packet,
1850 			 * discard all the pieces.
1851 			 */
1852 			if (sc->rl_head != NULL) {
1853 				m_freem(sc->rl_head);
1854 				sc->rl_head = sc->rl_tail = NULL;
1855 			}
1856 			re_discard_rxbuf(sc, i);
1857 			continue;
1858 		}
1859 
1860 		/*
1861 		 * If allocating a replacement mbuf fails,
1862 		 * reload the current one.
1863 		 */
1864 
1865 		if (re_newbuf(sc, i) != 0) {
1866 			ifp->if_iqdrops++;
1867 			if (sc->rl_head != NULL) {
1868 				m_freem(sc->rl_head);
1869 				sc->rl_head = sc->rl_tail = NULL;
1870 			}
1871 			re_discard_rxbuf(sc, i);
1872 			continue;
1873 		}
1874 
1875 		if (sc->rl_head != NULL) {
1876 			m->m_len = total_len % RE_RX_DESC_BUFLEN;
1877 			if (m->m_len == 0)
1878 				m->m_len = RE_RX_DESC_BUFLEN;
1879 			/*
1880 			 * Special case: if there are 4 bytes or fewer
1881 			 * in this buffer, the mbuf can be discarded:
1882 			 * the last 4 bytes are the CRC, which we don't
1883 			 * care about anyway.
1884 			 */
1885 			if (m->m_len <= ETHER_CRC_LEN) {
1886 				sc->rl_tail->m_len -=
1887 				    (ETHER_CRC_LEN - m->m_len);
1888 				m_freem(m);
1889 			} else {
1890 				m->m_len -= ETHER_CRC_LEN;
1891 				m->m_flags &= ~M_PKTHDR;
1892 				sc->rl_tail->m_next = m;
1893 			}
1894 			m = sc->rl_head;
1895 			sc->rl_head = sc->rl_tail = NULL;
1896 			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1897 		} else
1898 			m->m_pkthdr.len = m->m_len =
1899 			    (total_len - ETHER_CRC_LEN);
1900 
1901 #ifdef RE_FIXUP_RX
1902 		re_fixup_rx(m);
1903 #endif
1904 		ifp->if_ipackets++;
1905 		m->m_pkthdr.rcvif = ifp;
1906 
1907 		/* Do RX checksumming if enabled */
1908 
1909 		if (ifp->if_capenable & IFCAP_RXCSUM) {
1910 			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1911 				/* Check IP header checksum */
1912 				if (rxstat & RL_RDESC_STAT_PROTOID)
1913 					m->m_pkthdr.csum_flags |=
1914 					    CSUM_IP_CHECKED;
1915 				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1916 					m->m_pkthdr.csum_flags |=
1917 					    CSUM_IP_VALID;
1918 
1919 				/* Check TCP/UDP checksum */
1920 				if ((RL_TCPPKT(rxstat) &&
1921 				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1922 				    (RL_UDPPKT(rxstat) &&
1923 				     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1924 					m->m_pkthdr.csum_flags |=
1925 						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1926 					m->m_pkthdr.csum_data = 0xffff;
1927 				}
1928 			} else {
1929 				/*
1930 				 * RTL8168C/RTL8168CP/RTL8111C/RTL8111CP
1931 				 */
1932 				if ((rxstat & RL_RDESC_STAT_PROTOID) &&
1933 				    (rxvlan & RL_RDESC_IPV4))
1934 					m->m_pkthdr.csum_flags |=
1935 					    CSUM_IP_CHECKED;
1936 				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
1937 				    (rxvlan & RL_RDESC_IPV4))
1938 					m->m_pkthdr.csum_flags |=
1939 					    CSUM_IP_VALID;
1940 				if (((rxstat & RL_RDESC_STAT_TCP) &&
1941 				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1942 				    ((rxstat & RL_RDESC_STAT_UDP) &&
1943 				    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1944 					m->m_pkthdr.csum_flags |=
1945 						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1946 					m->m_pkthdr.csum_data = 0xffff;
1947 				}
1948 			}
1949 		}
1950 		maxpkt--;
1951 		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1952 			m->m_pkthdr.ether_vtag =
1953 			    bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
1954 			m->m_flags |= M_VLANTAG;
1955 		}
1956 		RL_UNLOCK(sc);
1957 		(*ifp->if_input)(ifp, m);
1958 		RL_LOCK(sc);
1959 	}
1960 
1961 	/* Flush the RX DMA ring */
1962 
1963 	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1964 	    sc->rl_ldata.rl_rx_list_map,
1965 	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1966 
1967 	sc->rl_ldata.rl_rx_prodidx = i;
1968 
1969 	if (maxpkt)
1970 		return (EAGAIN);
1971 
1972 	return (0);
1973 }
1974 
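/*
 * Reclaim completed TX descriptors.  Note that re_encap() stashes each
 * packet's mbuf and dmamap at the index of the last descriptor in its
 * chain, so the unload and m_freem() below are done only when the EOF
 * descriptor is reached.
 */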
1975 static void
1976 re_txeof(struct rl_softc *sc)
1977 {
1978 	struct ifnet		*ifp;
1979 	struct rl_txdesc	*txd;
1980 	u_int32_t		txstat;
1981 	int			cons;
1982 
1983 	cons = sc->rl_ldata.rl_tx_considx;
1984 	if (cons == sc->rl_ldata.rl_tx_prodidx)
1985 		return;
1986 
1987 	ifp = sc->rl_ifp;
1988 	/* Invalidate the TX descriptor list */
1989 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1990 	    sc->rl_ldata.rl_tx_list_map,
1991 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1992 
1993 	for (; cons != sc->rl_ldata.rl_tx_prodidx;
1994 	    cons = RL_TX_DESC_NXT(sc, cons)) {
1995 		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
1996 		if (txstat & RL_TDESC_STAT_OWN)
1997 			break;
1998 		/*
1999 		 * We only stash mbufs in the last descriptor
2000 		 * in a fragment chain, which also happens to
2001 		 * be the only place where the TX status bits
2002 		 * are valid.
2003 		 */
2004 		if (txstat & RL_TDESC_CMD_EOF) {
2005 			txd = &sc->rl_ldata.rl_tx_desc[cons];
2006 			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2007 			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2008 			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2009 			    txd->tx_dmamap);
2010 			KASSERT(txd->tx_m != NULL,
2011 			    ("%s: freeing NULL mbufs!", __func__));
2012 			m_freem(txd->tx_m);
2013 			txd->tx_m = NULL;
2014 			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2015 			    RL_TDESC_STAT_COLCNT))
2016 				ifp->if_collisions++;
2017 			if (txstat & RL_TDESC_STAT_TXERRSUM)
2018 				ifp->if_oerrors++;
2019 			else
2020 				ifp->if_opackets++;
2021 		}
2022 		sc->rl_ldata.rl_tx_free++;
2023 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2024 	}
2025 	sc->rl_ldata.rl_tx_considx = cons;
2026 
2027 	/* No changes made to the TX ring, so no flush needed */
2028 
2029 	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2030 		/*
2031 		 * Some chips will ignore a second TX request issued
2032 		 * while an existing transmission is in progress. If
2033 		 * the transmitter goes idle but there are still
2034 		 * packets waiting to be sent, we need to restart the
2035 		 * channel here to flush them out. This only seems to
2036 		 * be required with the PCIe devices.
2037 		 */
2038 		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2039 
2040 #ifdef RE_TX_MODERATION
2041 		/*
2042 		 * If not all descriptors have been reaped yet, reload
2043 		 * the timer so that we will eventually get another
2044 		 * interrupt that will cause us to re-enter this routine.
2045 		 * This is done in case the transmitter has gone idle.
2046 		 */
2047 		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2048 #endif
2049 	} else
2050 		sc->rl_watchdog_timer = 0;
2051 }
2052 
2053 static void
2054 re_tick(void *xsc)
2055 {
2056 	struct rl_softc		*sc;
2057 	struct mii_data		*mii;
2058 
2059 	sc = xsc;
2060 
2061 	RL_LOCK_ASSERT(sc);
2062 
2063 	mii = device_get_softc(sc->rl_miibus);
2064 	mii_tick(mii);
2065 	re_watchdog(sc);
2066 	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2067 }
2068 
2069 #ifdef DEVICE_POLLING
2070 static void
2071 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2072 {
2073 	struct rl_softc *sc = ifp->if_softc;
2074 
2075 	RL_LOCK(sc);
2076 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2077 		re_poll_locked(ifp, cmd, count);
2078 	RL_UNLOCK(sc);
2079 }
2080 
2081 static void
2082 re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2083 {
2084 	struct rl_softc *sc = ifp->if_softc;
2085 
2086 	RL_LOCK_ASSERT(sc);
2087 
2088 	sc->rxcycles = count;
2089 	re_rxeof(sc);
2090 	re_txeof(sc);
2091 
2092 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2093 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2094 
2095 	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2096 		u_int16_t		status;
2097 
2098 		status = CSR_READ_2(sc, RL_ISR);
2099 		if (status == 0xffff)
2100 			return;
2101 		if (status)
2102 			CSR_WRITE_2(sc, RL_ISR, status);
2103 
2104 		/*
2105 		 * XXX check behaviour on receiver stalls.
2106 		 */
2107 
2108 		if (status & RL_ISR_SYSTEM_ERR)
2109 			re_init_locked(sc);
2110 	}
2111 }
2112 #endif /* DEVICE_POLLING */
2113 
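/*
 * Interrupt filter.  This runs in interrupt context and does no real
 * work: it verifies that the interrupt is ours, masks further
 * interrupts via RL_IMR and defers all processing to re_int_task()
 * on the fast taskqueue.
 */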
2114 static int
2115 re_intr(void *arg)
2116 {
2117 	struct rl_softc		*sc;
2118 	uint16_t		status;
2119 
2120 	sc = arg;
2121 
2122 	status = CSR_READ_2(sc, RL_ISR);
2123 	if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2124 		return (FILTER_STRAY);
2125 	CSR_WRITE_2(sc, RL_IMR, 0);
2126 
2127 	taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2128 
2129 	return (FILTER_HANDLED);
2130 }
2131 
2132 static void
2133 re_int_task(void *arg, int npending)
2134 {
2135 	struct rl_softc		*sc;
2136 	struct ifnet		*ifp;
2137 	u_int16_t		status;
2138 	int			rval = 0;
2139 
2140 	sc = arg;
2141 	ifp = sc->rl_ifp;
2142 
2143 	RL_LOCK(sc);
2144 
2145 	status = CSR_READ_2(sc, RL_ISR);
2146 	CSR_WRITE_2(sc, RL_ISR, status);
2147 
2148 	if (sc->suspended ||
2149 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2150 		RL_UNLOCK(sc);
2151 		return;
2152 	}
2153 
2154 #ifdef DEVICE_POLLING
2155 	if (ifp->if_capenable & IFCAP_POLLING) {
2156 		RL_UNLOCK(sc);
2157 		return;
2158 	}
2159 #endif
2160 
2161 	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2162 		rval = re_rxeof(sc);
2163 
2164 	if (status & (
2165 #ifdef RE_TX_MODERATION
2166 	    RL_ISR_TIMEOUT_EXPIRED|
2167 #else
2168 	    RL_ISR_TX_OK|
2169 #endif
2170 	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2171 		re_txeof(sc);
2172 
2173 	if (status & RL_ISR_SYSTEM_ERR)
2174 		re_init_locked(sc);
2175 
2176 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2177 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2178 
2179 	RL_UNLOCK(sc);
2180 
2181 	if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2182 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2183 		return;
2184 	}
2185 
2186 	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2187 }
2188 
2189 static int
2190 re_encap(struct rl_softc *sc, struct mbuf **m_head)
2191 {
2192 	struct rl_txdesc	*txd, *txd_last;
2193 	bus_dma_segment_t	segs[RL_NTXSEGS];
2194 	bus_dmamap_t		map;
2195 	struct mbuf		*m_new;
2196 	struct rl_desc		*desc;
2197 	int			nsegs, prod;
2198 	int			i, error, ei, si;
2199 	int			padlen;
2200 	uint32_t		cmdstat, csum_flags, vlanctl;
2201 
2202 	RL_LOCK_ASSERT(sc);
2203 	M_ASSERTPKTHDR((*m_head));
2204 
2205 	/*
2206 	 * With some of the RealTek chips, using the checksum offload
2207 	 * support in conjunction with the autopadding feature results
2208 	 * in the transmission of corrupt frames. For example, if we
2209 	 * need to send a really small IP fragment that's less than 60
2210 	 * bytes in size, and IP header checksumming is enabled, the
2211 	 * resulting ethernet frame that appears on the wire will
2212 	 * have a garbled payload. To work around this, if TX IP checksum
2213 	 * offload is enabled, we always manually pad short frames out
2214 	 * to the minimum ethernet frame size.
2215 	 */
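	/*
	 * For example, assuming RL_MIN_FRAMELEN is 60: a 40-byte IP
	 * fragment with CSUM_IP set gets padlen = 60 - 40 = 20 bytes
	 * of zeros appended before being handed to the chip.
	 */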
2216 	if ((sc->rl_flags & RL_FLAG_DESCV2) == 0 &&
2217 	    (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2218 	    ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2219 		padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2220 		if (M_WRITABLE(*m_head) == 0) {
2221 			/* Get a writable copy. */
2222 			m_new = m_dup(*m_head, M_DONTWAIT);
2223 			m_freem(*m_head);
2224 			if (m_new == NULL) {
2225 				*m_head = NULL;
2226 				return (ENOBUFS);
2227 			}
2228 			*m_head = m_new;
2229 		}
2230 		if ((*m_head)->m_next != NULL ||
2231 		    M_TRAILINGSPACE(*m_head) < padlen) {
2232 			m_new = m_defrag(*m_head, M_DONTWAIT);
2233 			if (m_new == NULL) {
2234 				m_freem(*m_head);
2235 				*m_head = NULL;
2236 				return (ENOBUFS);
2237 			}
2238 		} else
2239 			m_new = *m_head;
2240 
2241 		/*
2242 		 * Manually pad short frames, and zero the pad space
2243 		 * to avoid leaking data.
2244 		 */
2245 		bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2246 		m_new->m_pkthdr.len += padlen;
2247 		m_new->m_len = m_new->m_pkthdr.len;
2248 		*m_head = m_new;
2249 	}
2250 
2251 	prod = sc->rl_ldata.rl_tx_prodidx;
2252 	txd = &sc->rl_ldata.rl_tx_desc[prod];
2253 	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2254 	    *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2255 	if (error == EFBIG) {
2256 		m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2257 		if (m_new == NULL) {
2258 			m_freem(*m_head);
2259 			*m_head = NULL;
2260 			return (ENOBUFS);
2261 		}
2262 		*m_head = m_new;
2263 		error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2264 		    txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2265 		if (error != 0) {
2266 			m_freem(*m_head);
2267 			*m_head = NULL;
2268 			return (error);
2269 		}
2270 	} else if (error != 0)
2271 		return (error);
2272 	if (nsegs == 0) {
2273 		m_freem(*m_head);
2274 		*m_head = NULL;
2275 		return (EIO);
2276 	}
2277 
2278 	/* Check for number of available descriptors. */
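	/*
	 * At least one descriptor is always left unused: a completely
	 * full ring would make the producer index equal the consumer
	 * index, which re_txeof() interprets as an empty ring.
	 */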
2279 	if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2280 		bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2281 		return (ENOBUFS);
2282 	}
2283 
2284 	bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2285 	    BUS_DMASYNC_PREWRITE);
2286 
2287 	/*
2288 	 * Set up checksum offload. Note: checksum offload bits must
2289 	 * appear in all descriptors of a multi-descriptor transmit
2290 	 * attempt; this is a hard requirement, confirmed by testing
2291 	 * with an 8169 chip.
2292 	 */
2293 	vlanctl = 0;
2294 	csum_flags = 0;
2295 	if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2296 		csum_flags = RL_TDESC_CMD_LGSEND |
2297 		    ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2298 		    RL_TDESC_CMD_MSSVAL_SHIFT);
2299 	else {
2300 		/*
2301 		 * Unconditionally enable IP checksum if TCP or UDP
2302 		 * checksum is required; otherwise, the TCP/UDP checksum
2303 		 * request has no effect.
2304 		 */
2305 		if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2306 			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2307 				csum_flags |= RL_TDESC_CMD_IPCSUM;
2308 				if (((*m_head)->m_pkthdr.csum_flags &
2309 				    CSUM_TCP) != 0)
2310 					csum_flags |= RL_TDESC_CMD_TCPCSUM;
2311 				if (((*m_head)->m_pkthdr.csum_flags &
2312 				    CSUM_UDP) != 0)
2313 					csum_flags |= RL_TDESC_CMD_UDPCSUM;
2314 			} else {
2315 				vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2316 				if (((*m_head)->m_pkthdr.csum_flags &
2317 				    CSUM_TCP) != 0)
2318 					vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2319 				if (((*m_head)->m_pkthdr.csum_flags &
2320 				    CSUM_UDP) != 0)
2321 					vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2322 			}
2323 		}
2324 	}
2325 
2326 	/*
2327 	 * Set up hardware VLAN tagging. Note: vlan tag info must
2328 	 * appear in all descriptors of a multi-descriptor
2329 	 * transmission attempt.
2330 	 */
2331 	if ((*m_head)->m_flags & M_VLANTAG)
2332 		vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2333 		    RL_TDESC_VLANCTL_TAG;
2334 
2335 	si = prod;
2336 	for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2337 		desc = &sc->rl_ldata.rl_tx_list[prod];
2338 		desc->rl_vlanctl = htole32(vlanctl);
2339 		desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2340 		desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2341 		cmdstat = segs[i].ds_len;
2342 		if (i != 0)
2343 			cmdstat |= RL_TDESC_CMD_OWN;
2344 		if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2345 			cmdstat |= RL_TDESC_CMD_EOR;
2346 		desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2347 		sc->rl_ldata.rl_tx_free--;
2348 	}
2349 	/* Update producer index. */
2350 	sc->rl_ldata.rl_tx_prodidx = prod;
2351 
2352 	/* Set EOF on the last descriptor. */
2353 	ei = RL_TX_DESC_PRV(sc, prod);
2354 	desc = &sc->rl_ldata.rl_tx_list[ei];
2355 	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2356 
2357 	desc = &sc->rl_ldata.rl_tx_list[si];
2358 	/* Set SOF and transfer ownership of packet to the chip. */
2359 	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
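	/*
	 * OWN is set on the SOF descriptor only now, after all later
	 * descriptors in the chain have been initialized, so the chip
	 * can never begin DMA on a partially built chain.
	 */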
2360 
2361 	/*
2362 	 * Ensure that the map for this transmission
2363 	 * is placed at the array index of the last descriptor
2364 	 * in this chain.  (Swap last and first dmamaps.)
2365 	 */
2366 	txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2367 	map = txd->tx_dmamap;
2368 	txd->tx_dmamap = txd_last->tx_dmamap;
2369 	txd_last->tx_dmamap = map;
2370 	txd_last->tx_m = *m_head;
2371 
2372 	return (0);
2373 }
2374 
2375 static void
2376 re_tx_task(void *arg, int npending)
2377 {
2378 	struct ifnet		*ifp;
2379 
2380 	ifp = arg;
2381 	re_start(ifp);
2382 }
2383 
2384 /*
2385  * Main transmit routine for C+ and gigE NICs.
2386  */
2387 static void
2388 re_start(struct ifnet *ifp)
2389 {
2390 	struct rl_softc		*sc;
2391 	struct mbuf		*m_head;
2392 	int			queued;
2393 
2394 	sc = ifp->if_softc;
2395 
2396 	RL_LOCK(sc);
2397 
2398 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2399 	    IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
2400 		RL_UNLOCK(sc);
2401 		return;
2402 	}
2403 
2404 	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2405 	    sc->rl_ldata.rl_tx_free > 1;) {
2406 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2407 		if (m_head == NULL)
2408 			break;
2409 
2410 		if (re_encap(sc, &m_head) != 0) {
2411 			if (m_head == NULL)
2412 				break;
2413 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2414 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2415 			break;
2416 		}
2417 
2418 		/*
2419 		 * If there's a BPF listener, bounce a copy of this frame
2420 		 * to him.
2421 		 */
2422 		ETHER_BPF_MTAP(ifp, m_head);
2423 
2424 		queued++;
2425 	}
2426 
2427 	if (queued == 0) {
2428 #ifdef RE_TX_MODERATION
2429 		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2430 			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2431 #endif
2432 		RL_UNLOCK(sc);
2433 		return;
2434 	}
2435 
2436 	/* Flush the TX descriptors */
2437 
2438 	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2439 	    sc->rl_ldata.rl_tx_list_map,
2440 	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2441 
2442 	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2443 
2444 #ifdef RE_TX_MODERATION
2445 	/*
2446 	 * Use the countdown timer for interrupt moderation.
2447 	 * 'TX done' interrupts are disabled. Instead, we reset the
2448 	 * timer, which counts up until it reaches the value in the
2449 	 * TIMERINT register and then triggers an interrupt. Each time
2450 	 * we write to the TIMERCNT register,
2451 	 * the timer count is reset to 0.
2452 	 */
2453 	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2454 #endif
2455 
2456 	/*
2457 	 * Set a timeout in case the chip goes out to lunch.
2458 	 */
2459 	sc->rl_watchdog_timer = 5;
2460 
2461 	RL_UNLOCK(sc);
2462 }
2463 
2464 static void
2465 re_init(void *xsc)
2466 {
2467 	struct rl_softc		*sc = xsc;
2468 
2469 	RL_LOCK(sc);
2470 	re_init_locked(sc);
2471 	RL_UNLOCK(sc);
2472 }
2473 
2474 static void
2475 re_init_locked(struct rl_softc *sc)
2476 {
2477 	struct ifnet		*ifp = sc->rl_ifp;
2478 	struct mii_data		*mii;
2479 	u_int32_t		rxcfg = 0;
2480 	uint16_t		cfg;
2481 	union {
2482 		uint32_t align_dummy;
2483 		u_char eaddr[ETHER_ADDR_LEN];
2484 	} eaddr;
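	/*
	 * The align_dummy member forces 32-bit alignment of eaddr so
	 * that the two 4-byte ID register writes below may safely
	 * access it as u_int32_t.
	 */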
2485 
2486 	RL_LOCK_ASSERT(sc);
2487 
2488 	mii = device_get_softc(sc->rl_miibus);
2489 
2490 	/*
2491 	 * Cancel pending I/O and free all RX/TX buffers.
2492 	 */
2493 	re_stop(sc);
2494 
2495 	/* Put controller into known state. */
2496 	re_reset(sc);
2497 
2498 	/*
2499 	 * Enable C+ RX and TX mode, as well as VLAN stripping and
2500 	 * RX checksum offload. We must configure the C+ register
2501 	 * before all others.
2502 	 */
2503 	cfg = RL_CPLUSCMD_PCI_MRW;
2504 	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
2505 		cfg |= RL_CPLUSCMD_RXCSUM_ENB;
2506 	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
2507 		cfg |= RL_CPLUSCMD_VLANSTRIP;
2508 	if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
2509 		cfg |= RL_CPLUSCMD_MACSTAT_DIS;
2510 		/* XXX magic. */
2511 		cfg |= 0x0001;
2512 	} else
2513 		cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
2514 	CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
2515 	/*
2516 	 * Disable TSO if the interface MTU is greater than the
2517 	 * maximum MSS the controller allows.
2518 	 */
2519 	if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
2520 		ifp->if_capenable &= ~IFCAP_TSO4;
2521 		ifp->if_hwassist &= ~CSUM_TSO;
2522 	}
2523 
2524 	/*
2525 	 * Init our MAC address.  Even though the chipset
2526 	 * documentation doesn't mention it, we need to enter "Config
2527 	 * register write enable" mode to modify the ID registers.
2528 	 */
2529 	/* Copy MAC address on stack to align. */
2530 	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2531 	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2532 	CSR_WRITE_4(sc, RL_IDR0,
2533 	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2534 	CSR_WRITE_4(sc, RL_IDR4,
2535 	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2536 	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2537 
2538 	/*
2539 	 * For C+ mode, initialize the RX descriptors and mbufs.
2540 	 */
2541 	re_rx_list_init(sc);
2542 	re_tx_list_init(sc);
2543 
2544 	/*
2545 	 * Load the addresses of the RX and TX lists into the chip.
2546 	 */
2547 
2548 	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2549 	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2550 	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2551 	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2552 
2553 	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2554 	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2555 	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2556 	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2557 
2558 	/*
2559 	 * Enable transmit and receive.
2560 	 */
2561 	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2562 
2563 	/*
2564 	 * Set the initial TX and RX configuration.
2565 	 */
2566 	if (sc->rl_testmode) {
2567 		if (sc->rl_type == RL_8169)
2568 			CSR_WRITE_4(sc, RL_TXCFG,
2569 			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2570 		else
2571 			CSR_WRITE_4(sc, RL_TXCFG,
2572 			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2573 	} else
2574 		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2575 
2576 	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2577 
2578 	CSR_WRITE_4(sc, RL_RXCFG, RL_RXCFG_CONFIG);
2579 
2580 	/* Set the individual bit to receive frames for this host only. */
2581 	rxcfg = CSR_READ_4(sc, RL_RXCFG);
2582 	rxcfg |= RL_RXCFG_RX_INDIV;
2583 
2584 	/* If we want promiscuous mode, set the allframes bit. */
2585 	if (ifp->if_flags & IFF_PROMISC)
2586 		rxcfg |= RL_RXCFG_RX_ALLPHYS;
2587 	else
2588 		rxcfg &= ~RL_RXCFG_RX_ALLPHYS;
2589 	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2590 
2591 	/*
2592 	 * Set capture broadcast bit to capture broadcast frames.
2593 	 */
2594 	if (ifp->if_flags & IFF_BROADCAST)
2595 		rxcfg |= RL_RXCFG_RX_BROAD;
2596 	else
2597 		rxcfg &= ~RL_RXCFG_RX_BROAD;
2598 	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2599 
2600 	/*
2601 	 * Program the multicast filter, if necessary.
2602 	 */
2603 	re_setmulti(sc);
2604 
2605 #ifdef DEVICE_POLLING
2606 	/*
2607 	 * Disable interrupts if we are polling.
2608 	 */
2609 	if (ifp->if_capenable & IFCAP_POLLING)
2610 		CSR_WRITE_2(sc, RL_IMR, 0);
2611 	else	/* otherwise ... */
2612 #endif
2613 
2614 	/*
2615 	 * Enable interrupts.
2616 	 */
2617 	if (sc->rl_testmode)
2618 		CSR_WRITE_2(sc, RL_IMR, 0);
2619 	else
2620 		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2621 	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2622 
2623 	/* Set initial TX threshold */
2624 	sc->rl_txthresh = RL_TX_THRESH_INIT;
2625 
2626 	/* Start RX/TX process. */
2627 	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2628 #ifdef notdef
2629 	/* Enable receiver and transmitter. */
2630 	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2631 #endif
2632 
2633 #ifdef RE_TX_MODERATION
2634 	/*
2635 	 * Initialize the timer interrupt register so that
2636 	 * a timer interrupt will be generated once the timer
2637 	 * reaches a certain number of ticks. The timer is
2638 	 * reloaded on each transmit. This gives us TX interrupt
2639 	 * moderation, which dramatically improves TX frame rate.
2640 	 */
2641 	if (sc->rl_type == RL_8169)
2642 		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2643 	else
2644 		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2645 #endif
2646 
2647 	/*
2648 	 * For 8169 gigE NICs, set the max allowed RX packet
2649 	 * size so we can receive jumbo frames.
2650 	 */
2651 	if (sc->rl_type == RL_8169)
2652 		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2653 
2654 	if (sc->rl_testmode)
2655 		return;
2656 
2657 	mii_mediachg(mii);
2658 
2659 	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2660 
2661 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2662 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2663 
2664 	sc->rl_flags &= ~RL_FLAG_LINK;
2665 	sc->rl_watchdog_timer = 0;
2666 	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2667 }
2668 
2669 /*
2670  * Set media options.
2671  */
2672 static int
2673 re_ifmedia_upd(struct ifnet *ifp)
2674 {
2675 	struct rl_softc		*sc;
2676 	struct mii_data		*mii;
2677 	int			error;
2678 
2679 	sc = ifp->if_softc;
2680 	mii = device_get_softc(sc->rl_miibus);
2681 	RL_LOCK(sc);
2682 	error = mii_mediachg(mii);
2683 	RL_UNLOCK(sc);
2684 
2685 	return (error);
2686 }
2687 
2688 /*
2689  * Report current media status.
2690  */
2691 static void
2692 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2693 {
2694 	struct rl_softc		*sc;
2695 	struct mii_data		*mii;
2696 
2697 	sc = ifp->if_softc;
2698 	mii = device_get_softc(sc->rl_miibus);
2699 
2700 	RL_LOCK(sc);
2701 	mii_pollstat(mii);
2702 	RL_UNLOCK(sc);
2703 	ifmr->ifm_active = mii->mii_media_active;
2704 	ifmr->ifm_status = mii->mii_media_status;
2705 }
2706 
2707 static int
2708 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2709 {
2710 	struct rl_softc		*sc = ifp->if_softc;
2711 	struct ifreq		*ifr = (struct ifreq *) data;
2712 	struct mii_data		*mii;
2713 	int			error = 0;
2714 
2715 	switch (command) {
2716 	case SIOCSIFMTU:
2717 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
2718 			error = EINVAL;
2719 			break;
2720 		}
2721 		if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
2722 		    ifr->ifr_mtu > RL_MAX_FRAMELEN) {
2723 			error = EINVAL;
2724 			break;
2725 		}
2726 		RL_LOCK(sc);
2727 		if (ifp->if_mtu != ifr->ifr_mtu)
2728 			ifp->if_mtu = ifr->ifr_mtu;
2729 		if (ifp->if_mtu > RL_TSO_MTU &&
2730 		    (ifp->if_capenable & IFCAP_TSO4) != 0) {
2731 			ifp->if_capenable &= ~IFCAP_TSO4;
2732 			ifp->if_hwassist &= ~CSUM_TSO;
2733 		}
2734 		RL_UNLOCK(sc);
2735 		break;
2736 	case SIOCSIFFLAGS:
2737 		RL_LOCK(sc);
2738 		if ((ifp->if_flags & IFF_UP) != 0) {
2739 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2740 				if (((ifp->if_flags ^ sc->rl_if_flags)
2741 				    & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
2742 					re_setmulti(sc);
2743 			} else
2744 				re_init_locked(sc);
2745 		} else {
2746 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2747 				re_stop(sc);
2748 		}
2749 		sc->rl_if_flags = ifp->if_flags;
2750 		RL_UNLOCK(sc);
2751 		break;
2752 	case SIOCADDMULTI:
2753 	case SIOCDELMULTI:
2754 		RL_LOCK(sc);
2755 		re_setmulti(sc);
2756 		RL_UNLOCK(sc);
2757 		break;
2758 	case SIOCGIFMEDIA:
2759 	case SIOCSIFMEDIA:
2760 		mii = device_get_softc(sc->rl_miibus);
2761 		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2762 		break;
2763 	case SIOCSIFCAP:
2764 	    {
2765 		int mask, reinit;
2766 
2767 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2768 		reinit = 0;
2769 #ifdef DEVICE_POLLING
2770 		if (mask & IFCAP_POLLING) {
2771 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
2772 				error = ether_poll_register(re_poll, ifp);
2773 				if (error)
2774 					return (error);
2775 				RL_LOCK(sc);
2776 				/* Disable interrupts */
2777 				CSR_WRITE_2(sc, RL_IMR, 0x0000);
2778 				ifp->if_capenable |= IFCAP_POLLING;
2779 				RL_UNLOCK(sc);
2780 			} else {
2781 				error = ether_poll_deregister(ifp);
2782 				/* Enable interrupts. */
2783 				RL_LOCK(sc);
2784 				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2785 				ifp->if_capenable &= ~IFCAP_POLLING;
2786 				RL_UNLOCK(sc);
2787 			}
2788 		}
2789 #endif /* DEVICE_POLLING */
2790 		if (mask & IFCAP_HWCSUM) {
2791 			ifp->if_capenable ^= IFCAP_HWCSUM;
2792 			if (ifp->if_capenable & IFCAP_TXCSUM)
2793 				ifp->if_hwassist |= RE_CSUM_FEATURES;
2794 			else
2795 				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
2796 			reinit = 1;
2797 		}
2798 		if (mask & IFCAP_VLAN_HWTAGGING) {
2799 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2800 			reinit = 1;
2801 		}
2802 		if (mask & IFCAP_TSO4) {
2803 			ifp->if_capenable ^= IFCAP_TSO4;
2804 			if ((IFCAP_TSO4 & ifp->if_capenable) &&
2805 			    (IFCAP_TSO4 & ifp->if_capabilities))
2806 				ifp->if_hwassist |= CSUM_TSO;
2807 			else
2808 				ifp->if_hwassist &= ~CSUM_TSO;
2809 			if (ifp->if_mtu > RL_TSO_MTU &&
2810 			    (ifp->if_capenable & IFCAP_TSO4) != 0) {
2811 				ifp->if_capenable &= ~IFCAP_TSO4;
2812 				ifp->if_hwassist &= ~CSUM_TSO;
2813 			}
2814 		}
2815 		if ((mask & IFCAP_WOL) != 0 &&
2816 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
2817 			if ((mask & IFCAP_WOL_UCAST) != 0)
2818 				ifp->if_capenable ^= IFCAP_WOL_UCAST;
2819 			if ((mask & IFCAP_WOL_MCAST) != 0)
2820 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
2821 			if ((mask & IFCAP_WOL_MAGIC) != 0)
2822 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2823 		}
2824 		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
2825 			re_init(sc);
2826 		VLAN_CAPABILITIES(ifp);
2827 	    }
2828 		break;
2829 	default:
2830 		error = ether_ioctl(ifp, command, data);
2831 		break;
2832 	}
2833 
2834 	return (error);
2835 }
2836 
2837 static void
2838 re_watchdog(struct rl_softc *sc)
2839 {
2840 	struct ifnet		*ifp;
2841 
2842 	RL_LOCK_ASSERT(sc);
2843 
2844 	if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
2845 		return;
2846 
2847 	ifp = sc->rl_ifp;
2848 	re_txeof(sc);
2849 	if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
2850 		if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
2851 		    "-- recovering\n");
2852 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2853 			taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2854 		return;
2855 	}
2856 
2857 	if_printf(ifp, "watchdog timeout\n");
2858 	ifp->if_oerrors++;
2859 
2860 	re_rxeof(sc);
2861 	re_init_locked(sc);
2862 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2863 		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2864 }
2865 
2866 /*
2867  * Stop the adapter and free any mbufs allocated to the
2868  * RX and TX lists.
2869  */
2870 static void
2871 re_stop(struct rl_softc *sc)
2872 {
2873 	int			i;
2874 	struct ifnet		*ifp;
2875 	struct rl_txdesc	*txd;
2876 	struct rl_rxdesc	*rxd;
2877 
2878 	RL_LOCK_ASSERT(sc);
2879 
2880 	ifp = sc->rl_ifp;
2881 
2882 	sc->rl_watchdog_timer = 0;
2883 	callout_stop(&sc->rl_stat_callout);
2884 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2885 
2886 	if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
2887 		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
2888 		    RL_CMD_RX_ENB);
2889 	else
2890 		CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2891 	DELAY(1000);
2892 	CSR_WRITE_2(sc, RL_IMR, 0x0000);
2893 	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2894 
2895 	if (sc->rl_head != NULL) {
2896 		m_freem(sc->rl_head);
2897 		sc->rl_head = sc->rl_tail = NULL;
2898 	}
2899 
2900 	/* Free the TX list buffers. */
2901 
2902 	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2903 		txd = &sc->rl_ldata.rl_tx_desc[i];
2904 		if (txd->tx_m != NULL) {
2905 			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2906 			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2907 			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2908 			    txd->tx_dmamap);
2909 			m_freem(txd->tx_m);
2910 			txd->tx_m = NULL;
2911 		}
2912 	}
2913 
2914 	/* Free the RX list buffers. */
2915 
2916 	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2917 		rxd = &sc->rl_ldata.rl_rx_desc[i];
2918 		if (rxd->rx_m != NULL) {
2919 			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
2920 			    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2921 			bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2922 			    rxd->rx_dmamap);
2923 			m_freem(rxd->rx_m);
2924 			rxd->rx_m = NULL;
2925 		}
2926 	}
2927 }
2928 
2929 /*
2930  * Device suspend routine.  Stop the interface and arm the
2931  * wake-on-LAN configuration before the system powers the
2932  * device down.
2933  */
2934 static int
2935 re_suspend(device_t dev)
2936 {
2937 	struct rl_softc		*sc;
2938 
2939 	sc = device_get_softc(dev);
2940 
2941 	RL_LOCK(sc);
2942 	re_stop(sc);
2943 	re_setwol(sc);
2944 	sc->suspended = 1;
2945 	RL_UNLOCK(sc);
2946 
2947 	return (0);
2948 }
2949 
2950 /*
2951  * Device resume routine.  Restart the interface if it was up at
2952  * suspend time, and clear the wake-on-LAN configuration so it
2953  * doesn't interfere with normal operation.
2954  */
2955 static int
2956 re_resume(device_t dev)
2957 {
2958 	struct rl_softc		*sc;
2959 	struct ifnet		*ifp;
2960 
2961 	sc = device_get_softc(dev);
2962 
2963 	RL_LOCK(sc);
2964 
2965 	ifp = sc->rl_ifp;
2966 
2967 	/* reinitialize interface if necessary */
2968 	if (ifp->if_flags & IFF_UP)
2969 		re_init_locked(sc);
2970 
2971 	/*
2972 	 * Clear WOL matching so that it does not interfere with
2973 	 * normal Rx filtering.
2974 	 */
2975 	re_clrwol(sc);
2976 	sc->suspended = 0;
2977 	RL_UNLOCK(sc);
2978 
2979 	return (0);
2980 }
2981 
2982 /*
2983  * Stop all chip I/O so that the kernel's probe routines don't
2984  * get confused by errant DMAs when rebooting.
2985  */
2986 static int
2987 re_shutdown(device_t dev)
2988 {
2989 	struct rl_softc		*sc;
2990 
2991 	sc = device_get_softc(dev);
2992 
2993 	RL_LOCK(sc);
2994 	re_stop(sc);
2995 	/*
2996 	 * Mark the interface as down, since otherwise we will panic
2997 	 * if an interrupt comes in later, which can happen in some
2998 	 * cases.
2999 	 */
3000 	sc->rl_ifp->if_flags &= ~IFF_UP;
3001 	re_setwol(sc);
3002 	RL_UNLOCK(sc);
3003 
3004 	return (0);
3005 }
3006 
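/*
 * Program the chip's wake-on-LAN controls (CFG1/CFG3/CFG5) and the
 * PCI power-management PME bits to match the interface's enabled
 * IFCAP_WOL_* capabilities.  Called on suspend and shutdown.
 */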
3007 static void
3008 re_setwol(struct rl_softc *sc)
3009 {
3010 	struct ifnet		*ifp;
3011 	int			pmc;
3012 	uint16_t		pmstat;
3013 	uint8_t			v;
3014 
3015 	RL_LOCK_ASSERT(sc);
3016 
3017 	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3018 		return;
3019 
3020 	ifp = sc->rl_ifp;
3021 	if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
3022 	    (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
3023 		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
3024 	/* Enable config register write. */
3025 	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3026 
3027 	/* Enable PME. */
3028 	v = CSR_READ_1(sc, RL_CFG1);
3029 	v &= ~RL_CFG1_PME;
3030 	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3031 		v |= RL_CFG1_PME;
3032 	CSR_WRITE_1(sc, RL_CFG1, v);
3033 
3034 	v = CSR_READ_1(sc, RL_CFG3);
3035 	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3036 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
3037 		v |= RL_CFG3_WOL_MAGIC;
3038 	CSR_WRITE_1(sc, RL_CFG3, v);
3039 
3040 	/* Config register write done. */
3041 	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3042 
3043 	v = CSR_READ_1(sc, RL_CFG5);
3044 	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3045 	v &= ~RL_CFG5_WOL_LANWAKE;
3046 	if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
3047 		v |= RL_CFG5_WOL_UCAST;
3048 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
3049 		v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
3050 	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3051 		v |= RL_CFG5_WOL_LANWAKE;
3052 	CSR_WRITE_1(sc, RL_CFG5, v);
3053 
3054 	/*
3055 	 * The hardware appears to reset its link speed to 100Mbps in
3056 	 * power-down mode on its own, so switching to 100Mbps in the
3057 	 * driver is not needed.
3058 	 */
3059 
3060 	/* Request PME if WOL is requested. */
3061 	pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
3062 	pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
3063 	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3064 		pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
3065 	pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
3066 }
3067 
3068 static void
3069 re_clrwol(struct rl_softc *sc)
3070 {
3071 	int			pmc;
3072 	uint8_t			v;
3073 
3074 	RL_LOCK_ASSERT(sc);
3075 
3076 	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3077 		return;
3078 
3079 	/* Enable config register write. */
3080 	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3081 
3082 	v = CSR_READ_1(sc, RL_CFG3);
3083 	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3084 	CSR_WRITE_1(sc, RL_CFG3, v);
3085 
3086 	/* Config register write done. */
3087 	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3088 
3089 	v = CSR_READ_1(sc, RL_CFG5);
3090 	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3091 	v &= ~RL_CFG5_WOL_LANWAKE;
3092 	CSR_WRITE_1(sc, RL_CFG5, v);
3093 }
3094