xref: /freebsd/sys/dev/rge/if_rge.c (revision 187d8a3ce55a4e2d41fbe61465d5ff4ac0fc6bd5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2019, 2020, 2023-2025 Kevin Lo <kevlo@openbsd.org>
5  * Copyright (c) 2025 Adrian Chadd <adrian@FreeBSD.org>
6  *
7  * Hardware programming portions from Realtek Semiconductor.
8  *
9  * Permission to use, copy, modify, and distribute this software for any
10  * purpose with or without fee is hereby granted, provided that the above
11  * copyright notice and this permission notice appear in all copies.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20  */
21 
22 /*	$OpenBSD: if_rge.c,v 1.38 2025/09/19 00:41:14 kevlo Exp $	*/
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sockio.h>
27 #include <sys/mbuf.h>
28 #include <sys/malloc.h>
29 #include <sys/endian.h>
30 #include <sys/socket.h>
31 #include <net/if.h>
32 #include <net/if_media.h>
33 #include <sys/queue.h>
34 #include <sys/taskqueue.h>
35 #include <sys/bus.h>
36 #include <sys/module.h>
37 #include <sys/rman.h>
38 #include <sys/kernel.h>
39 
40 #include <netinet/in.h>
41 #include <netinet/if_ether.h>
42 
43 #include <net/bpf.h>
44 #include <net/ethernet.h>
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/if_arp.h>
48 #include <net/if_dl.h>
49 #include <net/if_media.h>
50 #include <net/if_types.h>
51 #include <net/if_vlan_var.h>
52 
53 #include <machine/bus.h>
54 #include <machine/resource.h>
55 
56 #include <dev/mii/mii.h>
57 
58 #include <dev/pci/pcivar.h>
59 #include <dev/pci/pcireg.h>
60 
61 #include "if_rge_vendor.h"
62 #include "if_rgereg.h"
63 #include "if_rgevar.h"
64 #include "if_rge_hw.h"
65 #include "if_rge_microcode.h"
66 #include "if_rge_debug.h"
67 #include "if_rge_sysctl.h"
68 #include "if_rge_stats.h"
69 
/*
 * Evaluates to true when the given struct ether_addr is neither a multicast
 * address (per ETHER_IS_MULTICAST) nor all zeroes (per ETHER_IS_ZERO).
 * The argument is parenthesised so that expressions such as *ptr can be
 * passed safely.
 */
#define ETHER_IS_VALID(addr) \
	(!ETHER_IS_MULTICAST((addr).octet) && !ETHER_IS_ZERO((addr).octet))

/* Checksum offload bits this driver sets in if_hwassist. */
#define	RGE_CSUM_FEATURES		(CSUM_IP | CSUM_TCP | CSUM_UDP)
74 
75 static int		rge_attach(device_t);
76 static int		rge_detach(device_t);
77 
78 #if 0
79 int		rge_activate(struct device *, int);
80 #endif
81 static void	rge_intr_msi(void *);
82 static int	rge_ioctl(struct ifnet *, u_long, caddr_t);
83 static int	rge_transmit_if(if_t, struct mbuf *);
84 static void	rge_qflush_if(if_t);
85 static void	rge_init_if(void *);
86 static void	rge_init_locked(struct rge_softc *);
87 static void	rge_stop_locked(struct rge_softc *);
88 static int	rge_ifmedia_upd(if_t);
89 static void	rge_ifmedia_sts(if_t, struct ifmediareq *);
90 static int	rge_allocmem(struct rge_softc *);
91 static int	rge_alloc_stats_mem(struct rge_softc *);
92 static int	rge_freemem(struct rge_softc *);
93 static int	rge_free_stats_mem(struct rge_softc *);
94 static int	rge_newbuf(struct rge_queues *);
95 static void	rge_rx_list_init(struct rge_queues *);
96 static void	rge_tx_list_init(struct rge_queues *);
97 static void	rge_fill_rx_ring(struct rge_queues *);
98 static int	rge_rxeof(struct rge_queues *, struct mbufq *);
99 static int	rge_txeof(struct rge_queues *);
100 static void	rge_iff_locked(struct rge_softc *);
101 static void	rge_add_media_types(struct rge_softc *);
102 static void	rge_tx_task(void *, int);
103 static void	rge_txq_flush_mbufs(struct rge_softc *sc);
104 static void	rge_tick(void *);
105 static void	rge_link_state(struct rge_softc *);
106 #if 0
107 #ifndef SMALL_KERNEL
108 int		rge_wol(struct ifnet *, int);
109 void		rge_wol_power(struct rge_softc *);
110 #endif
111 #endif
112 
113 struct rge_matchid {
114 	uint16_t vendor;
115 	uint16_t device;
116 	const char *name;
117 };
118 
119 const struct rge_matchid rge_devices[] = {
120 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_E3000, "Killer E3000" },
121 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RTL8125, "RTL8125" },
122 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RTL8126, "RTL8126", },
123 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RTL8127, "RTL8127" },
124 	{ 0, 0, NULL }
125 };
126 
127 static int
128 rge_probe(device_t dev)
129 {
130 	uint16_t vendor, device;
131 	const struct rge_matchid *ri;
132 
133 	vendor = pci_get_vendor(dev);
134 	device = pci_get_device(dev);
135 
136 	for (ri = rge_devices; ri->name != NULL; ri++) {
137 		if ((vendor == ri->vendor) && (device == ri->device)) {
138 			device_set_desc(dev, ri->name);
139 			return (BUS_PROBE_DEFAULT);
140 		}
141 	}
142 
143 	return (ENXIO);
144 }
145 
146 static void
147 rge_attach_if(struct rge_softc *sc, const char *eaddr)
148 {
149 	if_initname(sc->sc_ifp, device_get_name(sc->sc_dev),
150 	    device_get_unit(sc->sc_dev));
151 	if_setdev(sc->sc_ifp, sc->sc_dev);
152 	if_setinitfn(sc->sc_ifp, rge_init_if);
153 	if_setsoftc(sc->sc_ifp, sc);
154 	if_setflags(sc->sc_ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
155 	if_setioctlfn(sc->sc_ifp, rge_ioctl);
156 	if_settransmitfn(sc->sc_ifp, rge_transmit_if);
157 	if_setqflushfn(sc->sc_ifp, rge_qflush_if);
158 
159 	/* Set offload as appropriate */
160 	if_sethwassist(sc->sc_ifp, CSUM_IP | CSUM_TCP | CSUM_UDP);
161 	if_setcapabilities(sc->sc_ifp, IFCAP_HWCSUM);
162 	if_setcapenable(sc->sc_ifp, if_getcapabilities(sc->sc_ifp));
163 
164 	/* TODO: set WOL */
165 
166 	/* Attach interface */
167 	ether_ifattach(sc->sc_ifp, eaddr);
168 	sc->sc_ether_attached = true;
169 
170 	/* post ether_ifattach() bits */
171 
172 	/* VLAN capabilities */
173 	if_setcapabilitiesbit(sc->sc_ifp, IFCAP_VLAN_MTU |
174 	    IFCAP_VLAN_HWTAGGING, 0);
175 	if_setcapabilitiesbit(sc->sc_ifp, IFCAP_VLAN_HWCSUM, 0);
176 	if_setcapenable(sc->sc_ifp, if_getcapabilities(sc->sc_ifp));
177 
178 	if_setifheaderlen(sc->sc_ifp, sizeof(struct ether_vlan_header));
179 
180 	/* TODO: is this needed for iftransmit? */
181 	if_setsendqlen(sc->sc_ifp, RGE_TX_LIST_CNT - 1);
182 	if_setsendqready(sc->sc_ifp);
183 }
184 
185 static int
186 rge_attach(device_t dev)
187 {
188 	struct ether_addr eaddr;
189 	struct rge_softc *sc;
190 	struct rge_queues *q;
191 	uint32_t hwrev, reg;
192 	int i, rid;
193 	int error;
194 	int msic;
195 
196 	sc = device_get_softc(dev);
197 	sc->sc_dev = dev;
198 	sc->sc_ifp = if_gethandle(IFT_ETHER);
199 	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
200 	    MTX_DEF);
201 
202 	callout_init_mtx(&sc->sc_timeout, &sc->sc_mtx, 0);
203 
204 	/* Enable bus mastering */
205 	pci_enable_busmaster(dev);
206 
207 	/*
208 	 * Map control/status registers.
209 	 */
210 
211 	/*
212 	 * The openbsd driver (and my E3000 NIC) handle registering three
213 	 * kinds of BARs - a 64 bit MMIO BAR, a 32 bit MMIO BAR, and then
214 	 * a legacy IO port BAR.
215 	 *
216 	 * To simplify bring-up, I'm going to request resources for the first
217 	 * MMIO BAR (BAR2) which should be a 32 bit BAR.
218 	 */
219 	rid = PCIR_BAR(2);
220 	sc->sc_bres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
221 	    RF_ACTIVE);
222 	if (sc->sc_bres == NULL) {
223 		RGE_PRINT_ERROR(sc,
224 		    "Unable to allocate bus resource: memory\n");
225 		goto fail;
226 	}
227 	sc->rge_bhandle = rman_get_bushandle(sc->sc_bres);
228 	sc->rge_btag = rman_get_bustag(sc->sc_bres);
229 	sc->rge_bsize = rman_get_size(sc->sc_bres);
230 
231 	q = malloc(sizeof(struct rge_queues), M_DEVBUF, M_NOWAIT | M_ZERO);
232 	if (q == NULL) {
233 		RGE_PRINT_ERROR(sc, "Unable to malloc rge_queues memory\n");
234 		goto fail;
235 	}
236 	q->q_sc = sc;
237 	q->q_index = 0;
238 
239 	sc->sc_queues = q;
240 	sc->sc_nqueues = 1;
241 
242 	/* Check if PCIe */
243 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
244 		sc->rge_flags |= RGE_FLAG_PCIE;
245 		sc->sc_expcap = reg;
246 	}
247 
248 	/* Allocate MSI */
249 	msic = pci_msi_count(dev);
250 	if (msic == 0) {
251 		RGE_PRINT_ERROR(sc, "%s: only MSI interrupts supported\n",
252 		    __func__);
253 		goto fail;
254 	}
255 
256 	msic = RGE_MSI_MESSAGES;
257 	if (pci_alloc_msi(dev, &msic) != 0) {
258 		RGE_PRINT_ERROR(sc, "%s: failed to allocate MSI\n",
259 		    __func__);
260 		goto fail;
261 	}
262 
263 	sc->rge_flags |= RGE_FLAG_MSI;
264 
265 	/* We need at least one MSI */
266 	if (msic < RGE_MSI_MESSAGES) {
267 		RGE_PRINT_ERROR(sc, "%s: didn't allocate enough MSI\n",
268 		    __func__);
269 		goto fail;
270 	}
271 
272 	/*
273 	 * Allocate interrupt entries.
274 	 */
275 	for (i = 0, rid = 1; i < RGE_MSI_MESSAGES; i++, rid++) {
276 		sc->sc_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
277 		    &rid, RF_ACTIVE);
278 		if (sc->sc_irq[i] == NULL) {
279 			RGE_PRINT_ERROR(sc, "%s: couldn't allocate MSI %d",
280 			    __func__, rid);
281 			goto fail;
282 		}
283 	}
284 
285 	/* Hook interrupts */
286 	for (i = 0; i < RGE_MSI_MESSAGES; i++) {
287 		error = bus_setup_intr(dev, sc->sc_irq[i],
288 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, rge_intr_msi,
289 		    sc, &sc->sc_ih[i]);
290 		if (error != 0) {
291 			RGE_PRINT_ERROR(sc,
292 			    "%s: couldn't setup intr %d (error %d)", __func__,
293 			    i, error);
294 			goto fail;
295 		}
296 	}
297 
298 	/* Allocate top level bus DMA tag */
299 	error = bus_dma_tag_create(bus_get_dma_tag(dev),
300 	    1, /* alignment */
301 	    0, /* boundary */
302 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
303 	    NULL, NULL, /* filter (unused) */
304 	    BUS_SPACE_MAXADDR, /* maxsize */
305 	    BUS_SPACE_UNRESTRICTED, /* nsegments */
306 	    BUS_SPACE_MAXADDR, /* maxsegsize */
307 	    0, /* flags */
308 	    NULL, NULL, /* lockfunc, lockarg */
309 	    &sc->sc_dmat);
310 	if (error) {
311 		RGE_PRINT_ERROR(sc,
312 		    "couldn't allocate device DMA tag (error %d)\n", error);
313 		goto fail;
314 	}
315 
316 	/* Allocate TX/RX descriptor and buffer tags */
317 	error = bus_dma_tag_create(sc->sc_dmat,
318 	    RGE_ALIGN, /* alignment */
319 	    0, /* boundary */
320 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
321 	    NULL, NULL, /* filter (unused) */
322 	    RGE_TX_LIST_SZ, /* maxsize */
323 	    1, /* nsegments */
324 	    RGE_TX_LIST_SZ, /* maxsegsize */
325 	    0, /* flags */
326 	    NULL, NULL, /* lockfunc, lockarg */
327 	    &sc->sc_dmat_tx_desc);
328 	if (error) {
329 		RGE_PRINT_ERROR(sc,
330 		    "couldn't allocate device TX descriptor "
331 		    "DMA tag (error %d)\n", error);
332 		    goto fail;
333 	}
334 
335 	error = bus_dma_tag_create(sc->sc_dmat,
336 	    1, /* alignment */
337 	    0, /* boundary */
338 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
339 	    NULL, NULL, /* filter (unused) */
340 	    RGE_JUMBO_FRAMELEN, /* maxsize */
341 	    RGE_TX_NSEGS, /* nsegments */
342 	    RGE_JUMBO_FRAMELEN, /* maxsegsize */
343 	    0, /* flags */
344 	    NULL, NULL, /* lockfunc, lockarg */
345 	    &sc->sc_dmat_tx_buf);
346 	if (error) {
347 		RGE_PRINT_ERROR(sc,
348 		    "couldn't allocate device TX buffer DMA tag (error %d)\n",
349 		    error);
350 		goto fail;
351 	}
352 
353 	error = bus_dma_tag_create(sc->sc_dmat,
354 	    RGE_ALIGN, /* alignment */
355 	    0, /* boundary */
356 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
357 	    NULL, NULL, /* filter (unused) */
358 	    RGE_RX_LIST_SZ, /* maxsize */
359 	    1, /* nsegments */
360 	    RGE_RX_LIST_SZ, /* maxsegsize */
361 	    0, /* flags */
362 	    NULL, NULL, /* lockfunc, lockarg */
363 	    &sc->sc_dmat_rx_desc);
364 	if (error) {
365 		RGE_PRINT_ERROR(sc,
366 		    "couldn't allocate device RX descriptor "
367 		    "DMA tag (error %d)\n", error);
368 		goto fail;
369 	}
370 
371 	error = bus_dma_tag_create(sc->sc_dmat,
372 	    1, /* alignment */
373 	    0, /* boundary */
374 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
375 	    NULL, NULL, /* filter (unused) */
376 	    MCLBYTES, /* maxsize */
377 	    1, /* nsegments */
378 	    MCLBYTES, /* maxsegsize */
379 	    0, /* flags */
380 	    NULL, NULL, /* lockfunc, lockarg */
381 	    &sc->sc_dmat_rx_buf);
382 	if (error) {
383 		RGE_PRINT_ERROR(sc,
384 		    "couldn't allocate device RX buffer DMA tag (error %d)\n",
385 		    error);
386 		goto fail;
387 	}
388 
389 	error = bus_dma_tag_create(sc->sc_dmat,
390 	    RGE_STATS_ALIGNMENT, /* alignment */
391 	    0, /* boundary */
392 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
393 	    NULL, NULL, /* filter (unused) */
394 	    RGE_STATS_BUF_SIZE, /* maxsize */
395 	    1, /* nsegments */
396 	    RGE_STATS_BUF_SIZE, /* maxsegsize */
397 	    0, /* flags */
398 	    NULL, NULL, /* lockfunc, lockarg */
399 	    &sc->sc_dmat_stats_buf);
400 	if (error) {
401 		RGE_PRINT_ERROR(sc,
402 		    "couldn't allocate device RX buffer DMA tag (error %d)\n",
403 		    error);
404 		goto fail;
405 	}
406 
407 
408 	/* Attach sysctl nodes */
409 	rge_sysctl_attach(sc);
410 
411 	/* Determine hardware revision */
412 	hwrev = RGE_READ_4(sc, RGE_TXCFG) & RGE_TXCFG_HWREV;
413 	switch (hwrev) {
414 	case 0x60900000:
415 		sc->rge_type = MAC_R25;
416 		device_printf(dev, "chip rev: RTL8125 (0x%08x)\n", hwrev);
417 		break;
418 	case 0x64100000:
419 		sc->rge_type = MAC_R25B;
420 		device_printf(dev, "chip rev: RTL8125B (0x%08x)\n", hwrev);
421 		break;
422 	case 0x64900000:
423 		sc->rge_type = MAC_R26_1;
424 		device_printf(dev, "chip rev: RTL8126_1 (0x%08x)\n", hwrev);
425 		break;
426 	case 0x64a00000:
427 		sc->rge_type = MAC_R26_2;
428 		device_printf(dev, "chip rev: RTL8126_2 (0x%08x)\n", hwrev);
429 		break;
430 	case 0x68800000:
431 		sc->rge_type = MAC_R25D_1;
432 		device_printf(dev, "chip rev: RTL8125D_1 (0x%08x)\n", hwrev);
433 		break;
434 	case 0x68900000:
435 		sc->rge_type = MAC_R25D_2;
436 		device_printf(dev, "chip rev: RTL8125D_2 (0x%08x)\n", hwrev);
437 		break;
438 	case 0x6c900000:
439 		sc->rge_type = MAC_R27;
440 		device_printf(dev, "chip rev: RTL8127 (0x%08x)\n", hwrev);
441 		break;
442 	default:
443 		RGE_PRINT_ERROR(sc, "unknown version 0x%08x\n", hwrev);
444 		goto fail;
445 	}
446 
447 	rge_config_imtype(sc, RGE_IMTYPE_SIM);
448 
449 	/* TODO: disable ASPM/ECPM? */
450 
451 #if 0
452 	/*
453 	 * PCI Express check.
454 	 */
455 	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_PCIEXPRESS,
456 	    &offset, NULL)) {
457 		/* Disable PCIe ASPM and ECPM. */
458 		reg = pci_conf_read(pa->pa_pc, pa->pa_tag,
459 		    offset + PCI_PCIE_LCSR);
460 		reg &= ~(PCI_PCIE_LCSR_ASPM_L0S | PCI_PCIE_LCSR_ASPM_L1 |
461 		    PCI_PCIE_LCSR_ECPM);
462 		pci_conf_write(pa->pa_pc, pa->pa_tag, offset + PCI_PCIE_LCSR,
463 		    reg);
464 	}
465 #endif
466 
467 	RGE_LOCK(sc);
468 	if (rge_chipinit(sc)) {
469 		RGE_UNLOCK(sc);
470 		goto fail;
471 	}
472 
473 	rge_get_macaddr(sc, eaddr.octet);
474 	RGE_UNLOCK(sc);
475 
476 	if (!ETHER_IS_VALID(eaddr)) {
477 		device_printf(dev,
478 		    "No MAC address found.  Using ether_gen_addr().\n");
479 		ether_gen_addr_byname(device_get_nameunit(dev), &eaddr);
480 	}
481 
482 	if (rge_allocmem(sc))
483 		goto fail;
484 	if (rge_alloc_stats_mem(sc))
485 		goto fail;
486 
487 	/* Initialize ifmedia structures. */
488 	ifmedia_init(&sc->sc_media, IFM_IMASK, rge_ifmedia_upd,
489 	    rge_ifmedia_sts);
490 	rge_add_media_types(sc);
491 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
492 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
493 	sc->sc_media.ifm_media = sc->sc_media.ifm_cur->ifm_media;
494 
495 	rge_attach_if(sc, eaddr.octet);
496 
497 	/*
498 	 * TODO: technically should be per txq but we only support
499 	 * one TXQ at the moment.
500 	 */
501 	mbufq_init(&sc->sc_txq, RGE_TX_LIST_CNT);
502 
503 	snprintf(sc->sc_tq_name, sizeof(sc->sc_tq_name),
504 	    "%s taskq", device_get_nameunit(sc->sc_dev));
505 	snprintf(sc->sc_tq_thr_name, sizeof(sc->sc_tq_thr_name),
506 	    "%s taskq thread", device_get_nameunit(sc->sc_dev));
507 
508 	sc->sc_tq = taskqueue_create(sc->sc_tq_name, M_NOWAIT,
509 	    taskqueue_thread_enqueue, &sc->sc_tq);
510 	taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s",
511 	    sc->sc_tq_thr_name);
512 
513 	TASK_INIT(&sc->sc_tx_task, 0, rge_tx_task, sc);
514 
515 	return (0);
516 fail:
517 	rge_detach(dev);
518 	return (ENXIO);
519 }
520 
521 /**
522  * @brief flush the mbufq queue
523  *
524  * Again this should likely be per-TXQ.
525  *
526  * This should be called with the driver lock held.
527  */
528 static void
529 rge_txq_flush_mbufs(struct rge_softc *sc)
530 {
531 	struct mbuf *m;
532 	int ntx = 0;
533 
534 	RGE_ASSERT_LOCKED(sc);
535 
536 	while ((m = mbufq_dequeue(&sc->sc_txq)) != NULL) {
537 		m_freem(m);
538 		ntx++;
539 	}
540 
541 	RGE_DPRINTF(sc, RGE_DEBUG_XMIT, "%s: %d frames flushed\n", __func__,
542 	    ntx);
543 }
544 
545 static int
546 rge_detach(device_t dev)
547 {
548 	struct rge_softc *sc = device_get_softc(dev);
549 	int i, rid;
550 
551 	/* global flag, detaching */
552 	RGE_LOCK(sc);
553 	sc->sc_stopped = true;
554 	sc->sc_detaching = true;
555 	RGE_UNLOCK(sc);
556 
557 	/* stop/drain network interface */
558 	callout_drain(&sc->sc_timeout);
559 
560 	/* Make sure TX task isn't running */
561 	if (sc->sc_tq != NULL) {
562 		while (taskqueue_cancel(sc->sc_tq, &sc->sc_tx_task, NULL) != 0)
563 			taskqueue_drain(sc->sc_tq, &sc->sc_tx_task);
564 	}
565 
566 	RGE_LOCK(sc);
567 	callout_stop(&sc->sc_timeout);
568 
569 	/* stop NIC / DMA */
570 	rge_stop_locked(sc);
571 
572 	/* TODO: wait for completion */
573 
574 	/* Free pending TX mbufs */
575 	rge_txq_flush_mbufs(sc);
576 
577 	RGE_UNLOCK(sc);
578 
579 	/* Free taskqueue */
580 	if (sc->sc_tq != NULL) {
581 		taskqueue_free(sc->sc_tq);
582 		sc->sc_tq = NULL;
583 	}
584 
585 	/* Free descriptor memory */
586 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: freemem\n", __func__);
587 	rge_freemem(sc);
588 	rge_free_stats_mem(sc);
589 
590 	if (sc->sc_ifp) {
591 		RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: ifdetach/if_free\n",
592 		    __func__);
593 		if (sc->sc_ether_attached)
594 			ether_ifdetach(sc->sc_ifp);
595 		if_free(sc->sc_ifp);
596 	}
597 
598 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: sc_dmat_tx_desc\n", __func__);
599 	if (sc->sc_dmat_tx_desc)
600 		bus_dma_tag_destroy(sc->sc_dmat_tx_desc);
601 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: sc_dmat_tx_buf\n", __func__);
602 	if (sc->sc_dmat_tx_buf)
603 		bus_dma_tag_destroy(sc->sc_dmat_tx_buf);
604 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: sc_dmat_rx_desc\n", __func__);
605 	if (sc->sc_dmat_rx_desc)
606 		bus_dma_tag_destroy(sc->sc_dmat_rx_desc);
607 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: sc_dmat_rx_buf\n", __func__);
608 	if (sc->sc_dmat_rx_buf)
609 		bus_dma_tag_destroy(sc->sc_dmat_rx_buf);
610 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: sc_dmat_stats_buf\n", __func__);
611 	if (sc->sc_dmat_stats_buf)
612 		bus_dma_tag_destroy(sc->sc_dmat_stats_buf);
613 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: sc_dmat\n", __func__);
614 	if (sc->sc_dmat)
615 		bus_dma_tag_destroy(sc->sc_dmat);
616 
617 	/* Teardown interrupts */
618 	for (i = 0; i < RGE_MSI_MESSAGES; i++) {
619 		if (sc->sc_ih[i] != NULL) {
620 			bus_teardown_intr(sc->sc_dev, sc->sc_irq[i],
621 			    sc->sc_ih[i]);
622 			sc->sc_ih[i] = NULL;
623 		}
624 	}
625 
626 	/* Free interrupt resources */
627 	for (i = 0, rid = 1; i < RGE_MSI_MESSAGES; i++, rid++) {
628 		if (sc->sc_irq[i] != NULL) {
629 			bus_release_resource(sc->sc_dev, SYS_RES_IRQ,
630 			    rid, sc->sc_irq[i]);
631 			sc->sc_irq[i] = NULL;
632 		}
633 	}
634 
635 	/* Free MSI allocation */
636 	if (sc->rge_flags & RGE_FLAG_MSI)
637 		pci_release_msi(dev);
638 
639 	if (sc->sc_bres) {
640 		RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: release mmio\n",
641 		    __func__);
642 		bus_release_resource(dev, SYS_RES_MEMORY,
643 		    rman_get_rid(sc->sc_bres), sc->sc_bres);
644 		sc->sc_bres = NULL;
645 	}
646 
647 	if (sc->sc_queues) {
648 		free(sc->sc_queues, M_DEVBUF);
649 		sc->sc_queues = NULL;
650 	}
651 
652 	mtx_destroy(&sc->sc_mtx);
653 
654 	return (0);
655 }
656 
657 #if 0
658 
659 int
660 rge_activate(struct device *self, int act)
661 {
662 #ifndef SMALL_KERNEL
663 	struct rge_softc *sc = (struct rge_softc *)self;
664 #endif
665 
666 	switch (act) {
667 	case DVACT_POWERDOWN:
668 #ifndef SMALL_KERNEL
669 		rge_wol_power(sc);
670 #endif
671 		break;
672 	}
673 	return (0);
674 }
675 #endif
676 
677 static void
678 rge_intr_msi(void *arg)
679 {
680 	struct mbufq rx_mq;
681 	struct epoch_tracker et;
682 	struct mbuf *m;
683 	struct rge_softc *sc = arg;
684 	struct rge_queues *q = sc->sc_queues;
685 	uint32_t status;
686 	int claimed = 0, rv;
687 
688 	sc->sc_drv_stats.intr_cnt++;
689 
690 	mbufq_init(&rx_mq, RGE_RX_LIST_CNT);
691 
692 	if ((if_getdrvflags(sc->sc_ifp) & IFF_DRV_RUNNING) == 0)
693 		return;
694 
695 	RGE_LOCK(sc);
696 
697 	if (sc->sc_suspended || sc->sc_stopped || sc->sc_detaching) {
698 		RGE_UNLOCK(sc);
699 		return;
700 	}
701 
702 	/* Disable interrupts. */
703 	RGE_WRITE_4(sc, RGE_IMR, 0);
704 
705 	if (!(sc->rge_flags & RGE_FLAG_MSI)) {
706 		if ((RGE_READ_4(sc, RGE_ISR) & sc->rge_intrs) == 0)
707 			goto done;
708 	}
709 
710 	status = RGE_READ_4(sc, RGE_ISR);
711 	if (status)
712 		RGE_WRITE_4(sc, RGE_ISR, status);
713 
714 	if (status & RGE_ISR_PCS_TIMEOUT)
715 		claimed = 1;
716 
717 	rv = 0;
718 	if (status & sc->rge_intrs) {
719 
720 		(void) q;
721 		rv |= rge_rxeof(q, &rx_mq);
722 		rv |= rge_txeof(q);
723 
724 		if (status & RGE_ISR_SYSTEM_ERR) {
725 			sc->sc_drv_stats.intr_system_err_cnt++;
726 			rge_init_locked(sc);
727 		}
728 		claimed = 1;
729 	}
730 
731 	if (sc->rge_timerintr) {
732 		if (!rv) {
733 			/*
734 			 * Nothing needs to be processed, fallback
735 			 * to use TX/RX interrupts.
736 			 */
737 			rge_setup_intr(sc, RGE_IMTYPE_NONE);
738 
739 			/*
740 			 * Recollect, mainly to avoid the possible
741 			 * race introduced by changing interrupt
742 			 * masks.
743 			 */
744 			rge_rxeof(q, &rx_mq);
745 			rge_txeof(q);
746 		} else
747 			RGE_WRITE_4(sc, RGE_TIMERCNT, 1);
748 	} else if (rv) {
749 		/*
750 		 * Assume that using simulated interrupt moderation
751 		 * (hardware timer based) could reduce the interrupt
752 		 * rate.
753 		 */
754 		rge_setup_intr(sc, RGE_IMTYPE_SIM);
755 	}
756 
757 	RGE_WRITE_4(sc, RGE_IMR, sc->rge_intrs);
758 
759 done:
760 	RGE_UNLOCK(sc);
761 
762 	NET_EPOCH_ENTER(et);
763 	/* Handle any RX frames, outside of the driver lock */
764 	while ((m = mbufq_dequeue(&rx_mq)) != NULL) {
765 		sc->sc_drv_stats.recv_input_cnt++;
766 		if_input(sc->sc_ifp, m);
767 	}
768 	NET_EPOCH_EXIT(et);
769 
770 	(void) claimed;
771 }
772 
773 static inline void
774 rge_tx_list_sync(struct rge_softc *sc, struct rge_queues *q,
775     unsigned int idx, unsigned int len, int ops)
776 {
777 	bus_dmamap_sync(sc->sc_dmat_tx_desc, q->q_tx.rge_tx_list_map, ops);
778 }
779 
780 /**
781  * @brief Queue the given mbuf at the given TX slot index for transmit.
782  *
783  * If the frame couldn't be enqueued then 0 is returned.
784  * The caller needs to handle that and free/re-queue the mbuf as required.
785  *
786  * Note that this doesn't actually kick-start the transmit itself;
787  * see rge_txstart() for the register to poke to start transmit.
788  *
789  * This must be called with the driver lock held.
790  *
791  * @param sc	driver softc
792  * @param q	TX queue ring
793  * @param m	mbuf to enqueue
794  * @returns	if the mbuf is enqueued, it's consumed here and the number of
795  * 		TX descriptors used is returned; if there's no space then 0 is
796  *		returned; if the mbuf couldn't be defragged and the caller
797  *		should free it then -1 is returned.
798  */
799 static int
800 rge_encap(struct rge_softc *sc, struct rge_queues *q, struct mbuf *m, int idx)
801 {
802 	struct rge_tx_desc *d = NULL;
803 	struct rge_txq *txq;
804 	bus_dmamap_t txmap;
805 	uint32_t cmdsts, cflags = 0;
806 	int cur, error, i;
807 	bus_dma_segment_t seg[RGE_TX_NSEGS];
808 	int nsegs;
809 
810 	RGE_ASSERT_LOCKED(sc);
811 
812 	txq = &q->q_tx.rge_txq[idx];
813 	txmap = txq->txq_dmamap;
814 
815 	sc->sc_drv_stats.tx_encap_cnt++;
816 
817 	nsegs = RGE_TX_NSEGS;
818 	error = bus_dmamap_load_mbuf_sg(sc->sc_dmat_tx_buf, txmap, m,
819 	    seg, &nsegs, BUS_DMA_NOWAIT);
820 
821 	switch (error) {
822 	case 0:
823 		break;
824 	case EFBIG: /* mbuf chain is too fragmented */
825 		sc->sc_drv_stats.tx_encap_refrag_cnt++;
826 		nsegs = RGE_TX_NSEGS;
827 		if (m_defrag(m, M_NOWAIT) == 0 &&
828 		    bus_dmamap_load_mbuf_sg(sc->sc_dmat_tx_buf, txmap, m,
829 		    seg, &nsegs, BUS_DMA_NOWAIT) == 0)
830 			break;
831 		/* FALLTHROUGH */
832 	default:
833 		sc->sc_drv_stats.tx_encap_err_toofrag++;
834 		return (-1);
835 	}
836 
837 	bus_dmamap_sync(sc->sc_dmat_tx_buf, txmap, BUS_DMASYNC_PREWRITE);
838 
839 	/*
840 	 * Set RGE_TDEXTSTS_IPCSUM if any checksum offloading is requested.
841 	 * Otherwise, RGE_TDEXTSTS_TCPCSUM / RGE_TDEXTSTS_UDPCSUM does not
842 	 * take affect.
843 	 */
844 	if ((m->m_pkthdr.csum_flags & RGE_CSUM_FEATURES) != 0) {
845 		cflags |= RGE_TDEXTSTS_IPCSUM;
846 		sc->sc_drv_stats.tx_offload_ip_csum_set++;
847 		if (m->m_pkthdr.csum_flags & CSUM_TCP) {
848 			sc->sc_drv_stats.tx_offload_tcp_csum_set++;
849 			cflags |= RGE_TDEXTSTS_TCPCSUM;
850 		}
851 		if (m->m_pkthdr.csum_flags & CSUM_UDP) {
852 			sc->sc_drv_stats.tx_offload_udp_csum_set++;
853 			cflags |= RGE_TDEXTSTS_UDPCSUM;
854 		}
855 	}
856 
857 	/* Set up hardware VLAN tagging */
858 	if (m->m_flags & M_VLANTAG) {
859 		sc->sc_drv_stats.tx_offload_vlan_tag_set++;
860 		cflags |= htole16(m->m_pkthdr.ether_vtag) | RGE_TDEXTSTS_VTAG;
861 	}
862 
863 	cur = idx;
864 	for (i = 1; i < nsegs; i++) {
865 		cur = RGE_NEXT_TX_DESC(cur);
866 
867 		cmdsts = RGE_TDCMDSTS_OWN;
868 		cmdsts |= seg[i].ds_len;
869 
870 		if (cur == RGE_TX_LIST_CNT - 1)
871 			cmdsts |= RGE_TDCMDSTS_EOR;
872 		if (i == nsegs - 1)
873 			cmdsts |= RGE_TDCMDSTS_EOF;
874 
875 		/*
876 		 * Note: vendor driver puts wmb() after opts2/extsts,
877 		 * before opts1/status.
878 		 *
879 		 * See the other place I have this comment for more
880 		 * information.
881 		 */
882 		d = &q->q_tx.rge_tx_list[cur];
883 		d->rge_addr = htole64(seg[i].ds_addr);
884 		d->rge_extsts = htole32(cflags);
885 		wmb();
886 		d->rge_cmdsts = htole32(cmdsts);
887 	}
888 
889 	/* Update info of TX queue and descriptors. */
890 	txq->txq_mbuf = m;
891 	txq->txq_descidx = cur;
892 
893 	cmdsts = RGE_TDCMDSTS_SOF;
894 	cmdsts |= seg[0].ds_len;
895 
896 	if (idx == RGE_TX_LIST_CNT - 1)
897 		cmdsts |= RGE_TDCMDSTS_EOR;
898 	if (nsegs == 1)
899 		cmdsts |= RGE_TDCMDSTS_EOF;
900 
901 	/*
902 	 * Note: vendor driver puts wmb() after opts2/extsts,
903 	 * before opts1/status.
904 	 *
905 	 * It does this:
906 	 * - set rge_addr
907 	 * - set extsts
908 	 * - wmb
909 	 * - set status - at this point it's owned by the hardware
910 	 *
911 	 */
912 	d = &q->q_tx.rge_tx_list[idx];
913 	d->rge_addr = htole64(seg[0].ds_addr);
914 	d->rge_extsts = htole32(cflags);
915 	wmb();
916 	d->rge_cmdsts = htole32(cmdsts);
917 	wmb();
918 
919 	if (cur >= idx) {
920 		rge_tx_list_sync(sc, q, idx, nsegs,
921 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
922 	} else {
923 		rge_tx_list_sync(sc, q, idx, RGE_TX_LIST_CNT - idx,
924 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
925 		rge_tx_list_sync(sc, q, 0, cur + 1,
926 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
927 	}
928 
929 	/* Transfer ownership of packet to the chip. */
930 	cmdsts |= RGE_TDCMDSTS_OWN;
931 	rge_tx_list_sync(sc, q, idx, 1, BUS_DMASYNC_POSTWRITE);
932 	d->rge_cmdsts = htole32(cmdsts);
933 	rge_tx_list_sync(sc, q, idx, 1, BUS_DMASYNC_PREWRITE);
934 	wmb();
935 
936 	return (nsegs);
937 }
938 
939 static int
940 rge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
941 {
942 	struct rge_softc *sc = if_getsoftc(ifp);
943 	struct ifreq *ifr = (struct ifreq *)data;
944 	int error = 0;
945 
946 	switch (cmd) {
947 	case SIOCSIFMTU:
948 		/* Note: no hardware reinit is required */
949 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RGE_JUMBO_MTU) {
950 			error = EINVAL;
951 			break;
952 		}
953 		if (if_getmtu(ifp) != ifr->ifr_mtu)
954 			if_setmtu(ifp, ifr->ifr_mtu);
955 
956 		VLAN_CAPABILITIES(ifp);
957 		break;
958 
959 	case SIOCSIFFLAGS:
960 		RGE_LOCK(sc);
961 		if ((if_getflags(ifp) & IFF_UP) != 0) {
962 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
963 				/*
964 				 * TODO: handle promisc/iffmulti changing
965 				 * without reprogramming everything.
966 				 */
967 				rge_init_locked(sc);
968 			} else {
969 				/* Reinit promisc/multi just in case */
970 				rge_iff_locked(sc);
971 			}
972 		} else {
973 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
974 				rge_stop_locked(sc);
975 			}
976 		}
977 		RGE_UNLOCK(sc);
978 		break;
979 	case SIOCADDMULTI:
980 	case SIOCDELMULTI:
981 		RGE_LOCK(sc);
982 		if ((if_getflags(ifp) & IFF_DRV_RUNNING) != 0) {
983 			rge_iff_locked(sc);
984 		}
985 		RGE_UNLOCK(sc);
986 		break;
987 	case SIOCGIFMEDIA:
988 	case SIOCSIFMEDIA:
989 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
990 		break;
991 	case SIOCSIFCAP:
992 		{
993 			int mask;
994 			bool reinit = false;
995 
996 			/* Get the mask of changed bits */
997 			mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
998 
999 			/*
1000 			 * Locked so we don't have a narrow window where frames
1001 			 * are being processed with the updated flags but the
1002 			 * hardware configuration hasn't yet changed.
1003 			 */
1004 			RGE_LOCK(sc);
1005 
1006 			if ((mask & IFCAP_TXCSUM) != 0 &&
1007 			    (if_getcapabilities(ifp) & IFCAP_TXCSUM) != 0) {
1008 				if_togglecapenable(ifp, IFCAP_TXCSUM);
1009 				if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0)
1010 					if_sethwassistbits(ifp, RGE_CSUM_FEATURES, 0);
1011 				else
1012 					if_sethwassistbits(ifp, 0, RGE_CSUM_FEATURES);
1013 				reinit = 1;
1014 			}
1015 
1016 			if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
1017 			    (if_getcapabilities(ifp) & IFCAP_VLAN_HWTAGGING) != 0) {
1018 				if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1019 				reinit = 1;
1020 			}
1021 
1022 			/* TODO: WOL */
1023 
1024 			if ((mask & IFCAP_RXCSUM) != 0 &&
1025 			    (if_getcapabilities(ifp) & IFCAP_RXCSUM) != 0) {
1026 				if_togglecapenable(ifp, IFCAP_RXCSUM);
1027 				reinit = 1;
1028 			}
1029 
1030 			if (reinit && if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1031 				if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1032 				rge_init_locked(sc);
1033 			}
1034 
1035 			RGE_UNLOCK(sc);
1036 			VLAN_CAPABILITIES(ifp);
1037 		}
1038 		break;
1039 	default:
1040 		error = ether_ioctl(ifp, cmd, data);
1041 		break;
1042 	}
1043 
1044 	return (error);
1045 }
1046 
1047 static void
1048 rge_qflush_if(if_t ifp)
1049 {
1050 	struct rge_softc *sc = if_getsoftc(ifp);
1051 
1052 	/* TODO: this should iterate over the TXQs */
1053 	RGE_LOCK(sc);
1054 	rge_txq_flush_mbufs(sc);
1055 	RGE_UNLOCK(sc);
1056 }
1057 
1058 /**
1059  * @brief Transmit the given frame to the hardware.
1060  *
1061  * This routine is called by the network stack to send
1062  * a frame to the device.
1063  *
1064  * For now we simply direct dispatch this frame to the
1065  * hardware (and thus avoid maintaining our own internal
1066  * queue)
1067  */
1068 static int
1069 rge_transmit_if(if_t ifp, struct mbuf *m)
1070 {
1071 	struct rge_softc *sc = if_getsoftc(ifp);
1072 	int ret;
1073 
1074 	sc->sc_drv_stats.transmit_call_cnt++;
1075 
1076 	RGE_LOCK(sc);
1077 	if (sc->sc_stopped == true) {
1078 		sc->sc_drv_stats.transmit_stopped_cnt++;
1079 		RGE_UNLOCK(sc);
1080 		return (ENETDOWN);	/* TODO: better error? */
1081 	}
1082 
1083 	/* XXX again should be a per-TXQ thing */
1084 	ret = mbufq_enqueue(&sc->sc_txq, m);
1085 	if (ret != 0) {
1086 		sc->sc_drv_stats.transmit_full_cnt++;
1087 		RGE_UNLOCK(sc);
1088 		return (ret);
1089 	}
1090 	RGE_UNLOCK(sc);
1091 
1092 	/* mbuf is owned by the driver, schedule transmit */
1093 	taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
1094 	sc->sc_drv_stats.transmit_queued_cnt++;
1095 
1096 	return (0);
1097 }
1098 
/**
 * @brief Locked wrapper around rge_init_locked().
 *
 * @param xsc	opaque pointer to the driver softc
 */
static void
rge_init_if(void *xsc)
{
	struct rge_softc *softc;

	softc = (struct rge_softc *)xsc;

	RGE_LOCK(softc);
	rge_init_locked(softc);
	RGE_UNLOCK(softc);
}
1108 
1109 static void
1110 rge_init_locked(struct rge_softc *sc)
1111 {
1112 	struct rge_queues *q = sc->sc_queues;
1113 	uint32_t rxconf, val;
1114 	int i, num_miti;
1115 
1116 	RGE_ASSERT_LOCKED(sc);
1117 
1118 	RGE_DPRINTF(sc, RGE_DEBUG_INIT, "%s: called!\n", __func__);
1119 
1120 	/* Don't double-init the hardware */
1121 	if ((if_getdrvflags(sc->sc_ifp) & IFF_DRV_RUNNING) != 0) {
1122 		/*
1123 		 * Note: I'm leaving this disabled by default; however
1124 		 * I'm leaving it in here so I can figure out what's
1125 		 * causing this to be initialised both from the ioctl
1126 		 * API and if_init() API.
1127 		 */
1128 /*		RGE_PRINT_ERROR(sc, "%s: called whilst running?\n", __func__); */
1129 		return;
1130 	}
1131 
1132 	/*
1133 	 * Bring the hardware down so we know it's in a good known
1134 	 * state before we bring it up in a good known state.
1135 	 */
1136 	rge_stop_locked(sc);
1137 
1138 	/* Set MAC address. */
1139 	rge_set_macaddr(sc, if_getlladdr(sc->sc_ifp));
1140 
1141 	/* Initialize RX and TX descriptors lists. */
1142 	rge_rx_list_init(q);
1143 	rge_tx_list_init(q);
1144 
1145 	if (rge_chipinit(sc)) {
1146 		RGE_PRINT_ERROR(sc, "%s: ERROR: chip init fail!\n", __func__);
1147 		return;
1148 	}
1149 
1150 	if (rge_phy_config(sc))
1151 		return;
1152 
1153 	RGE_SETBIT_1(sc, RGE_EECMD, RGE_EECMD_WRITECFG);
1154 
1155 	RGE_CLRBIT_1(sc, 0xf1, 0x80);
1156 	rge_disable_aspm_clkreq(sc);
1157 	RGE_WRITE_2(sc, RGE_EEE_TXIDLE_TIMER,
1158 	    RGE_JUMBO_MTU + ETHER_HDR_LEN + 32);
1159 
1160 	/* Load the addresses of the RX and TX lists into the chip. */
1161 	RGE_WRITE_4(sc, RGE_RXDESC_ADDR_LO,
1162 	    RGE_ADDR_LO(q->q_rx.rge_rx_list_paddr));
1163 	RGE_WRITE_4(sc, RGE_RXDESC_ADDR_HI,
1164 	    RGE_ADDR_HI(q->q_rx.rge_rx_list_paddr));
1165 	RGE_WRITE_4(sc, RGE_TXDESC_ADDR_LO,
1166 	    RGE_ADDR_LO(q->q_tx.rge_tx_list_paddr));
1167 	RGE_WRITE_4(sc, RGE_TXDESC_ADDR_HI,
1168 	    RGE_ADDR_HI(q->q_tx.rge_tx_list_paddr));
1169 
1170 	/* Set the initial RX and TX configurations. */
1171 	if (sc->rge_type == MAC_R25)
1172 		rxconf = RGE_RXCFG_CONFIG;
1173 	else if (sc->rge_type == MAC_R25B)
1174 		rxconf = RGE_RXCFG_CONFIG_8125B;
1175 	else if (RGE_TYPE_R25D(sc))
1176 		rxconf = RGE_RXCFG_CONFIG_8125D;
1177 	else
1178 		rxconf = RGE_RXCFG_CONFIG_8126;
1179 	RGE_WRITE_4(sc, RGE_RXCFG, rxconf);
1180 	RGE_WRITE_4(sc, RGE_TXCFG, RGE_TXCFG_CONFIG);
1181 
1182 	val = rge_read_csi(sc, 0x70c) & ~0x3f000000;
1183 	rge_write_csi(sc, 0x70c, val | 0x27000000);
1184 
1185 	if (RGE_TYPE_R26(sc) || sc->rge_type == MAC_R27) {
1186 		/* Disable L1 timeout. */
1187 		val = rge_read_csi(sc, 0x890) & ~0x00000001;
1188 		rge_write_csi(sc, 0x890, val);
1189 	} else if (!RGE_TYPE_R25D(sc))
1190 		RGE_WRITE_2(sc, 0x0382, 0x221b);
1191 
1192 	RGE_WRITE_1(sc, RGE_RSS_CTRL, 0);
1193 
1194 	val = RGE_READ_2(sc, RGE_RXQUEUE_CTRL) & ~0x001c;
1195 	RGE_WRITE_2(sc, RGE_RXQUEUE_CTRL, val | (fls(sc->sc_nqueues) - 1) << 2);
1196 
1197 	RGE_CLRBIT_1(sc, RGE_CFG1, RGE_CFG1_SPEED_DOWN);
1198 
1199 	rge_write_mac_ocp(sc, 0xc140, 0xffff);
1200 	rge_write_mac_ocp(sc, 0xc142, 0xffff);
1201 
1202 	RGE_MAC_SETBIT(sc, 0xeb58, 0x0001);
1203 
1204 	if (RGE_TYPE_R26(sc) || sc->rge_type == MAC_R27) {
1205 		RGE_CLRBIT_1(sc, 0xd8, 0x02);
1206 		if (sc->rge_type == MAC_R27) {
1207 			RGE_CLRBIT_1(sc, 0x20e4, 0x04);
1208 			RGE_MAC_CLRBIT(sc, 0xe00c, 0x1000);
1209 			RGE_MAC_CLRBIT(sc, 0xc0c2, 0x0040);
1210 		}
1211 	}
1212 
1213 	val = rge_read_mac_ocp(sc, 0xe614);
1214 	val &= (sc->rge_type == MAC_R27) ? ~0x0f00 : ~0x0700;
1215 	if (sc->rge_type == MAC_R25 || RGE_TYPE_R25D(sc))
1216 		rge_write_mac_ocp(sc, 0xe614, val | 0x0300);
1217 	else if (sc->rge_type == MAC_R25B)
1218 		rge_write_mac_ocp(sc, 0xe614, val | 0x0200);
1219 	else if (RGE_TYPE_R26(sc))
1220 		rge_write_mac_ocp(sc, 0xe614, val | 0x0300);
1221 	else
1222 		rge_write_mac_ocp(sc, 0xe614, val | 0x0f00);
1223 
1224 	val = rge_read_mac_ocp(sc, 0xe63e) & ~0x0c00;
1225 	rge_write_mac_ocp(sc, 0xe63e, val |
1226 	    ((fls(sc->sc_nqueues) - 1) & 0x03) << 10);
1227 
1228 	val = rge_read_mac_ocp(sc, 0xe63e) & ~0x0030;
1229 	rge_write_mac_ocp(sc, 0xe63e, val | 0x0020);
1230 
1231 	RGE_MAC_CLRBIT(sc, 0xc0b4, 0x0001);
1232 	RGE_MAC_SETBIT(sc, 0xc0b4, 0x0001);
1233 
1234 	RGE_MAC_SETBIT(sc, 0xc0b4, 0x000c);
1235 
1236 	val = rge_read_mac_ocp(sc, 0xeb6a) & ~0x00ff;
1237 	rge_write_mac_ocp(sc, 0xeb6a, val | 0x0033);
1238 
1239 	val = rge_read_mac_ocp(sc, 0xeb50) & ~0x03e0;
1240 	rge_write_mac_ocp(sc, 0xeb50, val | 0x0040);
1241 
1242 	RGE_MAC_CLRBIT(sc, 0xe056, 0x00f0);
1243 
1244 	RGE_WRITE_1(sc, RGE_TDFNR, 0x10);
1245 
1246 	RGE_MAC_CLRBIT(sc, 0xe040, 0x1000);
1247 
1248 	val = rge_read_mac_ocp(sc, 0xea1c) & ~0x0003;
1249 	rge_write_mac_ocp(sc, 0xea1c, val | 0x0001);
1250 
1251 	if (RGE_TYPE_R25D(sc))
1252 		rge_write_mac_ocp(sc, 0xe0c0, 0x4403);
1253 	else
1254 		rge_write_mac_ocp(sc, 0xe0c0, 0x4000);
1255 
1256 	RGE_MAC_SETBIT(sc, 0xe052, 0x0060);
1257 	RGE_MAC_CLRBIT(sc, 0xe052, 0x0088);
1258 
1259 	val = rge_read_mac_ocp(sc, 0xd430) & ~0x0fff;
1260 	rge_write_mac_ocp(sc, 0xd430, val | 0x045f);
1261 
1262 	RGE_SETBIT_1(sc, RGE_DLLPR, RGE_DLLPR_PFM_EN | RGE_DLLPR_TX_10M_PS_EN);
1263 
1264 	if (sc->rge_type == MAC_R25)
1265 		RGE_SETBIT_1(sc, RGE_MCUCMD, 0x01);
1266 
1267 	if (!RGE_TYPE_R25D(sc)) {
1268 		/* Disable EEE plus. */
1269 		RGE_MAC_CLRBIT(sc, 0xe080, 0x0002);
1270 	}
1271 
1272 	if (RGE_TYPE_R26(sc) || sc->rge_type == MAC_R27)
1273 		RGE_MAC_CLRBIT(sc, 0xea1c, 0x0304);
1274 	else
1275 		RGE_MAC_CLRBIT(sc, 0xea1c, 0x0004);
1276 
1277 	/* Clear tcam entries. */
1278 	RGE_MAC_SETBIT(sc, 0xeb54, 0x0001);
1279 	DELAY(1);
1280 	RGE_MAC_CLRBIT(sc, 0xeb54, 0x0001);
1281 
1282 	RGE_CLRBIT_2(sc, 0x1880, 0x0030);
1283 
1284 	if (sc->rge_type == MAC_R27) {
1285 		val = rge_read_mac_ocp(sc, 0xd40c) & ~0xe038;
1286 		rge_write_phy_ocp(sc, 0xd40c, val | 0x8020);
1287 	}
1288 
1289 	/* Config interrupt type. */
1290 	if (sc->rge_type == MAC_R27)
1291 		RGE_CLRBIT_1(sc, RGE_INT_CFG0, RGE_INT_CFG0_AVOID_MISS_INTR);
1292 	else if (sc->rge_type != MAC_R25)
1293 		RGE_CLRBIT_1(sc, RGE_INT_CFG0, RGE_INT_CFG0_EN);
1294 
1295 	/* Clear timer interrupts. */
1296 	RGE_WRITE_4(sc, RGE_TIMERINT0, 0);
1297 	RGE_WRITE_4(sc, RGE_TIMERINT1, 0);
1298 	RGE_WRITE_4(sc, RGE_TIMERINT2, 0);
1299 	RGE_WRITE_4(sc, RGE_TIMERINT3, 0);
1300 
1301 	num_miti =
1302 	    (sc->rge_type == MAC_R25B || RGE_TYPE_R26(sc)) ? 32 : 64;
1303 	/* Clear interrupt moderation timer. */
1304 	for (i = 0; i < num_miti; i++)
1305 		RGE_WRITE_4(sc, RGE_INTMITI(i), 0);
1306 
1307 	if (RGE_TYPE_R26(sc)) {
1308 		RGE_CLRBIT_1(sc, RGE_INT_CFG0,
1309 		    RGE_INT_CFG0_TIMEOUT_BYPASS | RGE_INT_CFG0_RDU_BYPASS_8126 |
1310 		    RGE_INT_CFG0_MITIGATION_BYPASS);
1311 		RGE_WRITE_2(sc, RGE_INT_CFG1, 0);
1312 	}
1313 
1314 	RGE_MAC_SETBIT(sc, 0xc0ac, 0x1f80);
1315 
1316 	rge_write_mac_ocp(sc, 0xe098, 0xc302);
1317 
1318 	RGE_MAC_CLRBIT(sc, 0xe032, 0x0003);
1319 	val = rge_read_csi(sc, 0x98) & ~0x0000ff00;
1320 	rge_write_csi(sc, 0x98, val);
1321 
1322 	if (RGE_TYPE_R25D(sc)) {
1323 		val = rge_read_mac_ocp(sc, 0xe092) & ~0x00ff;
1324 		rge_write_mac_ocp(sc, 0xe092, val | 0x0008);
1325 	} else
1326 		RGE_MAC_CLRBIT(sc, 0xe092, 0x00ff);
1327 
1328 	/* Enable/disable HW VLAN tagging based on enabled capability */
1329 	if ((if_getcapabilities(sc->sc_ifp) & IFCAP_VLAN_HWTAGGING) != 0)
1330 		RGE_SETBIT_4(sc, RGE_RXCFG, RGE_RXCFG_VLANSTRIP);
1331 	else
1332 		RGE_CLRBIT_4(sc, RGE_RXCFG, RGE_RXCFG_VLANSTRIP);
1333 
1334 	/* Enable/disable RX checksum based on enabled capability */
1335 	if ((if_getcapenable(sc->sc_ifp) & IFCAP_RXCSUM) != 0)
1336 		RGE_SETBIT_2(sc, RGE_CPLUSCMD, RGE_CPLUSCMD_RXCSUM);
1337 	else
1338 		RGE_CLRBIT_2(sc, RGE_CPLUSCMD, RGE_CPLUSCMD_RXCSUM);
1339 	RGE_READ_2(sc, RGE_CPLUSCMD);
1340 
1341 	/* Set Maximum frame size. */
1342 	RGE_WRITE_2(sc, RGE_RXMAXSIZE, RGE_JUMBO_FRAMELEN);
1343 
1344 	/* Disable RXDV gate. */
1345 	RGE_CLRBIT_1(sc, RGE_PPSW, 0x08);
1346 	DELAY(2000);
1347 
1348 	/* Program promiscuous mode and multicast filters. */
1349 	rge_iff_locked(sc);
1350 
1351 	if (sc->rge_type == MAC_R27)
1352 		RGE_CLRBIT_1(sc, RGE_RADMFIFO_PROTECT, 0x2001);
1353 
1354 	rge_disable_aspm_clkreq(sc);
1355 
1356 	RGE_CLRBIT_1(sc, RGE_EECMD, RGE_EECMD_WRITECFG);
1357 	DELAY(10);
1358 
1359 	rge_ifmedia_upd(sc->sc_ifp);
1360 
1361 	/* Enable transmit and receive. */
1362 	RGE_WRITE_1(sc, RGE_CMD, RGE_CMD_TXENB | RGE_CMD_RXENB);
1363 
1364 	/* Enable interrupts. */
1365 	rge_setup_intr(sc, RGE_IMTYPE_SIM);
1366 
1367 	if_setdrvflagbits(sc->sc_ifp, IFF_DRV_RUNNING, 0);
1368 	if_setdrvflagbits(sc->sc_ifp, 0, IFF_DRV_OACTIVE);
1369 
1370 	callout_reset(&sc->sc_timeout, hz, rge_tick, sc);
1371 
1372 	RGE_DPRINTF(sc, RGE_DEBUG_INIT, "%s: init completed!\n", __func__);
1373 
1374 	/* Unblock transmit when we release the lock */
1375 	sc->sc_stopped = false;
1376 }
1377 
1378 /*
1379  * @brief Stop the adapter and free any mbufs allocated to the RX and TX lists.
1380  *
1381  * Must be called with the driver lock held.
1382  */
1383 void
1384 rge_stop_locked(struct rge_softc *sc)
1385 {
1386 	struct rge_queues *q = sc->sc_queues;
1387 	int i;
1388 
1389 	RGE_ASSERT_LOCKED(sc);
1390 
1391 	RGE_DPRINTF(sc, RGE_DEBUG_INIT, "%s: called!\n", __func__);
1392 
1393 	callout_stop(&sc->sc_timeout);
1394 
1395 	/* Stop pending TX submissions */
1396 	sc->sc_stopped = true;
1397 
1398 	if_setdrvflagbits(sc->sc_ifp, 0, IFF_DRV_RUNNING);
1399 	sc->rge_timerintr = 0;
1400 	sc->sc_watchdog = 0;
1401 
1402 	RGE_CLRBIT_4(sc, RGE_RXCFG, RGE_RXCFG_ALLPHYS | RGE_RXCFG_INDIV |
1403 	    RGE_RXCFG_MULTI | RGE_RXCFG_BROAD | RGE_RXCFG_RUNT |
1404 	    RGE_RXCFG_ERRPKT);
1405 
1406 	rge_hw_reset(sc);
1407 
1408 	RGE_MAC_CLRBIT(sc, 0xc0ac, 0x1f80);
1409 
1410 	if_setdrvflagbits(sc->sc_ifp, 0, IFF_DRV_OACTIVE);
1411 
1412 	if (q->q_rx.rge_head != NULL) {
1413 		m_freem(q->q_rx.rge_head);
1414 		q->q_rx.rge_head = NULL;
1415 		q->q_rx.rge_tail = &q->q_rx.rge_head;
1416 	}
1417 
1418 	/* Free the TX list buffers. */
1419 	for (i = 0; i < RGE_TX_LIST_CNT; i++) {
1420 		if (q->q_tx.rge_txq[i].txq_mbuf != NULL) {
1421 			bus_dmamap_unload(sc->sc_dmat_tx_buf,
1422 			    q->q_tx.rge_txq[i].txq_dmamap);
1423 			m_freem(q->q_tx.rge_txq[i].txq_mbuf);
1424 			q->q_tx.rge_txq[i].txq_mbuf = NULL;
1425 		}
1426 	}
1427 
1428 	/* Free the RX list buffers. */
1429 	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
1430 		if (q->q_rx.rge_rxq[i].rxq_mbuf != NULL) {
1431 			bus_dmamap_unload(sc->sc_dmat_rx_buf,
1432 			    q->q_rx.rge_rxq[i].rxq_dmamap);
1433 			m_freem(q->q_rx.rge_rxq[i].rxq_mbuf);
1434 			q->q_rx.rge_rxq[i].rxq_mbuf = NULL;
1435 		}
1436 	}
1437 
1438 	/* Free pending TX frames */
1439 	/* TODO: should be per TX queue */
1440 	rge_txq_flush_mbufs(sc);
1441 }
1442 
1443 /*
1444  * Set media options.
1445  */
1446 static int
1447 rge_ifmedia_upd(if_t ifp)
1448 {
1449 	struct rge_softc *sc = if_getsoftc(ifp);
1450 	struct ifmedia *ifm = &sc->sc_media;
1451 	int anar, gig, val;
1452 
1453 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1454 		return (EINVAL);
1455 
1456 	/* Disable Gigabit Lite. */
1457 	RGE_PHY_CLRBIT(sc, 0xa428, 0x0200);
1458 	RGE_PHY_CLRBIT(sc, 0xa5ea, 0x0001);
1459 	if (RGE_TYPE_R26(sc) || sc->rge_type == MAC_R27)
1460 		RGE_PHY_CLRBIT(sc, 0xa5ea, 0x0007);
1461 
1462 	val = rge_read_phy_ocp(sc, 0xa5d4);
1463 	switch (sc->rge_type) {
1464 	case MAC_R27:
1465 		val &= ~RGE_ADV_10000TFDX;
1466 		/* fallthrough */
1467 	case MAC_R26_1:
1468 	case MAC_R26_2:
1469 		val &= ~RGE_ADV_5000TFDX;
1470 		/* fallthrough */
1471 	default:
1472 		val &= ~RGE_ADV_2500TFDX;
1473 		break;
1474 	}
1475 
1476 	anar = ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10;
1477 	gig = GTCR_ADV_1000TFDX | GTCR_ADV_1000THDX;
1478 
1479 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1480 	case IFM_AUTO:
1481 		val |= RGE_ADV_2500TFDX;
1482 		if (RGE_TYPE_R26(sc))
1483 			val |= RGE_ADV_5000TFDX;
1484 		else if (sc->rge_type == MAC_R27)
1485 			val |= RGE_ADV_5000TFDX | RGE_ADV_10000TFDX;
1486 		break;
1487 	case IFM_10G_T:
1488 		val |= RGE_ADV_10000TFDX;
1489 		if_setbaudrate(ifp, IF_Gbps(10));
1490 		break;
1491 	case IFM_5000_T:
1492 		val |= RGE_ADV_5000TFDX;
1493 		if_setbaudrate(ifp, IF_Gbps(5));
1494 		break;
1495 	case IFM_2500_T:
1496 		val |= RGE_ADV_2500TFDX;
1497 		if_setbaudrate(ifp, IF_Mbps(2500));
1498 		break;
1499 	case IFM_1000_T:
1500 		if_setbaudrate(ifp, IF_Gbps(1));
1501 		break;
1502 	case IFM_100_TX:
1503 		gig = rge_read_phy(sc, 0, MII_100T2CR) &
1504 		    ~(GTCR_ADV_1000TFDX | GTCR_ADV_1000THDX);
1505 		anar = ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) ?
1506 		    ANAR_TX | ANAR_TX_FD | ANAR_10_FD | ANAR_10 :
1507 		    ANAR_TX | ANAR_10_FD | ANAR_10;
1508 		if_setbaudrate(ifp, IF_Mbps(100));
1509 		break;
1510 	case IFM_10_T:
1511 		gig = rge_read_phy(sc, 0, MII_100T2CR) &
1512 		    ~(GTCR_ADV_1000TFDX | GTCR_ADV_1000THDX);
1513 		anar = ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) ?
1514 		    ANAR_10_FD | ANAR_10 : ANAR_10;
1515 		if_setbaudrate(ifp, IF_Mbps(10));
1516 		break;
1517 	default:
1518 		RGE_PRINT_ERROR(sc, "unsupported media type\n");
1519 		return (EINVAL);
1520 	}
1521 
1522 	rge_write_phy(sc, 0, MII_ANAR, anar | ANAR_PAUSE_ASYM | ANAR_FC);
1523 	rge_write_phy(sc, 0, MII_100T2CR, gig);
1524 	rge_write_phy_ocp(sc, 0xa5d4, val);
1525 	rge_write_phy(sc, 0, MII_BMCR, BMCR_RESET | BMCR_AUTOEN |
1526 	    BMCR_STARTNEG);
1527 
1528 	return (0);
1529 }
1530 
1531 /*
1532  * Report current media status.
1533  */
1534 static void
1535 rge_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
1536 {
1537 	struct rge_softc *sc = if_getsoftc(ifp);
1538 	uint16_t status = 0;
1539 
1540 	ifmr->ifm_status = IFM_AVALID;
1541 	ifmr->ifm_active = IFM_ETHER;
1542 
1543 	if (rge_get_link_status(sc)) {
1544 		ifmr->ifm_status |= IFM_ACTIVE;
1545 
1546 		status = RGE_READ_2(sc, RGE_PHYSTAT);
1547 		if ((status & RGE_PHYSTAT_FDX) ||
1548 		    (status & (RGE_PHYSTAT_1000MBPS | RGE_PHYSTAT_2500MBPS |
1549 		    RGE_PHYSTAT_5000MBPS | RGE_PHYSTAT_10000MBPS)))
1550 			ifmr->ifm_active |= IFM_FDX;
1551 		else
1552 			ifmr->ifm_active |= IFM_HDX;
1553 
1554 		if (status & RGE_PHYSTAT_10MBPS)
1555 			ifmr->ifm_active |= IFM_10_T;
1556 		else if (status & RGE_PHYSTAT_100MBPS)
1557 			ifmr->ifm_active |= IFM_100_TX;
1558 		else if (status & RGE_PHYSTAT_1000MBPS)
1559 			ifmr->ifm_active |= IFM_1000_T;
1560 		else if (status & RGE_PHYSTAT_2500MBPS)
1561 			ifmr->ifm_active |= IFM_2500_T;
1562 		else if (status & RGE_PHYSTAT_5000MBPS)
1563 			ifmr->ifm_active |= IFM_5000_T;
1564 		else if (status & RGE_PHYSTAT_10000MBPS)
1565 			ifmr->ifm_active |= IFM_10G_T;
1566 	}
1567 }
1568 
1569 /**
1570  * @brief callback to load/populate a single physical address
1571  */
1572 static void
1573 rge_dma_load_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1574 {
1575 	bus_addr_t *paddr = (bus_addr_t *) arg;
1576 
1577 	*paddr = 0;
1578 
1579 	if (error) {
1580 		printf("%s: error! (%d)\n", __func__, error);
1581 		*paddr = 0;
1582 		return;
1583 	}
1584 
1585 	if (nsegs != 1) {
1586 		printf("%s: too many segs (got %d)\n", __func__, nsegs);
1587 		*paddr = 0;
1588 		return;
1589 	}
1590 
1591 	*paddr = segs[0].ds_addr;
1592 }
1593 
1594 /**
1595  * @brief Allocate memory for RX/TX rings.
1596  *
1597  * Called with the driver lock NOT held.
1598  */
1599 static int
1600 rge_allocmem(struct rge_softc *sc)
1601 {
1602 	struct rge_queues *q = sc->sc_queues;
1603 	int error;
1604 	int i;
1605 
1606 	RGE_ASSERT_UNLOCKED(sc);
1607 
1608 	/* Allocate DMA'able memory for the TX ring. */
1609 	error = bus_dmamem_alloc(sc->sc_dmat_tx_desc,
1610 	    (void **) &q->q_tx.rge_tx_list,
1611 	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT,
1612 	    &q->q_tx.rge_tx_list_map);
1613 	if (error) {
1614 		RGE_PRINT_ERROR(sc, "%s: error (alloc tx_list.map) (%d)\n",
1615 		    __func__, error);
1616 		goto error;
1617 	}
1618 
1619 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: tx_list=%p\n", __func__,
1620 	    q->q_tx.rge_tx_list);
1621 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: tx_list_map=%p\n", __func__,
1622 	    q->q_tx.rge_tx_list_map);
1623 
1624 	/* Load the map for the TX ring. */
1625 	error = bus_dmamap_load(sc->sc_dmat_tx_desc,
1626 	    q->q_tx.rge_tx_list_map,
1627 	    q->q_tx.rge_tx_list,
1628 	    RGE_TX_LIST_SZ,
1629 	    rge_dma_load_cb,
1630 	    (void *) &q->q_tx.rge_tx_list_paddr,
1631 	    BUS_DMA_NOWAIT);
1632 
1633 	if ((error != 0) || (q->q_tx.rge_tx_list_paddr == 0)) {
1634 		RGE_PRINT_ERROR(sc, "%s: error (load tx_list.map) (%d)\n",
1635 		    __func__, error);
1636 		goto error;
1637 	}
1638 
1639 	/* Create DMA maps for TX buffers. */
1640 	for (i = 0; i < RGE_TX_LIST_CNT; i++) {
1641 		error = bus_dmamap_create(sc->sc_dmat_tx_buf,
1642 		    0, &q->q_tx.rge_txq[i].txq_dmamap);
1643 		if (error) {
1644 			RGE_PRINT_ERROR(sc,
1645 			    "can't create DMA map for TX (%d)\n", error);
1646 			goto error;
1647 		}
1648 	}
1649 
1650 	/* Allocate DMA'able memory for the RX ring. */
1651 	error = bus_dmamem_alloc(sc->sc_dmat_rx_desc,
1652 	    (void **) &q->q_rx.rge_rx_list,
1653 	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT,
1654 	    &q->q_rx.rge_rx_list_map);
1655 	if (error) {
1656 		RGE_PRINT_ERROR(sc, "%s: error (alloc rx_list.map) (%d)\n",
1657 		    __func__, error);
1658 		goto error;
1659 	}
1660 
1661 	RGE_DPRINTF(sc, RGE_DEBUG_INIT, "%s: rx_list=%p\n", __func__,
1662 	    q->q_rx.rge_rx_list);
1663 	RGE_DPRINTF(sc, RGE_DEBUG_INIT, "%s: rx_list_map=%p\n", __func__,
1664 	    q->q_rx.rge_rx_list_map);
1665 
1666 	/* Load the map for the RX ring. */
1667 	error = bus_dmamap_load(sc->sc_dmat_rx_desc,
1668 	    q->q_rx.rge_rx_list_map,
1669 	    q->q_rx.rge_rx_list,
1670 	    RGE_RX_LIST_SZ,
1671 	    rge_dma_load_cb,
1672 	    (void *) &q->q_rx.rge_rx_list_paddr,
1673 	    BUS_DMA_NOWAIT);
1674 
1675 	if ((error != 0) || (q->q_rx.rge_rx_list_paddr == 0)) {
1676 		RGE_PRINT_ERROR(sc, "%s: error (load rx_list.map) (%d)\n",
1677 		    __func__, error);
1678 		goto error;
1679 	}
1680 
1681 	/* Create DMA maps for RX buffers. */
1682 	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
1683 		error = bus_dmamap_create(sc->sc_dmat_rx_buf,
1684 		    0, &q->q_rx.rge_rxq[i].rxq_dmamap);
1685 		if (error) {
1686 			RGE_PRINT_ERROR(sc,
1687 			    "can't create DMA map for RX (%d)\n", error);
1688 			goto error;
1689 		}
1690 	}
1691 
1692 	return (0);
1693 error:
1694 
1695 	rge_freemem(sc);
1696 
1697 	return (error);
1698 }
1699 
1700 /**
1701  * @brief Allocate memory for MAC stats.
1702  *
1703  * Called with the driver lock NOT held.
1704  */
1705 static int
1706 rge_alloc_stats_mem(struct rge_softc *sc)
1707 {
1708 	struct rge_mac_stats *ss = &sc->sc_mac_stats;
1709 	int error;
1710 
1711 	RGE_ASSERT_UNLOCKED(sc);
1712 
1713 	/* Allocate DMA'able memory for the stats buffer. */
1714 	error = bus_dmamem_alloc(sc->sc_dmat_stats_buf,
1715 	    (void **) &ss->stats, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1716 	    &ss->map);
1717 	if (error) {
1718 		RGE_PRINT_ERROR(sc, "%s: error (alloc stats) (%d)\n",
1719 		    __func__, error);
1720 		goto error;
1721 	}
1722 
1723 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: stats=%p\n", __func__, ss->stats);
1724 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: map=%p\n", __func__, ss->map);
1725 
1726 	/* Load the map for the TX ring. */
1727 	error = bus_dmamap_load(sc->sc_dmat_stats_buf,
1728 	    ss->map,
1729 	    ss->stats,
1730 	    RGE_STATS_BUF_SIZE,
1731 	    rge_dma_load_cb,
1732 	    (void *) &ss->paddr,
1733 	    BUS_DMA_NOWAIT);
1734 
1735 	if ((error != 0) || (ss->paddr == 0)) {
1736 		RGE_PRINT_ERROR(sc, "%s: error (load stats.map) (%d)\n",
1737 		    __func__, error);
1738 		if (error == 0)
1739 			error = ENXIO;
1740 		goto error;
1741 	}
1742 
1743 	return (0);
1744 
1745 error:
1746 	rge_free_stats_mem(sc);
1747 
1748 	return (error);
1749 }
1750 
1751 
1752 /**
1753  * @brief Free the TX/RX DMA buffers and mbufs.
1754  *
1755  * Called with the driver lock NOT held.
1756  */
1757 static int
1758 rge_freemem(struct rge_softc *sc)
1759 {
1760 	struct rge_queues *q = sc->sc_queues;
1761 	int i;
1762 
1763 	RGE_ASSERT_UNLOCKED(sc);
1764 
1765 	/* TX buf */
1766 	for (i = 0; i < RGE_TX_LIST_CNT; i++) {
1767 		struct rge_txq *tx = &q->q_tx.rge_txq[i];
1768 
1769 		/* unmap/free mbuf if it's still alloc'ed and mapped */
1770 		if (tx->txq_mbuf != NULL) {
1771 			static bool do_warning = false;
1772 
1773 			if (do_warning == false) {
1774 				RGE_PRINT_ERROR(sc,
1775 				    "%s: TX mbuf should've been freed!\n",
1776 				    __func__);
1777 				do_warning = true;
1778 			}
1779 			if (tx->txq_dmamap != NULL) {
1780 				bus_dmamap_sync(sc->sc_dmat_tx_buf,
1781 				    tx->txq_dmamap, BUS_DMASYNC_POSTREAD);
1782 				bus_dmamap_unload(sc->sc_dmat_tx_buf,
1783 				    tx->txq_dmamap);
1784 			}
1785 			m_free(tx->txq_mbuf);
1786 			tx->txq_mbuf = NULL;
1787 		}
1788 
1789 		/* Destroy the dmamap if it's allocated */
1790 		if (tx->txq_dmamap != NULL) {
1791 			bus_dmamap_destroy(sc->sc_dmat_tx_buf, tx->txq_dmamap);
1792 			tx->txq_dmamap = NULL;
1793 		}
1794 	}
1795 
1796 	/* TX desc */
1797 	if (q->q_tx.rge_tx_list != NULL) {
1798 		bus_dmamap_unload(sc->sc_dmat_tx_desc, q->q_tx.rge_tx_list_map);
1799 		bus_dmamem_free(sc->sc_dmat_tx_desc, q->q_tx.rge_tx_list,
1800 		    q->q_tx.rge_tx_list_map);
1801 	}
1802 	memset(&q->q_tx, 0, sizeof(q->q_tx));
1803 
1804 	/* RX buf */
1805 	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
1806 		struct rge_rxq *rx = &q->q_rx.rge_rxq[i];
1807 
1808 		/* unmap/free mbuf if it's still alloc'ed and mapped */
1809 		if (rx->rxq_mbuf != NULL) {
1810 			if (rx->rxq_dmamap != NULL) {
1811 				bus_dmamap_sync(sc->sc_dmat_rx_buf,
1812 				    rx->rxq_dmamap, BUS_DMASYNC_POSTREAD);
1813 				bus_dmamap_unload(sc->sc_dmat_rx_buf,
1814 				    rx->rxq_dmamap);
1815 			}
1816 			m_free(rx->rxq_mbuf);
1817 			rx->rxq_mbuf = NULL;
1818 		}
1819 
1820 		/* Destroy the dmamap if it's allocated */
1821 		if (rx->rxq_dmamap != NULL) {
1822 			bus_dmamap_destroy(sc->sc_dmat_rx_buf, rx->rxq_dmamap);
1823 			rx->rxq_dmamap = NULL;
1824 		}
1825 	}
1826 
1827 	/* RX desc */
1828 	if (q->q_rx.rge_rx_list != NULL) {
1829 		bus_dmamap_unload(sc->sc_dmat_rx_desc, q->q_rx.rge_rx_list_map);
1830 		bus_dmamem_free(sc->sc_dmat_rx_desc, q->q_rx.rge_rx_list,
1831 		    q->q_rx.rge_rx_list_map);
1832 	}
1833 	memset(&q->q_rx, 0, sizeof(q->q_tx));
1834 
1835 	return (0);
1836 }
1837 
1838 /**
1839  * @brief Free the stats memory.
1840  *
1841  * Called with the driver lock NOT held.
1842  */
1843 static int
1844 rge_free_stats_mem(struct rge_softc *sc)
1845 {
1846 	struct rge_mac_stats *ss = &sc->sc_mac_stats;
1847 
1848 	RGE_ASSERT_UNLOCKED(sc);
1849 
1850 	if (ss->stats != NULL) {
1851 		bus_dmamap_unload(sc->sc_dmat_stats_buf, ss->map);
1852 		bus_dmamem_free(sc->sc_dmat_stats_buf, ss->stats, ss->map);
1853 	}
1854 	memset(ss, 0, sizeof(*ss));
1855 	return (0);
1856 }
1857 
1858 static uint32_t
1859 rx_ring_space(struct rge_queues *q)
1860 {
1861 	uint32_t prod, cons;
1862 	uint32_t ret;
1863 
1864 	RGE_ASSERT_LOCKED(q->q_sc);
1865 
1866 	prod = q->q_rx.rge_rxq_prodidx;
1867 	cons = q->q_rx.rge_rxq_considx;
1868 
1869 	ret = (cons + RGE_RX_LIST_CNT - prod - 1) % RGE_RX_LIST_CNT + 1;
1870 
1871 	if (ret > RGE_RX_LIST_CNT)
1872 		return RGE_RX_LIST_CNT;
1873 
1874 	return (ret);
1875 }
1876 
1877 /*
1878  * Initialize the RX descriptor and attach an mbuf cluster at the given offset.
1879  *
1880  * Note: this relies on the rxr ring buffer abstraction to not
1881  * over-fill the RX ring.  For FreeBSD we'll need to use the
1882  * prod/cons RX indexes to know how much RX ring space to
1883  * populate.
1884  *
1885  * This routine will increment the producer index if successful.
1886  *
1887  * This must be called with the driver lock held.
1888  */
1889 static int
1890 rge_newbuf(struct rge_queues *q)
1891 {
1892 	struct rge_softc *sc = q->q_sc;
1893 	struct mbuf *m;
1894 	struct rge_rx_desc *r;
1895 	struct rge_rxq *rxq;
1896 	bus_dmamap_t rxmap;
1897 	bus_dma_segment_t seg[1];
1898 	uint32_t cmdsts;
1899 	int nsegs;
1900 	uint32_t idx;
1901 
1902 	RGE_ASSERT_LOCKED(q->q_sc);
1903 
1904 	/*
1905 	 * Verify we have enough space in the ring; error out
1906 	 * if we do not.
1907 	 */
1908 	if (rx_ring_space(q) == 0)
1909 		return (ENOBUFS);
1910 
1911 	idx = q->q_rx.rge_rxq_prodidx;
1912 	rxq = &q->q_rx.rge_rxq[idx];
1913 	rxmap = rxq->rxq_dmamap;
1914 
1915 	/*
1916 	 * If we already have an mbuf here then something messed up;
1917 	 * exit out as the hardware may be DMAing to it.
1918 	 */
1919 	if (rxq->rxq_mbuf != NULL) {
1920 		RGE_PRINT_ERROR(sc,
1921 		    "%s: RX ring slot %d already has an mbuf?\n", __func__,
1922 		    idx);
1923 		return (ENOBUFS);
1924 	}
1925 
1926 	/* Allocate single buffer backed mbuf of MCLBYTES */
1927 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1928 	if (m == NULL)
1929 		return (ENOBUFS);
1930 
1931 	m->m_len = m->m_pkthdr.len = MCLBYTES;
1932 
1933 	nsegs = 1;
1934 	if (bus_dmamap_load_mbuf_sg(sc->sc_dmat_rx_buf, rxmap, m, seg, &nsegs,
1935 	    BUS_DMA_NOWAIT)) {
1936 		m_freem(m);
1937 		return (ENOBUFS);
1938 	}
1939 
1940 	/*
1941 	 * Make sure any changes made to the buffer have been flushed to host
1942 	 * memory.
1943 	 */
1944 	bus_dmamap_sync(sc->sc_dmat_rx_buf, rxmap,
1945 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1946 
1947 	/*
1948 	 * Map the segment into RX descriptors.  Note that this
1949 	 * only currently supports a single segment per mbuf;
1950 	 * the call to load_mbuf_sg above specified a single segment.
1951 	 */
1952 	r = &q->q_rx.rge_rx_list[idx];
1953 
1954 	rxq->rxq_mbuf = m;
1955 
1956 	cmdsts = seg[0].ds_len; /* XXX how big is this field in the descriptor? */
1957 	if (idx == RGE_RX_LIST_CNT - 1)
1958 		cmdsts |= RGE_RDCMDSTS_EOR;
1959 
1960 	/*
1961 	 * Configure the DMA pointer and config, but don't hand
1962 	 * it yet to the hardware.
1963 	 */
1964 	r->hi_qword1.rx_qword4.rge_cmdsts = htole32(cmdsts);
1965 	r->hi_qword1.rx_qword4.rge_extsts = htole32(0);
1966 	r->hi_qword0.rge_addr = htole64(seg[0].ds_addr);
1967 	wmb();
1968 
1969 	/*
1970 	 * Mark the specific descriptor slot as "this descriptor is now
1971 	 * owned by the hardware", which when the hardware next sees
1972 	 * this, it'll continue RX DMA.
1973 	 */
1974 	cmdsts |= RGE_RDCMDSTS_OWN;
1975 	r->hi_qword1.rx_qword4.rge_cmdsts = htole32(cmdsts);
1976 	wmb();
1977 
1978 	/*
1979 	 * At this point the hope is the whole ring is now updated and
1980 	 * consistent; if the hardware was waiting for a descriptor to be
1981 	 * ready to write into then it should be ready here.
1982 	 */
1983 
1984 	RGE_DPRINTF(sc, RGE_DEBUG_RECV_DESC,
1985 	    "%s: [%d]: m=%p, m_data=%p, m_len=%ju, phys=0x%jx len %ju, "
1986 	    "desc=0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
1987 	    __func__,
1988 	    idx,
1989 	    m,
1990 	    m->m_data,
1991 	    (uintmax_t) m->m_len,
1992 	    (uintmax_t) seg[0].ds_addr,
1993 	    (uintmax_t) seg[0].ds_len,
1994 	    ((uint32_t *) r)[0],
1995 	    ((uint32_t *) r)[1],
1996 	    ((uint32_t *) r)[2],
1997 	    ((uint32_t *) r)[3],
1998 	    ((uint32_t *) r)[4],
1999 	    ((uint32_t *) r)[5],
2000 	    ((uint32_t *) r)[6],
2001 	    ((uint32_t *) r)[7]);
2002 
2003 	q->q_rx.rge_rxq_prodidx = RGE_NEXT_RX_DESC(idx);
2004 
2005 	return (0);
2006 }
2007 
2008 static void
2009 rge_rx_list_init(struct rge_queues *q)
2010 {
2011 	memset(q->q_rx.rge_rx_list, 0, RGE_RX_LIST_SZ);
2012 
2013 	RGE_ASSERT_LOCKED(q->q_sc);
2014 
2015 	q->q_rx.rge_rxq_prodidx = q->q_rx.rge_rxq_considx = 0;
2016 	q->q_rx.rge_head = NULL;
2017 	q->q_rx.rge_tail = &q->q_rx.rge_head;
2018 
2019 	RGE_DPRINTF(q->q_sc, RGE_DEBUG_SETUP, "%s: rx_list=%p\n", __func__,
2020 	    q->q_rx.rge_rx_list);
2021 
2022 	rge_fill_rx_ring(q);
2023 }
2024 
2025 /**
2026  * @brief Fill / refill the RX ring as needed.
2027  *
2028  * Refill the RX ring with one less than the total descriptors needed.
2029  * This makes the check in rge_rxeof() easier - it can just check
2030  * descriptors from cons -> prod and bail once it hits prod.
2031  * If the whole ring is filled then cons == prod, and that shortcut
2032  * fails.
2033  *
2034  * This must be called with the driver lock held.
2035  */
2036 static void
2037 rge_fill_rx_ring(struct rge_queues *q)
2038 {
2039 	struct rge_softc *sc = q->q_sc;
2040 	uint32_t count, i, prod, cons;
2041 
2042 	RGE_ASSERT_LOCKED(q->q_sc);
2043 
2044 	prod = q->q_rx.rge_rxq_prodidx;
2045 	cons = q->q_rx.rge_rxq_considx;
2046 	count = rx_ring_space(q);
2047 
2048 	/* Fill to count-1; bail if we don't have the space */
2049 	if (count <= 1)
2050 		return;
2051 	count--;
2052 
2053 	RGE_DPRINTF(sc, RGE_DEBUG_RECV_DESC, "%s: prod=%u, cons=%u, space=%u\n",
2054 	  __func__, prod, cons, count);
2055 
2056 	/* Make sure device->host changes are visible */
2057 	bus_dmamap_sync(sc->sc_dmat_rx_desc, q->q_rx.rge_rx_list_map,
2058 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2059 
2060 	for (i = 0; i < count; i++) {
2061 		if (rge_newbuf(q))
2062 			break;
2063 	}
2064 
2065 	/* Make changes visible to the device */
2066 	bus_dmamap_sync(sc->sc_dmat_rx_desc, q->q_rx.rge_rx_list_map,
2067 	    BUS_DMASYNC_PREWRITE);
2068 }
2069 
2070 static void
2071 rge_tx_list_init(struct rge_queues *q)
2072 {
2073 	struct rge_softc *sc = q->q_sc;
2074 	struct rge_tx_desc *d;
2075 	int i;
2076 
2077 	RGE_ASSERT_LOCKED(q->q_sc);
2078 
2079 	memset(q->q_tx.rge_tx_list, 0, RGE_TX_LIST_SZ);
2080 
2081 	for (i = 0; i < RGE_TX_LIST_CNT; i++)
2082 		q->q_tx.rge_txq[i].txq_mbuf = NULL;
2083 
2084 	d = &q->q_tx.rge_tx_list[RGE_TX_LIST_CNT - 1];
2085 	d->rge_cmdsts = htole32(RGE_TDCMDSTS_EOR);
2086 
2087 	bus_dmamap_sync(sc->sc_dmat_tx_desc, q->q_tx.rge_tx_list_map,
2088 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2089 	wmb();
2090 
2091 	q->q_tx.rge_txq_prodidx = q->q_tx.rge_txq_considx = 0;
2092 
2093 	RGE_DPRINTF(sc, RGE_DEBUG_SETUP, "%s: rx_list=%p\n", __func__,
2094 	    q->q_tx.rge_tx_list);
2095 }
2096 
2097 int
2098 rge_rxeof(struct rge_queues *q, struct mbufq *mq)
2099 {
2100 	struct rge_softc *sc = q->q_sc;
2101 	struct mbuf *m;
2102 	struct rge_rx_desc *cur_rx;
2103 	struct rge_rxq *rxq;
2104 	uint32_t rxstat, extsts;
2105 	int i, mlen, rx = 0;
2106 	int cons, prod;
2107 	int maxpkt = 16; /* XXX TODO: make this a tunable */
2108 	bool check_hwcsum;
2109 
2110 	check_hwcsum = ((if_getcapenable(sc->sc_ifp) & IFCAP_RXCSUM) != 0);
2111 
2112 	RGE_ASSERT_LOCKED(sc);
2113 
2114 	sc->sc_drv_stats.rxeof_cnt++;
2115 
2116 	RGE_DPRINTF(sc, RGE_DEBUG_INTR, "%s; called\n", __func__);
2117 
2118 	/* Note: if_re is POSTREAD/WRITE, rge is only POSTWRITE */
2119 	bus_dmamap_sync(sc->sc_dmat_rx_desc, q->q_rx.rge_rx_list_map,
2120 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2121 
2122 	prod = q->q_rx.rge_rxq_prodidx;
2123 
2124 	/*
2125 	 * Loop around until we've run out of active descriptors to check
2126 	 * or maxpkt has been reached.
2127 	 */
2128 	for (i = cons = q->q_rx.rge_rxq_considx;
2129 	    maxpkt > 0 && i != prod;
2130 	    i = RGE_NEXT_RX_DESC(i)) {
2131 		/* break out of loop if we're not running */
2132 		if ((if_getdrvflags(sc->sc_ifp) & IFF_DRV_RUNNING) == 0)
2133 			break;
2134 
2135 		/* get the current rx descriptor to check descriptor status */
2136 		cur_rx = &q->q_rx.rge_rx_list[i];
2137 		rxstat = le32toh(cur_rx->hi_qword1.rx_qword4.rge_cmdsts);
2138 		if ((rxstat & RGE_RDCMDSTS_OWN) != 0) {
2139 			break;
2140 		}
2141 
2142 		/* Ensure everything else has been DMAed */
2143 		rmb();
2144 
2145 		/* Get the current rx buffer, sync */
2146 		rxq = &q->q_rx.rge_rxq[i];
2147 
2148 		/* Ensure any device updates are now visible in host memory */
2149 		bus_dmamap_sync(sc->sc_dmat_rx_buf, rxq->rxq_dmamap,
2150 		    BUS_DMASYNC_POSTREAD);
2151 
2152 		/* Unload the DMA map, we are done with it here */
2153 		bus_dmamap_unload(sc->sc_dmat_rx_buf, rxq->rxq_dmamap);
2154 		m = rxq->rxq_mbuf;
2155 		rxq->rxq_mbuf = NULL;
2156 
2157 		rx = 1;
2158 
2159 		RGE_DPRINTF(sc, RGE_DEBUG_RECV_DESC,
2160 		    "%s: RX: [%d]: m=%p, m_data=%p, m_len=%ju, "
2161 		    "desc=0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
2162 		    __func__,
2163 		    i,
2164 		    m,
2165 		    m->m_data,
2166 		    (uintmax_t) m->m_len,
2167 		    ((uint32_t *) cur_rx)[0],
2168 		    ((uint32_t *) cur_rx)[1],
2169 		    ((uint32_t *) cur_rx)[2],
2170 		    ((uint32_t *) cur_rx)[3],
2171 		    ((uint32_t *) cur_rx)[4],
2172 		    ((uint32_t *) cur_rx)[5],
2173 		    ((uint32_t *) cur_rx)[6],
2174 		    ((uint32_t *) cur_rx)[7]);
2175 
2176 		if ((rxstat & RGE_RDCMDSTS_SOF) != 0) {
2177 			if (q->q_rx.rge_head != NULL) {
2178 				sc->sc_drv_stats.rx_desc_err_multidesc++;
2179 				if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS,
2180 				    1);
2181 				m_freem(q->q_rx.rge_head);
2182 				q->q_rx.rge_tail = &q->q_rx.rge_head;
2183 			}
2184 
2185 			m->m_pkthdr.len = 0;
2186 		} else if (q->q_rx.rge_head == NULL) {
2187 			m_freem(m);
2188 			continue;
2189 		} else
2190 			m->m_flags &= ~M_PKTHDR;
2191 
2192 		*q->q_rx.rge_tail = m;
2193 		q->q_rx.rge_tail = &m->m_next;
2194 
2195 		mlen = rxstat & RGE_RDCMDSTS_FRAGLEN;
2196 		m->m_len = mlen;
2197 
2198 		m = q->q_rx.rge_head;
2199 		m->m_pkthdr.len += mlen;
2200 
2201 		/* Ethernet CRC error */
2202 		if (rxstat & RGE_RDCMDSTS_RXERRSUM) {
2203 			sc->sc_drv_stats.rx_ether_csum_err++;
2204 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
2205 			m_freem(m);
2206 			q->q_rx.rge_head = NULL;
2207 			q->q_rx.rge_tail = &q->q_rx.rge_head;
2208 			continue;
2209 		}
2210 
2211 		/*
2212 		 * This mbuf is part of a multi-descriptor frame,
2213 		 * so count it towards that.
2214 		 *
2215 		 * Yes, this means we won't be counting the
2216 		 * final descriptor/mbuf as part of a multi-descriptor
2217 		 * frame; if someone wishes to do that then it
2218 		 * shouldn't be too hard to add.
2219 		 */
2220 		if ((rxstat & RGE_RDCMDSTS_EOF) == 0) {
2221 			sc->sc_drv_stats.rx_desc_jumbo_frag++;
2222 			continue;
2223 		}
2224 
2225 		q->q_rx.rge_head = NULL;
2226 		q->q_rx.rge_tail = &q->q_rx.rge_head;
2227 
2228 		m_adj(m, -ETHER_CRC_LEN);
2229 		m->m_pkthdr.rcvif = sc->sc_ifp;
2230 		if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
2231 
2232 		extsts = le32toh(cur_rx->hi_qword1.rx_qword4.rge_extsts);
2233 
2234 		/* Check IP header checksum. */
2235 		if (check_hwcsum) {
2236 			/* Does it exist for IPv4? */
2237 			if (extsts & RGE_RDEXTSTS_IPV4) {
2238 				sc->sc_drv_stats.rx_offload_csum_ipv4_exists++;
2239 				m->m_pkthdr.csum_flags |=
2240 				    CSUM_IP_CHECKED;
2241 			}
2242 			/* XXX IPv6 checksum check? */
2243 
2244 			if (((extsts & RGE_RDEXTSTS_IPCSUMERR) == 0)
2245 			    && ((extsts & RGE_RDEXTSTS_IPV4) != 0)) {
2246 				sc->sc_drv_stats.rx_offload_csum_ipv4_valid++;
2247 				m->m_pkthdr.csum_flags |=
2248 				    CSUM_IP_VALID;
2249 			}
2250 
2251 			/* Check TCP/UDP checksum. */
2252 			if ((extsts & (RGE_RDEXTSTS_IPV4 | RGE_RDEXTSTS_IPV6)) &&
2253 			    (extsts & RGE_RDEXTSTS_TCPPKT)) {
2254 				sc->sc_drv_stats.rx_offload_csum_tcp_exists++;
2255 				if ((extsts & RGE_RDEXTSTS_TCPCSUMERR) == 0) {
2256 					sc->sc_drv_stats.rx_offload_csum_tcp_valid++;
2257 					/* TCP checksum OK */
2258 					m->m_pkthdr.csum_flags |=
2259 					    CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
2260 					m->m_pkthdr.csum_data = 0xffff;
2261 				}
2262 			}
2263 
2264 			if ((extsts & (RGE_RDEXTSTS_IPV4 | RGE_RDEXTSTS_IPV6)) &&
2265 			    (extsts & RGE_RDEXTSTS_UDPPKT)) {
2266 				sc->sc_drv_stats.rx_offload_csum_udp_exists++;
2267 				if ((extsts & RGE_RDEXTSTS_UDPCSUMERR) == 0) {
2268 					sc->sc_drv_stats.rx_offload_csum_udp_valid++;
2269 					/* UDP checksum OK */
2270 					m->m_pkthdr.csum_flags |=
2271 					    CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
2272 					m->m_pkthdr.csum_data = 0xffff;
2273 				}
2274 			}
2275 		}
2276 
2277 		if (extsts & RGE_RDEXTSTS_VTAG) {
2278 			sc->sc_drv_stats.rx_offload_vlan_tag++;
2279 			m->m_pkthdr.ether_vtag =
2280 			    ntohs(extsts & RGE_RDEXTSTS_VLAN_MASK);
2281 			m->m_flags |= M_VLANTAG;
2282 		}
2283 
2284 		mbufq_enqueue(mq, m);
2285 
2286 		maxpkt--;
2287 	}
2288 
2289 	if (!rx)
2290 		return (0);
2291 
2292 	/*
2293 	 * Make sure any device updates to the descriptor ring are
2294 	 * visible to the host before we continue.
2295 	 */
2296 	bus_dmamap_sync(sc->sc_dmat_rx_desc, q->q_rx.rge_rx_list_map,
2297 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2298 	wmb();
2299 
2300 	/* Update the consumer index, refill the RX ring */
2301 	q->q_rx.rge_rxq_considx = i;
2302 	rge_fill_rx_ring(q);
2303 
2304 	return (1);
2305 }
2306 
2307 int
2308 rge_txeof(struct rge_queues *q)
2309 {
2310 	struct rge_softc *sc = q->q_sc;
2311 	struct ifnet *ifp = sc->sc_ifp;
2312 	struct rge_txq *txq;
2313 	uint32_t txstat;
2314 	int cons, prod, cur, idx;
2315 	int free = 0, ntx = 0;
2316 	int pktlen;
2317 	bool is_mcast;
2318 
2319 	RGE_ASSERT_LOCKED(sc);
2320 
2321 	sc->sc_drv_stats.txeof_cnt++;
2322 
2323 	prod = q->q_tx.rge_txq_prodidx;
2324 	cons = q->q_tx.rge_txq_considx;
2325 
2326 	idx = cons;
2327 	while (idx != prod) {
2328 		txq = &q->q_tx.rge_txq[idx];
2329 		cur = txq->txq_descidx;
2330 
2331 		rge_tx_list_sync(sc, q, cur, 1, BUS_DMASYNC_POSTREAD);
2332 		txstat = q->q_tx.rge_tx_list[cur].rge_cmdsts;
2333 		rge_tx_list_sync(sc, q, cur, 1, BUS_DMASYNC_PREREAD);
2334 		if ((txstat & htole32(RGE_TDCMDSTS_OWN)) != 0) {
2335 			free = 2;
2336 			break;
2337 		}
2338 
2339 		bus_dmamap_sync(sc->sc_dmat_tx_buf, txq->txq_dmamap,
2340 		    BUS_DMASYNC_POSTWRITE);
2341 		bus_dmamap_unload(sc->sc_dmat_tx_buf, txq->txq_dmamap);
2342 		pktlen = txq->txq_mbuf->m_pkthdr.len;
2343 		is_mcast = ((txq->txq_mbuf->m_flags & M_MCAST) != 0);
2344 		m_freem(txq->txq_mbuf);
2345 		txq->txq_mbuf = NULL;
2346 		ntx++;
2347 
2348 		if ((txstat &
2349 		    htole32(RGE_TDCMDSTS_EXCESSCOLL | RGE_TDCMDSTS_COLL)) != 0)
2350 			if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
2351 		if ((txstat & htole32(RGE_TDCMDSTS_TXERR)) != 0)
2352 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2353 		else {
2354 			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2355 			if_inc_counter(ifp, IFCOUNTER_OBYTES, pktlen);
2356 			if (is_mcast)
2357 				if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2358 
2359 		}
2360 
2361 		idx = RGE_NEXT_TX_DESC(cur);
2362 		free = 1;
2363 	}
2364 
2365 	/* If we didn't complete any TX descriptors then return 0 */
2366 	if (free == 0)
2367 		return (0);
2368 
2369 	if (idx >= cons) {
2370 		rge_tx_list_sync(sc, q, cons, idx - cons,
2371 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2372 	} else {
2373 		rge_tx_list_sync(sc, q, cons, RGE_TX_LIST_CNT - cons,
2374 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2375 		rge_tx_list_sync(sc, q, 0, idx,
2376 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2377 	}
2378 
2379 	q->q_tx.rge_txq_considx = idx;
2380 
2381 	RGE_DPRINTF(sc, RGE_DEBUG_XMIT,
2382 	    "%s: handled %d frames; prod=%d, cons=%d\n", __func__,
2383 	    ntx, q->q_tx.rge_txq_prodidx, q->q_tx.rge_txq_considx);
2384 
2385 	/*
2386 	 * We processed the ring and hit a descriptor that was still
2387 	 * owned by the hardware, so there's still pending work.
2388 	 *
2389 	 * If we got to the end of the ring and there's no further
2390 	 * frames owned by the hardware then we can quieten the
2391 	 * watchdog.
2392 	 */
2393 	if (free == 2)
2394 		sc->sc_watchdog = 5;
2395 	else
2396 		sc->sc_watchdog = 0;
2397 
2398 	/*
2399 	 * Kick-start the transmit task just in case we have
2400 	 * more frames available.
2401 	 */
2402 	taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
2403 
2404 	return (1);
2405 }
2406 
2407 static u_int
2408 rge_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
2409 {
2410 	uint32_t crc, *hashes = arg;
2411 
2412 	/* XXX TODO: validate this does addrlo? */
2413 	crc = ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN) >> 26;
2414 	crc &= 0x3f;
2415 
2416 	if (crc < 32)
2417 		hashes[0] |= (1 << crc);
2418 	else
2419 		hashes[1] |= (1 << (crc - 32));
2420 
2421 	return (1);
2422 }
2423 
2424 /**
2425  * @brief Configure the RX filter and multicast filter.
2426  *
2427  * This must be called with the driver lock held.
2428  */
2429 static void
2430 rge_iff_locked(struct rge_softc *sc)
2431 {
2432 	uint32_t hashes[2];
2433 	uint32_t rxfilt;
2434 
2435 	RGE_ASSERT_LOCKED(sc);
2436 
2437 	rxfilt = RGE_READ_4(sc, RGE_RXCFG);
2438 	rxfilt &= ~(RGE_RXCFG_ALLPHYS | RGE_RXCFG_MULTI);
2439 
2440 	/*
2441 	 * Always accept frames destined to our station address.
2442 	 * Always accept broadcast frames.
2443 	 */
2444 	rxfilt |= RGE_RXCFG_INDIV | RGE_RXCFG_BROAD;
2445 
2446 	if ((if_getflags(sc->sc_ifp) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) {
2447 		rxfilt |= RGE_RXCFG_MULTI;
2448 		if ((if_getflags(sc->sc_ifp) & IFF_PROMISC) != 0)
2449 			rxfilt |= RGE_RXCFG_ALLPHYS;
2450 		hashes[0] = hashes[1] = 0xffffffff;
2451 	} else {
2452 		rxfilt |= RGE_RXCFG_MULTI;
2453 		/* Program new filter. */
2454 		memset(hashes, 0, sizeof(hashes));
2455 		if_foreach_llmaddr(sc->sc_ifp, rge_hash_maddr, &hashes);
2456 	}
2457 
2458 	RGE_WRITE_4(sc, RGE_RXCFG, rxfilt);
2459 	RGE_WRITE_4(sc, RGE_MAR0, bswap32(hashes[1]));
2460 	RGE_WRITE_4(sc, RGE_MAR4, bswap32(hashes[0]));
2461 }
2462 
2463 static void
2464 rge_add_media_types(struct rge_softc *sc)
2465 {
2466 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T, 0, NULL);
2467 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
2468 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX, 0, NULL);
2469 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
2470 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_T, 0, NULL);
2471 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2472 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_2500_T, 0, NULL);
2473 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_2500_T | IFM_FDX, 0, NULL);
2474 
2475 	if (RGE_TYPE_R26(sc)) {
2476 		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_5000_T, 0, NULL);
2477 		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_5000_T | IFM_FDX,
2478 		    0, NULL);
2479 	} else if (sc->rge_type == MAC_R27) {
2480 		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL);
2481 		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T | IFM_FDX,
2482 		    0, NULL);
2483 	}
2484 }
2485 
2486 /**
2487  * @brief Deferred packet dequeue and submit.
2488  */
2489 static void
2490 rge_tx_task(void *arg, int npending)
2491 {
2492 	struct rge_softc *sc = (struct rge_softc *) arg;
2493 	/* Note: for now, one queue */
2494 	struct rge_queues *q = sc->sc_queues;
2495 	struct mbuf *m;
2496 	int ntx = 0;
2497 	int idx, free, used;
2498 
2499 	RGE_DPRINTF(sc, RGE_DEBUG_XMIT, "%s: running\n", __func__);
2500 
2501 	RGE_LOCK(sc);
2502 	sc->sc_drv_stats.tx_task_cnt++;
2503 
2504 	if (sc->sc_stopped == true) {
2505 		sc->sc_watchdog = 0;
2506 		RGE_UNLOCK(sc);
2507 		return;
2508 	}
2509 
2510 	/* Calculate free space. */
2511 	idx = q->q_tx.rge_txq_prodidx;
2512 	free = q->q_tx.rge_txq_considx;
2513 	if (free <= idx)
2514 		free += RGE_TX_LIST_CNT;
2515 	free -= idx;
2516 
2517 	for (;;) {
2518 		if (free < RGE_TX_NSEGS + 2) {
2519 			break;
2520 		}
2521 
2522 		/* Dequeue */
2523 		m = mbufq_dequeue(&sc->sc_txq);
2524 		if (m == NULL)
2525 			break;
2526 
2527 		/* Attempt to encap */
2528 		used = rge_encap(sc, q, m, idx);
2529 		if (used < 0) {
2530 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
2531 			m_freem(m);
2532 			continue;
2533 		} else if (used == 0) {
2534 			mbufq_prepend(&sc->sc_txq, m);
2535 			break;
2536 		}
2537 
2538 		/*
2539 		 * Note: mbuf is now owned by the tx ring, but we hold the
2540 		 * lock so it's safe to pass it up here to be copied without
2541 		 * worrying the TX task will run and dequeue/free it before
2542 		 * we get a shot at it.
2543 		 */
2544 		ETHER_BPF_MTAP(sc->sc_ifp, m);
2545 
2546 		/* Update free/idx pointers */
2547 		free -= used;
2548 		idx += used;
2549 		if (idx >= RGE_TX_LIST_CNT)
2550 			idx -= RGE_TX_LIST_CNT;
2551 
2552 		ntx++;
2553 	}
2554 
2555 	/* Ok, did we queue anything? If so, poke the hardware */
2556 	if (ntx > 0) {
2557 		q->q_tx.rge_txq_prodidx = idx;
2558 		sc->sc_watchdog = 5;
2559 		RGE_WRITE_2(sc, RGE_TXSTART, RGE_TXSTART_START);
2560 	}
2561 
2562 	RGE_DPRINTF(sc, RGE_DEBUG_XMIT,
2563 	    "%s: handled %d frames; prod=%d, cons=%d\n", __func__,
2564 	    ntx, q->q_tx.rge_txq_prodidx, q->q_tx.rge_txq_considx);
2565 
2566 	RGE_UNLOCK(sc);
2567 }
2568 
2569 /**
2570  * @brief Called by the sc_timeout callout.
2571  *
2572  * This is called by the callout code with the driver lock held.
2573  */
2574 void
2575 rge_tick(void *arg)
2576 {
2577 	struct rge_softc *sc = arg;
2578 
2579 	RGE_ASSERT_LOCKED(sc);
2580 
2581 	rge_link_state(sc);
2582 
2583 	/*
2584 	 * Since we don't have any other place yet to trigger/test this,
2585 	 * let's do it here every second and just bite the driver
2586 	 * blocking for a little bit whilst it happens.
2587 	 */
2588 	if ((if_getdrvflags(sc->sc_ifp) & IFF_DRV_RUNNING) != 0)
2589 		rge_hw_mac_stats_fetch(sc, &sc->sc_mac_stats.lcl_stats);
2590 
2591 	/*
2592 	 * Handle the TX watchdog.
2593 	 */
2594 	if (sc->sc_watchdog > 0) {
2595 		sc->sc_watchdog--;
2596 		if (sc->sc_watchdog == 0) {
2597 			RGE_PRINT_ERROR(sc, "TX timeout (watchdog)\n");
2598 			rge_init_locked(sc);
2599 			sc->sc_drv_stats.tx_watchdog_timeout_cnt++;
2600 		}
2601 	}
2602 
2603 	callout_reset(&sc->sc_timeout, hz, rge_tick, sc);
2604 }
2605 
2606 /**
2607  * @brief process a link state change.
2608  *
2609  * Must be called with the driver lock held.
2610  */
2611 void
2612 rge_link_state(struct rge_softc *sc)
2613 {
2614 	int link = LINK_STATE_DOWN;
2615 
2616 	RGE_ASSERT_LOCKED(sc);
2617 
2618 	if (rge_get_link_status(sc))
2619 		link = LINK_STATE_UP;
2620 
2621 	if (if_getlinkstate(sc->sc_ifp) != link) {
2622 		sc->sc_drv_stats.link_state_change_cnt++;
2623 		if_link_state_change(sc->sc_ifp, link);
2624 	}
2625 }
2626 
2627 /**
2628  * @brief Suspend
2629  */
2630 static int
2631 rge_suspend(device_t dev)
2632 {
2633 	struct rge_softc *sc = device_get_softc(dev);
2634 
2635 	RGE_LOCK(sc);
2636 	rge_stop_locked(sc);
2637 	/* TODO: wake on lan */
2638 	sc->sc_suspended = true;
2639 	RGE_UNLOCK(sc);
2640 
2641 	return (0);
2642 }
2643 
2644 /**
2645  * @brief Resume
2646  */
2647 static int
2648 rge_resume(device_t dev)
2649 {
2650 	struct rge_softc *sc = device_get_softc(dev);
2651 
2652 	RGE_LOCK(sc);
2653 	/* TODO: wake on lan */
2654 
2655 	/* reinit if required */
2656 	if (if_getflags(sc->sc_ifp) & IFF_UP)
2657 		rge_init_locked(sc);
2658 
2659 	sc->sc_suspended = false;
2660 
2661 	RGE_UNLOCK(sc);
2662 
2663 	return (0);
2664 }
2665 
2666 /**
2667  * @brief Shutdown the driver during shutdown
2668  */
2669 static int
2670 rge_shutdown(device_t dev)
2671 {
2672 	struct rge_softc *sc = device_get_softc(dev);
2673 
2674 	RGE_LOCK(sc);
2675 	rge_stop_locked(sc);
2676 	RGE_UNLOCK(sc);
2677 
2678 	return (0);
2679 }
2680 
2681 static device_method_t rge_methods[] = {
2682 	DEVMETHOD(device_probe,			rge_probe),
2683 	DEVMETHOD(device_attach,		rge_attach),
2684 	DEVMETHOD(device_detach,		rge_detach),
2685 
2686 	DEVMETHOD(device_suspend,		rge_suspend),
2687 	DEVMETHOD(device_resume,		rge_resume),
2688 	DEVMETHOD(device_shutdown,		rge_shutdown),
2689 
2690 	DEVMETHOD_END
2691 };
2692 
2693 static driver_t rge_driver = {
2694 	"rge",
2695 	rge_methods,
2696 	sizeof(struct rge_softc)
2697 };
2698 
2699 MODULE_DEPEND(rge, pci, 1, 1, 1);
2700 MODULE_DEPEND(rge, ether, 1, 1, 1);
2701 
2702 DRIVER_MODULE_ORDERED(rge, pci, rge_driver, NULL, NULL, SI_ORDER_ANY);
2703 MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, rge, rge_devices,
2704     nitems(rge_devices) - 1);
2705