xref: /freebsd/sys/dev/cas/if_cas.c (revision 86aa9539fef591a363b06a0ebd3aa7a07f4c1579)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2001 Eduardo Horvath.
5  * Copyright (c) 2001-2003 Thomas Moestl
6  * Copyright (c) 2007-2009 Marius Strobl <marius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *	from: NetBSD: gem.c,v 1.21 2002/06/01 23:50:58 lukem Exp
31  *	from: FreeBSD: if_gem.c 182060 2008-08-23 15:03:26Z marius
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 /*
38  * driver for Sun Cassini/Cassini+ and National Semiconductor DP83065
39  * Saturn Gigabit Ethernet controllers
40  */
41 
42 #if 0
43 #define	CAS_DEBUG
44 #endif
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/bus.h>
49 #include <sys/callout.h>
50 #include <sys/endian.h>
51 #include <sys/mbuf.h>
52 #include <sys/malloc.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/module.h>
56 #include <sys/mutex.h>
57 #include <sys/refcount.h>
58 #include <sys/resource.h>
59 #include <sys/rman.h>
60 #include <sys/socket.h>
61 #include <sys/sockio.h>
62 #include <sys/taskqueue.h>
63 
64 #include <net/bpf.h>
65 #include <net/ethernet.h>
66 #include <net/if.h>
67 #include <net/if_var.h>
68 #include <net/if_arp.h>
69 #include <net/if_dl.h>
70 #include <net/if_media.h>
71 #include <net/if_types.h>
72 #include <net/if_vlan_var.h>
73 
74 #include <netinet/in.h>
75 #include <netinet/in_systm.h>
76 #include <netinet/ip.h>
77 #include <netinet/tcp.h>
78 #include <netinet/udp.h>
79 
80 #include <machine/bus.h>
81 #if defined(__powerpc__) || defined(__sparc64__)
82 #include <dev/ofw/ofw_bus.h>
83 #include <dev/ofw/openfirm.h>
84 #include <machine/ofw_machdep.h>
85 #endif
86 #include <machine/resource.h>
87 
88 #include <dev/mii/mii.h>
89 #include <dev/mii/miivar.h>
90 
91 #include <dev/cas/if_casreg.h>
92 #include <dev/cas/if_casvar.h>
93 
94 #include <dev/pci/pcireg.h>
95 #include <dev/pci/pcivar.h>
96 
97 #include "miibus_if.h"
98 
/*
 * Compile-time check that ring size n is a power of two and lies within
 * the inclusive range [min, max].
 */
#define RINGASSERT(n , min, max)					\
	CTASSERT(powerof2(n) && (n) >= (min) && (n) <= (max))

RINGASSERT(CAS_NRXCOMP, 128, 32768);
RINGASSERT(CAS_NRXDESC, 32, 8192);
RINGASSERT(CAS_NRXDESC2, 32, 8192);
RINGASSERT(CAS_NTXDESC, 32, 8192);

#undef RINGASSERT

/*
 * Compile-time check that member m of struct cas_control_data starts at
 * an offset that is a multiple of a.  The mask test only works when a is
 * a power of two.
 */
#define	CCDASSERT(m, a)							\
	CTASSERT((offsetof(struct cas_control_data, m) & ((a) - 1)) == 0)

CCDASSERT(ccd_rxcomps, CAS_RX_COMP_ALIGN);
CCDASSERT(ccd_rxdescs, CAS_RX_DESC_ALIGN);
CCDASSERT(ccd_rxdescs2, CAS_RX_DESC_ALIGN);

#undef CCDASSERT
117 
118 #define	CAS_TRIES	10000
119 
120 /*
121  * According to documentation, the hardware has support for basic TCP
122  * checksum offloading only, in practice this can be also used for UDP
123  * however (i.e. the problem of previous Sun NICs that a checksum of 0x0
124  * is not converted to 0xffff no longer exists).
125  */
126 #define	CAS_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)
127 
128 static inline void cas_add_rxdesc(struct cas_softc *sc, u_int idx);
129 static int	cas_attach(struct cas_softc *sc);
130 static int	cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr,
131 		    uint32_t set);
132 static void	cas_cddma_callback(void *xsc, bus_dma_segment_t *segs,
133 		    int nsegs, int error);
134 static void	cas_detach(struct cas_softc *sc);
135 static int	cas_disable_rx(struct cas_softc *sc);
136 static int	cas_disable_tx(struct cas_softc *sc);
137 static void	cas_eint(struct cas_softc *sc, u_int status);
138 static void	cas_free(struct mbuf *m);
139 static void	cas_init(void *xsc);
140 static void	cas_init_locked(struct cas_softc *sc);
141 static void	cas_init_regs(struct cas_softc *sc);
142 static int	cas_intr(void *v);
143 static void	cas_intr_task(void *arg, int pending __unused);
144 static int	cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
145 static int	cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head);
146 static int	cas_mediachange(struct ifnet *ifp);
147 static void	cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr);
148 static void	cas_meminit(struct cas_softc *sc);
149 static void	cas_mifinit(struct cas_softc *sc);
150 static int	cas_mii_readreg(device_t dev, int phy, int reg);
151 static void	cas_mii_statchg(device_t dev);
152 static int	cas_mii_writereg(device_t dev, int phy, int reg, int val);
153 static void	cas_reset(struct cas_softc *sc);
154 static int	cas_reset_rx(struct cas_softc *sc);
155 static int	cas_reset_tx(struct cas_softc *sc);
156 static void	cas_resume(struct cas_softc *sc);
157 static u_int	cas_descsize(u_int sz);
158 static void	cas_rint(struct cas_softc *sc);
159 static void	cas_rint_timeout(void *arg);
160 static inline void cas_rxcksum(struct mbuf *m, uint16_t cksum);
161 static inline void cas_rxcompinit(struct cas_rx_comp *rxcomp);
162 static u_int	cas_rxcompsize(u_int sz);
163 static void	cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs,
164 		    int nsegs, int error);
165 static void	cas_setladrf(struct cas_softc *sc);
166 static void	cas_start(struct ifnet *ifp);
167 static void	cas_stop(struct ifnet *ifp);
168 static void	cas_suspend(struct cas_softc *sc);
169 static void	cas_tick(void *arg);
170 static void	cas_tint(struct cas_softc *sc);
171 static void	cas_tx_task(void *arg, int pending __unused);
172 static inline void cas_txkick(struct cas_softc *sc);
173 static void	cas_watchdog(struct cas_softc *sc);
174 
175 static devclass_t cas_devclass;
176 
177 MODULE_DEPEND(cas, ether, 1, 1, 1);
178 MODULE_DEPEND(cas, miibus, 1, 1, 1);
179 
180 #ifdef CAS_DEBUG
181 #include <sys/ktr.h>
182 #define	KTR_CAS		KTR_SPARE2
183 #endif
184 
185 static int
186 cas_attach(struct cas_softc *sc)
187 {
188 	struct cas_txsoft *txs;
189 	struct ifnet *ifp;
190 	int error, i;
191 	uint32_t v;
192 
193 	/* Set up ifnet structure. */
194 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
195 	if (ifp == NULL)
196 		return (ENOSPC);
197 	ifp->if_softc = sc;
198 	if_initname(ifp, device_get_name(sc->sc_dev),
199 	    device_get_unit(sc->sc_dev));
200 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
201 	ifp->if_start = cas_start;
202 	ifp->if_ioctl = cas_ioctl;
203 	ifp->if_init = cas_init;
204 	IFQ_SET_MAXLEN(&ifp->if_snd, CAS_TXQUEUELEN);
205 	ifp->if_snd.ifq_drv_maxlen = CAS_TXQUEUELEN;
206 	IFQ_SET_READY(&ifp->if_snd);
207 
208 	callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0);
209 	callout_init_mtx(&sc->sc_rx_ch, &sc->sc_mtx, 0);
210 	/* Create local taskq. */
211 	TASK_INIT(&sc->sc_intr_task, 0, cas_intr_task, sc);
212 	TASK_INIT(&sc->sc_tx_task, 1, cas_tx_task, ifp);
213 	sc->sc_tq = taskqueue_create_fast("cas_taskq", M_WAITOK,
214 	    taskqueue_thread_enqueue, &sc->sc_tq);
215 	if (sc->sc_tq == NULL) {
216 		device_printf(sc->sc_dev, "could not create taskqueue\n");
217 		error = ENXIO;
218 		goto fail_ifnet;
219 	}
220 	error = taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq",
221 	    device_get_nameunit(sc->sc_dev));
222 	if (error != 0) {
223 		device_printf(sc->sc_dev, "could not start threads\n");
224 		goto fail_taskq;
225 	}
226 
227 	/* Make sure the chip is stopped. */
228 	cas_reset(sc);
229 
230 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
231 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
232 	    BUS_SPACE_MAXSIZE, 0, BUS_SPACE_MAXSIZE, 0, NULL, NULL,
233 	    &sc->sc_pdmatag);
234 	if (error != 0)
235 		goto fail_taskq;
236 
237 	error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0,
238 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
239 	    CAS_PAGE_SIZE, 1, CAS_PAGE_SIZE, 0, NULL, NULL, &sc->sc_rdmatag);
240 	if (error != 0)
241 		goto fail_ptag;
242 
243 	error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0,
244 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
245 	    MCLBYTES * CAS_NTXSEGS, CAS_NTXSEGS, MCLBYTES,
246 	    BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_tdmatag);
247 	if (error != 0)
248 		goto fail_rtag;
249 
250 	error = bus_dma_tag_create(sc->sc_pdmatag, CAS_TX_DESC_ALIGN, 0,
251 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
252 	    sizeof(struct cas_control_data), 1,
253 	    sizeof(struct cas_control_data), 0,
254 	    NULL, NULL, &sc->sc_cdmatag);
255 	if (error != 0)
256 		goto fail_ttag;
257 
258 	/*
259 	 * Allocate the control data structures, create and load the
260 	 * DMA map for it.
261 	 */
262 	if ((error = bus_dmamem_alloc(sc->sc_cdmatag,
263 	    (void **)&sc->sc_control_data,
264 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
265 	    &sc->sc_cddmamap)) != 0) {
266 		device_printf(sc->sc_dev,
267 		    "unable to allocate control data, error = %d\n", error);
268 		goto fail_ctag;
269 	}
270 
271 	sc->sc_cddma = 0;
272 	if ((error = bus_dmamap_load(sc->sc_cdmatag, sc->sc_cddmamap,
273 	    sc->sc_control_data, sizeof(struct cas_control_data),
274 	    cas_cddma_callback, sc, 0)) != 0 || sc->sc_cddma == 0) {
275 		device_printf(sc->sc_dev,
276 		    "unable to load control data DMA map, error = %d\n",
277 		    error);
278 		goto fail_cmem;
279 	}
280 
281 	/*
282 	 * Initialize the transmit job descriptors.
283 	 */
284 	STAILQ_INIT(&sc->sc_txfreeq);
285 	STAILQ_INIT(&sc->sc_txdirtyq);
286 
287 	/*
288 	 * Create the transmit buffer DMA maps.
289 	 */
290 	error = ENOMEM;
291 	for (i = 0; i < CAS_TXQUEUELEN; i++) {
292 		txs = &sc->sc_txsoft[i];
293 		txs->txs_mbuf = NULL;
294 		txs->txs_ndescs = 0;
295 		if ((error = bus_dmamap_create(sc->sc_tdmatag, 0,
296 		    &txs->txs_dmamap)) != 0) {
297 			device_printf(sc->sc_dev,
298 			    "unable to create TX DMA map %d, error = %d\n",
299 			    i, error);
300 			goto fail_txd;
301 		}
302 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
303 	}
304 
305 	/*
306 	 * Allocate the receive buffers, create and load the DMA maps
307 	 * for them.
308 	 */
309 	for (i = 0; i < CAS_NRXDESC; i++) {
310 		if ((error = bus_dmamem_alloc(sc->sc_rdmatag,
311 		    &sc->sc_rxdsoft[i].rxds_buf, BUS_DMA_WAITOK,
312 		    &sc->sc_rxdsoft[i].rxds_dmamap)) != 0) {
313 			device_printf(sc->sc_dev,
314 			    "unable to allocate RX buffer %d, error = %d\n",
315 			    i, error);
316 			goto fail_rxmem;
317 		}
318 
319 		sc->sc_rxdptr = i;
320 		sc->sc_rxdsoft[i].rxds_paddr = 0;
321 		if ((error = bus_dmamap_load(sc->sc_rdmatag,
322 		    sc->sc_rxdsoft[i].rxds_dmamap, sc->sc_rxdsoft[i].rxds_buf,
323 		    CAS_PAGE_SIZE, cas_rxdma_callback, sc, 0)) != 0 ||
324 		    sc->sc_rxdsoft[i].rxds_paddr == 0) {
325 			device_printf(sc->sc_dev,
326 			    "unable to load RX DMA map %d, error = %d\n",
327 			    i, error);
328 			goto fail_rxmap;
329 		}
330 	}
331 
332 	if ((sc->sc_flags & CAS_SERDES) == 0) {
333 		CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_MII);
334 		CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4,
335 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
336 		cas_mifinit(sc);
337 		/*
338 		 * Look for an external PHY.
339 		 */
340 		error = ENXIO;
341 		v = CAS_READ_4(sc, CAS_MIF_CONF);
342 		if ((v & CAS_MIF_CONF_MDI1) != 0) {
343 			v |= CAS_MIF_CONF_PHY_SELECT;
344 			CAS_WRITE_4(sc, CAS_MIF_CONF, v);
345 			CAS_BARRIER(sc, CAS_MIF_CONF, 4,
346 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
347 			/* Enable/unfreeze the GMII pins of Saturn. */
348 			if (sc->sc_variant == CAS_SATURN) {
349 				CAS_WRITE_4(sc, CAS_SATURN_PCFG,
350 				    CAS_READ_4(sc, CAS_SATURN_PCFG) &
351 				    ~CAS_SATURN_PCFG_FSI);
352 				CAS_BARRIER(sc, CAS_SATURN_PCFG, 4,
353 				    BUS_SPACE_BARRIER_READ |
354 				    BUS_SPACE_BARRIER_WRITE);
355 				DELAY(10000);
356 			}
357 			error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
358 			    cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK,
359 			    MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE);
360 		}
361 		/*
362 		 * Fall back on an internal PHY if no external PHY was found.
363 		 */
364 		if (error != 0 && (v & CAS_MIF_CONF_MDI0) != 0) {
365 			v &= ~CAS_MIF_CONF_PHY_SELECT;
366 			CAS_WRITE_4(sc, CAS_MIF_CONF, v);
367 			CAS_BARRIER(sc, CAS_MIF_CONF, 4,
368 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
369 			/* Freeze the GMII pins of Saturn for saving power. */
370 			if (sc->sc_variant == CAS_SATURN) {
371 				CAS_WRITE_4(sc, CAS_SATURN_PCFG,
372 				    CAS_READ_4(sc, CAS_SATURN_PCFG) |
373 				    CAS_SATURN_PCFG_FSI);
374 				CAS_BARRIER(sc, CAS_SATURN_PCFG, 4,
375 				    BUS_SPACE_BARRIER_READ |
376 				    BUS_SPACE_BARRIER_WRITE);
377 				DELAY(10000);
378 			}
379 			error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
380 			    cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK,
381 			    MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE);
382 		}
383 	} else {
384 		/*
385 		 * Use the external PCS SERDES.
386 		 */
387 		CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_SERDES);
388 		CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_WRITE);
389 		/* Enable/unfreeze the SERDES pins of Saturn. */
390 		if (sc->sc_variant == CAS_SATURN) {
391 			CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0);
392 			CAS_BARRIER(sc, CAS_SATURN_PCFG, 4,
393 			    BUS_SPACE_BARRIER_WRITE);
394 		}
395 		CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD);
396 		CAS_BARRIER(sc, CAS_PCS_SERDES_CTRL, 4,
397 		    BUS_SPACE_BARRIER_WRITE);
398 		CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN);
399 		CAS_BARRIER(sc, CAS_PCS_CONF, 4,
400 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
401 		error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp,
402 		    cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK,
403 		    CAS_PHYAD_EXTERNAL, MII_OFFSET_ANY, MIIF_DOPAUSE);
404 	}
405 	if (error != 0) {
406 		device_printf(sc->sc_dev, "attaching PHYs failed\n");
407 		goto fail_rxmap;
408 	}
409 	sc->sc_mii = device_get_softc(sc->sc_miibus);
410 
411 	/*
412 	 * From this point forward, the attachment cannot fail.  A failure
413 	 * before this point releases all resources that may have been
414 	 * allocated.
415 	 */
416 
417 	/* Announce FIFO sizes. */
418 	v = CAS_READ_4(sc, CAS_TX_FIFO_SIZE);
419 	device_printf(sc->sc_dev, "%ukB RX FIFO, %ukB TX FIFO\n",
420 	    CAS_RX_FIFO_SIZE / 1024, v / 16);
421 
422 	/* Attach the interface. */
423 	ether_ifattach(ifp, sc->sc_enaddr);
424 
425 	/*
426 	 * Tell the upper layer(s) we support long frames/checksum offloads.
427 	 */
428 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
429 	ifp->if_capabilities = IFCAP_VLAN_MTU;
430 	if ((sc->sc_flags & CAS_NO_CSUM) == 0) {
431 		ifp->if_capabilities |= IFCAP_HWCSUM;
432 		ifp->if_hwassist = CAS_CSUM_FEATURES;
433 	}
434 	ifp->if_capenable = ifp->if_capabilities;
435 
436 	return (0);
437 
438 	/*
439 	 * Free any resources we've allocated during the failed attach
440 	 * attempt.  Do this in reverse order and fall through.
441 	 */
442  fail_rxmap:
443 	for (i = 0; i < CAS_NRXDESC; i++)
444 		if (sc->sc_rxdsoft[i].rxds_paddr != 0)
445 			bus_dmamap_unload(sc->sc_rdmatag,
446 			    sc->sc_rxdsoft[i].rxds_dmamap);
447  fail_rxmem:
448 	for (i = 0; i < CAS_NRXDESC; i++)
449 		if (sc->sc_rxdsoft[i].rxds_buf != NULL)
450 			bus_dmamem_free(sc->sc_rdmatag,
451 			    sc->sc_rxdsoft[i].rxds_buf,
452 			    sc->sc_rxdsoft[i].rxds_dmamap);
453  fail_txd:
454 	for (i = 0; i < CAS_TXQUEUELEN; i++)
455 		if (sc->sc_txsoft[i].txs_dmamap != NULL)
456 			bus_dmamap_destroy(sc->sc_tdmatag,
457 			    sc->sc_txsoft[i].txs_dmamap);
458 	bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap);
459  fail_cmem:
460 	bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data,
461 	    sc->sc_cddmamap);
462  fail_ctag:
463 	bus_dma_tag_destroy(sc->sc_cdmatag);
464  fail_ttag:
465 	bus_dma_tag_destroy(sc->sc_tdmatag);
466  fail_rtag:
467 	bus_dma_tag_destroy(sc->sc_rdmatag);
468  fail_ptag:
469 	bus_dma_tag_destroy(sc->sc_pdmatag);
470  fail_taskq:
471 	taskqueue_free(sc->sc_tq);
472  fail_ifnet:
473 	if_free(ifp);
474 	return (error);
475 }
476 
477 static void
478 cas_detach(struct cas_softc *sc)
479 {
480 	struct ifnet *ifp = sc->sc_ifp;
481 	int i;
482 
483 	ether_ifdetach(ifp);
484 	CAS_LOCK(sc);
485 	cas_stop(ifp);
486 	CAS_UNLOCK(sc);
487 	callout_drain(&sc->sc_tick_ch);
488 	callout_drain(&sc->sc_rx_ch);
489 	taskqueue_drain(sc->sc_tq, &sc->sc_intr_task);
490 	taskqueue_drain(sc->sc_tq, &sc->sc_tx_task);
491 	if_free(ifp);
492 	taskqueue_free(sc->sc_tq);
493 	device_delete_child(sc->sc_dev, sc->sc_miibus);
494 
495 	for (i = 0; i < CAS_NRXDESC; i++)
496 		if (sc->sc_rxdsoft[i].rxds_dmamap != NULL)
497 			bus_dmamap_sync(sc->sc_rdmatag,
498 			    sc->sc_rxdsoft[i].rxds_dmamap,
499 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
500 	for (i = 0; i < CAS_NRXDESC; i++)
501 		if (sc->sc_rxdsoft[i].rxds_paddr != 0)
502 			bus_dmamap_unload(sc->sc_rdmatag,
503 			    sc->sc_rxdsoft[i].rxds_dmamap);
504 	for (i = 0; i < CAS_NRXDESC; i++)
505 		if (sc->sc_rxdsoft[i].rxds_buf != NULL)
506 			bus_dmamem_free(sc->sc_rdmatag,
507 			    sc->sc_rxdsoft[i].rxds_buf,
508 			    sc->sc_rxdsoft[i].rxds_dmamap);
509 	for (i = 0; i < CAS_TXQUEUELEN; i++)
510 		if (sc->sc_txsoft[i].txs_dmamap != NULL)
511 			bus_dmamap_destroy(sc->sc_tdmatag,
512 			    sc->sc_txsoft[i].txs_dmamap);
513 	CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
514 	bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap);
515 	bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data,
516 	    sc->sc_cddmamap);
517 	bus_dma_tag_destroy(sc->sc_cdmatag);
518 	bus_dma_tag_destroy(sc->sc_tdmatag);
519 	bus_dma_tag_destroy(sc->sc_rdmatag);
520 	bus_dma_tag_destroy(sc->sc_pdmatag);
521 }
522 
523 static void
524 cas_suspend(struct cas_softc *sc)
525 {
526 	struct ifnet *ifp = sc->sc_ifp;
527 
528 	CAS_LOCK(sc);
529 	cas_stop(ifp);
530 	CAS_UNLOCK(sc);
531 }
532 
533 static void
534 cas_resume(struct cas_softc *sc)
535 {
536 	struct ifnet *ifp = sc->sc_ifp;
537 
538 	CAS_LOCK(sc);
539 	/*
540 	 * On resume all registers have to be initialized again like
541 	 * after power-on.
542 	 */
543 	sc->sc_flags &= ~CAS_INITED;
544 	if (ifp->if_flags & IFF_UP)
545 		cas_init_locked(sc);
546 	CAS_UNLOCK(sc);
547 }
548 
549 static inline void
550 cas_rxcksum(struct mbuf *m, uint16_t cksum)
551 {
552 	struct ether_header *eh;
553 	struct ip *ip;
554 	struct udphdr *uh;
555 	uint16_t *opts;
556 	int32_t hlen, len, pktlen;
557 	uint32_t temp32;
558 
559 	pktlen = m->m_pkthdr.len;
560 	if (pktlen < sizeof(struct ether_header) + sizeof(struct ip))
561 		return;
562 	eh = mtod(m, struct ether_header *);
563 	if (eh->ether_type != htons(ETHERTYPE_IP))
564 		return;
565 	ip = (struct ip *)(eh + 1);
566 	if (ip->ip_v != IPVERSION)
567 		return;
568 
569 	hlen = ip->ip_hl << 2;
570 	pktlen -= sizeof(struct ether_header);
571 	if (hlen < sizeof(struct ip))
572 		return;
573 	if (ntohs(ip->ip_len) < hlen)
574 		return;
575 	if (ntohs(ip->ip_len) != pktlen)
576 		return;
577 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
578 		return;	/* Cannot handle fragmented packet. */
579 
580 	switch (ip->ip_p) {
581 	case IPPROTO_TCP:
582 		if (pktlen < (hlen + sizeof(struct tcphdr)))
583 			return;
584 		break;
585 	case IPPROTO_UDP:
586 		if (pktlen < (hlen + sizeof(struct udphdr)))
587 			return;
588 		uh = (struct udphdr *)((uint8_t *)ip + hlen);
589 		if (uh->uh_sum == 0)
590 			return; /* no checksum */
591 		break;
592 	default:
593 		return;
594 	}
595 
596 	cksum = ~cksum;
597 	/* checksum fixup for IP options */
598 	len = hlen - sizeof(struct ip);
599 	if (len > 0) {
600 		opts = (uint16_t *)(ip + 1);
601 		for (; len > 0; len -= sizeof(uint16_t), opts++) {
602 			temp32 = cksum - *opts;
603 			temp32 = (temp32 >> 16) + (temp32 & 65535);
604 			cksum = temp32 & 65535;
605 		}
606 	}
607 	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
608 	m->m_pkthdr.csum_data = cksum;
609 }
610 
611 static void
612 cas_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error)
613 {
614 	struct cas_softc *sc = xsc;
615 
616 	if (error != 0)
617 		return;
618 	if (nsegs != 1)
619 		panic("%s: bad control buffer segment count", __func__);
620 	sc->sc_cddma = segs[0].ds_addr;
621 }
622 
623 static void
624 cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error)
625 {
626 	struct cas_softc *sc = xsc;
627 
628 	if (error != 0)
629 		return;
630 	if (nsegs != 1)
631 		panic("%s: bad RX buffer segment count", __func__);
632 	sc->sc_rxdsoft[sc->sc_rxdptr].rxds_paddr = segs[0].ds_addr;
633 }
634 
635 static void
636 cas_tick(void *arg)
637 {
638 	struct cas_softc *sc = arg;
639 	struct ifnet *ifp = sc->sc_ifp;
640 	uint32_t v;
641 
642 	CAS_LOCK_ASSERT(sc, MA_OWNED);
643 
644 	/*
645 	 * Unload collision and error counters.
646 	 */
647 	if_inc_counter(ifp, IFCOUNTER_COLLISIONS,
648 	    CAS_READ_4(sc, CAS_MAC_NORM_COLL_CNT) +
649 	    CAS_READ_4(sc, CAS_MAC_FIRST_COLL_CNT));
650 	v = CAS_READ_4(sc, CAS_MAC_EXCESS_COLL_CNT) +
651 	    CAS_READ_4(sc, CAS_MAC_LATE_COLL_CNT);
652 	if_inc_counter(ifp, IFCOUNTER_COLLISIONS, v);
653 	if_inc_counter(ifp, IFCOUNTER_OERRORS, v);
654 	if_inc_counter(ifp, IFCOUNTER_IERRORS,
655 	    CAS_READ_4(sc, CAS_MAC_RX_LEN_ERR_CNT) +
656 	    CAS_READ_4(sc, CAS_MAC_RX_ALIGN_ERR) +
657 	    CAS_READ_4(sc, CAS_MAC_RX_CRC_ERR_CNT) +
658 	    CAS_READ_4(sc, CAS_MAC_RX_CODE_VIOL));
659 
660 	/*
661 	 * Then clear the hardware counters.
662 	 */
663 	CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0);
664 	CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0);
665 	CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0);
666 	CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0);
667 	CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0);
668 	CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0);
669 	CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0);
670 	CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0);
671 
672 	mii_tick(sc->sc_mii);
673 
674 	if (sc->sc_txfree != CAS_MAXTXFREE)
675 		cas_tint(sc);
676 
677 	cas_watchdog(sc);
678 
679 	callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc);
680 }
681 
682 static int
683 cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr, uint32_t set)
684 {
685 	int i;
686 	uint32_t reg;
687 
688 	for (i = CAS_TRIES; i--; DELAY(100)) {
689 		reg = CAS_READ_4(sc, r);
690 		if ((reg & clr) == 0 && (reg & set) == set)
691 			return (1);
692 	}
693 	return (0);
694 }
695 
696 static void
697 cas_reset(struct cas_softc *sc)
698 {
699 
700 #ifdef CAS_DEBUG
701 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
702 #endif
703 	/* Disable all interrupts in order to avoid spurious ones. */
704 	CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff);
705 
706 	cas_reset_rx(sc);
707 	cas_reset_tx(sc);
708 
709 	/*
710 	 * Do a full reset modulo the result of the last auto-negotiation
711 	 * when using the SERDES.
712 	 */
713 	CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX |
714 	    ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0));
715 	CAS_BARRIER(sc, CAS_RESET, 4,
716 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
717 	DELAY(3000);
718 	if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0))
719 		device_printf(sc->sc_dev, "cannot reset device\n");
720 }
721 
722 static void
723 cas_stop(struct ifnet *ifp)
724 {
725 	struct cas_softc *sc = ifp->if_softc;
726 	struct cas_txsoft *txs;
727 
728 #ifdef CAS_DEBUG
729 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
730 #endif
731 
732 	callout_stop(&sc->sc_tick_ch);
733 	callout_stop(&sc->sc_rx_ch);
734 
735 	/* Disable all interrupts in order to avoid spurious ones. */
736 	CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff);
737 
738 	cas_reset_tx(sc);
739 	cas_reset_rx(sc);
740 
741 	/*
742 	 * Release any queued transmit buffers.
743 	 */
744 	while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) {
745 		STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q);
746 		if (txs->txs_ndescs != 0) {
747 			bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
748 			    BUS_DMASYNC_POSTWRITE);
749 			bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
750 			if (txs->txs_mbuf != NULL) {
751 				m_freem(txs->txs_mbuf);
752 				txs->txs_mbuf = NULL;
753 			}
754 		}
755 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
756 	}
757 
758 	/*
759 	 * Mark the interface down and cancel the watchdog timer.
760 	 */
761 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
762 	sc->sc_flags &= ~CAS_LINK;
763 	sc->sc_wdog_timer = 0;
764 }
765 
766 static int
767 cas_reset_rx(struct cas_softc *sc)
768 {
769 
770 	/*
771 	 * Resetting while DMA is in progress can cause a bus hang, so we
772 	 * disable DMA first.
773 	 */
774 	(void)cas_disable_rx(sc);
775 	CAS_WRITE_4(sc, CAS_RX_CONF, 0);
776 	CAS_BARRIER(sc, CAS_RX_CONF, 4,
777 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
778 	if (!cas_bitwait(sc, CAS_RX_CONF, CAS_RX_CONF_RXDMA_EN, 0))
779 		device_printf(sc->sc_dev, "cannot disable RX DMA\n");
780 
781 	/* Finally, reset the ERX. */
782 	CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX |
783 	    ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0));
784 	CAS_BARRIER(sc, CAS_RESET, 4,
785 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
786 	if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX, 0)) {
787 		device_printf(sc->sc_dev, "cannot reset receiver\n");
788 		return (1);
789 	}
790 	return (0);
791 }
792 
793 static int
794 cas_reset_tx(struct cas_softc *sc)
795 {
796 
797 	/*
798 	 * Resetting while DMA is in progress can cause a bus hang, so we
799 	 * disable DMA first.
800 	 */
801 	(void)cas_disable_tx(sc);
802 	CAS_WRITE_4(sc, CAS_TX_CONF, 0);
803 	CAS_BARRIER(sc, CAS_TX_CONF, 4,
804 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
805 	if (!cas_bitwait(sc, CAS_TX_CONF, CAS_TX_CONF_TXDMA_EN, 0))
806 		device_printf(sc->sc_dev, "cannot disable TX DMA\n");
807 
808 	/* Finally, reset the ETX. */
809 	CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_TX |
810 	    ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0));
811 	CAS_BARRIER(sc, CAS_RESET, 4,
812 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
813 	if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_TX, 0)) {
814 		device_printf(sc->sc_dev, "cannot reset transmitter\n");
815 		return (1);
816 	}
817 	return (0);
818 }
819 
820 static int
821 cas_disable_rx(struct cas_softc *sc)
822 {
823 
824 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF,
825 	    CAS_READ_4(sc, CAS_MAC_RX_CONF) & ~CAS_MAC_RX_CONF_EN);
826 	CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4,
827 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
828 	if (cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0))
829 		return (1);
830 	if (bootverbose)
831 		device_printf(sc->sc_dev, "cannot disable RX MAC\n");
832 	return (0);
833 }
834 
835 static int
836 cas_disable_tx(struct cas_softc *sc)
837 {
838 
839 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF,
840 	    CAS_READ_4(sc, CAS_MAC_TX_CONF) & ~CAS_MAC_TX_CONF_EN);
841 	CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4,
842 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
843 	if (cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0))
844 		return (1);
845 	if (bootverbose)
846 		device_printf(sc->sc_dev, "cannot disable TX MAC\n");
847 	return (0);
848 }
849 
850 static inline void
851 cas_rxcompinit(struct cas_rx_comp *rxcomp)
852 {
853 
854 	rxcomp->crc_word1 = 0;
855 	rxcomp->crc_word2 = 0;
856 	rxcomp->crc_word3 =
857 	    htole64(CAS_SET(ETHER_HDR_LEN + sizeof(struct ip), CAS_RC3_CSO));
858 	rxcomp->crc_word4 = htole64(CAS_RC4_ZERO);
859 }
860 
861 static void
862 cas_meminit(struct cas_softc *sc)
863 {
864 	int i;
865 
866 	CAS_LOCK_ASSERT(sc, MA_OWNED);
867 
868 	/*
869 	 * Initialize the transmit descriptor ring.
870 	 */
871 	for (i = 0; i < CAS_NTXDESC; i++) {
872 		sc->sc_txdescs[i].cd_flags = 0;
873 		sc->sc_txdescs[i].cd_buf_ptr = 0;
874 	}
875 	sc->sc_txfree = CAS_MAXTXFREE;
876 	sc->sc_txnext = 0;
877 	sc->sc_txwin = 0;
878 
879 	/*
880 	 * Initialize the receive completion ring.
881 	 */
882 	for (i = 0; i < CAS_NRXCOMP; i++)
883 		cas_rxcompinit(&sc->sc_rxcomps[i]);
884 	sc->sc_rxcptr = 0;
885 
886 	/*
887 	 * Initialize the first receive descriptor ring.  We leave
888 	 * the second one zeroed as we don't actually use it.
889 	 */
890 	for (i = 0; i < CAS_NRXDESC; i++)
891 		CAS_INIT_RXDESC(sc, i, i);
892 	sc->sc_rxdptr = 0;
893 
894 	CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
895 }
896 
897 static u_int
898 cas_descsize(u_int sz)
899 {
900 
901 	switch (sz) {
902 	case 32:
903 		return (CAS_DESC_32);
904 	case 64:
905 		return (CAS_DESC_64);
906 	case 128:
907 		return (CAS_DESC_128);
908 	case 256:
909 		return (CAS_DESC_256);
910 	case 512:
911 		return (CAS_DESC_512);
912 	case 1024:
913 		return (CAS_DESC_1K);
914 	case 2048:
915 		return (CAS_DESC_2K);
916 	case 4096:
917 		return (CAS_DESC_4K);
918 	case 8192:
919 		return (CAS_DESC_8K);
920 	default:
921 		printf("%s: invalid descriptor ring size %d\n", __func__, sz);
922 		return (CAS_DESC_32);
923 	}
924 }
925 
926 static u_int
927 cas_rxcompsize(u_int sz)
928 {
929 
930 	switch (sz) {
931 	case 128:
932 		return (CAS_RX_CONF_COMP_128);
933 	case 256:
934 		return (CAS_RX_CONF_COMP_256);
935 	case 512:
936 		return (CAS_RX_CONF_COMP_512);
937 	case 1024:
938 		return (CAS_RX_CONF_COMP_1K);
939 	case 2048:
940 		return (CAS_RX_CONF_COMP_2K);
941 	case 4096:
942 		return (CAS_RX_CONF_COMP_4K);
943 	case 8192:
944 		return (CAS_RX_CONF_COMP_8K);
945 	case 16384:
946 		return (CAS_RX_CONF_COMP_16K);
947 	case 32768:
948 		return (CAS_RX_CONF_COMP_32K);
949 	default:
950 		printf("%s: invalid dcompletion ring size %d\n", __func__, sz);
951 		return (CAS_RX_CONF_COMP_128);
952 	}
953 }
954 
/*
 * if_init entry point: run cas_init_locked() with the driver lock held.
 * xsc is the softc.
 */
static void
cas_init(void *xsc)
{
	struct cas_softc *sc;

	sc = xsc;

	CAS_LOCK(sc);
	cas_init_locked(sc);
	CAS_UNLOCK(sc);
}
964 
/*
 * Initialization of interface; set up initialization block
 * and transmit/receive descriptor rings.
 *
 * Must be called with the softc lock held.  This is a no-op when the
 * interface is already marked IFF_DRV_RUNNING; callers that want to force
 * a re-initialization clear that flag first.  Otherwise the chip is
 * stopped and reset, all rings and MAC registers are reprogrammed, RX DMA
 * and the TX MAC are enabled, the interface is marked running and the
 * periodic tick callout is (re)armed.
 */
static void
cas_init_locked(struct cas_softc *sc)
{
	struct ifnet *ifp = sc->sc_ifp;
	uint32_t v;

	CAS_LOCK_ASSERT(sc, MA_OWNED);

	/* Nothing to do if the interface is already up and running. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
		return;

#ifdef CAS_DEBUG
	CTR2(KTR_CAS, "%s: %s: calling stop", device_get_name(sc->sc_dev),
	    __func__);
#endif
	/*
	 * Initialization sequence.  The numbered steps below correspond
	 * to the sequence outlined in section 6.3.5.1 in the Ethernet
	 * Channel Engine manual (part of the PCIO manual).
	 * See also the STP2002-STQ document from Sun Microsystems.
	 */

	/* step 1 & 2.  Reset the Ethernet Channel. */
	cas_stop(ifp);
	cas_reset(sc);
#ifdef CAS_DEBUG
	CTR2(KTR_CAS, "%s: %s: restarting", device_get_name(sc->sc_dev),
	    __func__);
#endif

	/* The MIF is only re-initialized when not operating in SERDES mode. */
	if ((sc->sc_flags & CAS_SERDES) == 0)
		/* Re-initialize the MIF. */
		cas_mifinit(sc);

	/* step 3.  Setup data structures in host memory. */
	cas_meminit(sc);

	/* step 4.  TX MAC registers & counters */
	cas_init_regs(sc);

	/* step 5.  RX MAC registers & counters */

	/*
	 * step 6 & 7.  Program Ring Base Addresses.  Each 64-bit ring
	 * address is written as separate high and low 32-bit halves.
	 */
	CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_HI,
	    (((uint64_t)CAS_CDTXDADDR(sc, 0)) >> 32));
	CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_LO,
	    CAS_CDTXDADDR(sc, 0) & 0xffffffff);

	CAS_WRITE_4(sc, CAS_RX_COMP_BASE_HI,
	    (((uint64_t)CAS_CDRXCADDR(sc, 0)) >> 32));
	CAS_WRITE_4(sc, CAS_RX_COMP_BASE_LO,
	    CAS_CDRXCADDR(sc, 0) & 0xffffffff);

	CAS_WRITE_4(sc, CAS_RX_DESC_BASE_HI,
	    (((uint64_t)CAS_CDRXDADDR(sc, 0)) >> 32));
	CAS_WRITE_4(sc, CAS_RX_DESC_BASE_LO,
	    CAS_CDRXDADDR(sc, 0) & 0xffffffff);

	/* The second RX descriptor ring is only programmed with CAS_REG_PLUS. */
	if ((sc->sc_flags & CAS_REG_PLUS) != 0) {
		CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_HI,
		    (((uint64_t)CAS_CDRXD2ADDR(sc, 0)) >> 32));
		CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_LO,
		    CAS_CDRXD2ADDR(sc, 0) & 0xffffffff);
	}

#ifdef CAS_DEBUG
	CTR5(KTR_CAS,
	    "loading TXDR %lx, RXCR %lx, RXDR %lx, RXD2R %lx, cddma %lx",
	    CAS_CDTXDADDR(sc, 0), CAS_CDRXCADDR(sc, 0), CAS_CDRXDADDR(sc, 0),
	    CAS_CDRXD2ADDR(sc, 0), sc->sc_cddma);
#endif

	/* step 8.  Global Configuration & Interrupt Masks */

	/* Disable weighted round robin. */
	CAS_WRITE_4(sc, CAS_CAW, CAS_CAW_RR_DIS);

	/*
	 * Enable infinite bursts for revisions without PCI issues if
	 * applicable.  Doing so greatly improves the TX performance on
	 * !__sparc64__ (on sparc64, setting CAS_INF_BURST improves TX
	 * performance only marginally but hurts RX throughput quite a bit).
	 */
	CAS_WRITE_4(sc, CAS_INF_BURST,
#if !defined(__sparc64__)
	    (sc->sc_flags & CAS_TABORT) == 0 ? CAS_INF_BURST_EN :
#endif
	    0);

	/*
	 * Set up interrupts.  The mask register is written with the
	 * complement of the wanted sources, i.e. a set bit masks (disables)
	 * the corresponding interrupt.
	 */
	CAS_WRITE_4(sc, CAS_INTMASK,
	    ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR |
	    CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR |
	    CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY |
	    CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH |
	    CAS_INTR_PCI_ERROR_INT
#ifdef CAS_DEBUG
	    | CAS_INTR_PCS_INT | CAS_INTR_MIF
#endif
	    ));
	/* Don't clear top level interrupts when CAS_STATUS_ALIAS is read. */
	CAS_WRITE_4(sc, CAS_CLEAR_ALIAS, 0);
	CAS_WRITE_4(sc, CAS_MAC_RX_MASK, ~CAS_MAC_RX_OVERFLOW);
	CAS_WRITE_4(sc, CAS_MAC_TX_MASK,
	    ~(CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR));
	/*
	 * The MAC control (PAUSE) sources are written complemented only in
	 * CAS_DEBUG builds; otherwise the three bits are written as set.
	 */
#ifdef CAS_DEBUG
	CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK,
	    ~(CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE |
	    CAS_MAC_CTRL_NON_PAUSE));
#else
	CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK,
	    CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE |
	    CAS_MAC_CTRL_NON_PAUSE);
#endif

	/* Enable PCI error interrupts. */
	CAS_WRITE_4(sc, CAS_ERROR_MASK,
	    ~(CAS_ERROR_DTRTO | CAS_ERROR_OTHER | CAS_ERROR_DMAW_ZERO |
	    CAS_ERROR_DMAR_ZERO | CAS_ERROR_RTRTO));

	/* Enable PCI error interrupts in BIM configuration. */
	CAS_WRITE_4(sc, CAS_BIM_CONF,
	    CAS_BIM_CONF_DPAR_EN | CAS_BIM_CONF_RMA_EN | CAS_BIM_CONF_RTA_EN);

	/*
	 * step 9.  ETX Configuration: encode transmit descriptor ring size,
	 * enable DMA and disable pre-interrupt writeback completion.
	 */
	v = cas_descsize(CAS_NTXDESC) << CAS_TX_CONF_DESC3_SHFT;
	CAS_WRITE_4(sc, CAS_TX_CONF, v | CAS_TX_CONF_TXDMA_EN |
	    CAS_TX_CONF_RDPP_DIS | CAS_TX_CONF_PICWB_DIS);

	/* step 10.  ERX Configuration */

	/*
	 * Encode receive completion and descriptor ring sizes, set the
	 * swivel offset.  RX DMA itself is not enabled by this write; that
	 * happens at the end of step 10.
	 */
	v = cas_rxcompsize(CAS_NRXCOMP) << CAS_RX_CONF_COMP_SHFT;
	v |= cas_descsize(CAS_NRXDESC) << CAS_RX_CONF_DESC_SHFT;
	if ((sc->sc_flags & CAS_REG_PLUS) != 0)
		v |= cas_descsize(CAS_NRXDESC2) << CAS_RX_CONF_DESC2_SHFT;
	CAS_WRITE_4(sc, CAS_RX_CONF,
	    v | (ETHER_ALIGN << CAS_RX_CONF_SOFF_SHFT));

	/* Set the PAUSE thresholds.  We use the maximum OFF threshold. */
	CAS_WRITE_4(sc, CAS_RX_PTHRS,
	    (111 << CAS_RX_PTHRS_XOFF_SHFT) | (15 << CAS_RX_PTHRS_XON_SHFT));

	/* RX blanking */
	CAS_WRITE_4(sc, CAS_RX_BLANK,
	    (15 << CAS_RX_BLANK_TIME_SHFT) | (5 << CAS_RX_BLANK_PKTS_SHFT));

	/* Set RX_COMP_AFULL threshold to half of the RX completions. */
	CAS_WRITE_4(sc, CAS_RX_AEMPTY_THRS,
	    (CAS_NRXCOMP / 2) << CAS_RX_AEMPTY_COMP_SHFT);

	/* Initialize the RX page size register as appropriate for 8k. */
	CAS_WRITE_4(sc, CAS_RX_PSZ,
	    (CAS_RX_PSZ_8K << CAS_RX_PSZ_SHFT) |
	    (4 << CAS_RX_PSZ_MB_CNT_SHFT) |
	    (CAS_RX_PSZ_MB_STRD_2K << CAS_RX_PSZ_MB_STRD_SHFT) |
	    (CAS_RX_PSZ_MB_OFF_64 << CAS_RX_PSZ_MB_OFF_SHFT));

	/* Disable RX random early detection. */
	CAS_WRITE_4(sc,	CAS_RX_RED, 0);

	/*
	 * Zero the RX reassembly DMA table.  Each entry is selected through
	 * the address register and then cleared via the three data registers.
	 */
	for (v = 0; v <= CAS_RX_REAS_DMA_ADDR_LC; v++) {
		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_ADDR, v);
		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_DATA_LO, 0);
		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_DATA_MD, 0);
		CAS_WRITE_4(sc,	CAS_RX_REAS_DMA_DATA_HI, 0);
	}

	/* Ensure the RX control FIFO and RX IPP FIFO addresses are zero. */
	CAS_WRITE_4(sc, CAS_RX_CTRL_FIFO, 0);
	CAS_WRITE_4(sc, CAS_RX_IPP_ADDR, 0);

	/* Finally, enable RX DMA. */
	CAS_WRITE_4(sc, CAS_RX_CONF,
	    CAS_READ_4(sc, CAS_RX_CONF) | CAS_RX_CONF_RXDMA_EN);

	/* step 11.  Configure Media. */

	/*
	 * step 12.  RX_MAC Configuration Register.  The value is only cached
	 * in the softc here (with the enable bit cleared); it is written to
	 * the chip by cas_setladrf() below.
	 */
	v = CAS_READ_4(sc, CAS_MAC_RX_CONF);
	v &= ~(CAS_MAC_RX_CONF_STRPPAD | CAS_MAC_RX_CONF_EN);
	v |= CAS_MAC_RX_CONF_STRPFCS;
	sc->sc_mac_rxcfg = v;
	/*
	 * Clear the RX filter and reprogram it.  This will also set the
	 * current RX MAC configuration and enable it.
	 */
	cas_setladrf(sc);

	/*
	 * step 13.  TX_MAC Configuration Register.  The TX MAC is disabled
	 * via cas_disable_tx() before the configuration with the enable bit
	 * set is written back.
	 */
	v = CAS_READ_4(sc, CAS_MAC_TX_CONF);
	v |= CAS_MAC_TX_CONF_EN;
	(void)cas_disable_tx(sc);
	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, v);

	/* step 14.  Issue Transmit Pending command. */

	/* step 15.  Give the receiver a swift kick. */
	CAS_WRITE_4(sc, CAS_RX_KICK, CAS_NRXDESC - 4);
	CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, 0);
	if ((sc->sc_flags & CAS_REG_PLUS) != 0)
		CAS_WRITE_4(sc, CAS_RX_KICK2, CAS_NRXDESC2 - 4);

	/* Mark the interface as running and ready to accept output. */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	mii_mediachg(sc->sc_mii);

	/* Start the one second timer. */
	sc->sc_wdog_timer = 0;
	callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc);
}
1188 
1189 static int
1190 cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head)
1191 {
1192 	bus_dma_segment_t txsegs[CAS_NTXSEGS];
1193 	struct cas_txsoft *txs;
1194 	struct ip *ip;
1195 	struct mbuf *m;
1196 	uint64_t cflags;
1197 	int error, nexttx, nsegs, offset, seg;
1198 
1199 	CAS_LOCK_ASSERT(sc, MA_OWNED);
1200 
1201 	/* Get a work queue entry. */
1202 	if ((txs = STAILQ_FIRST(&sc->sc_txfreeq)) == NULL) {
1203 		/* Ran out of descriptors. */
1204 		return (ENOBUFS);
1205 	}
1206 
1207 	cflags = 0;
1208 	if (((*m_head)->m_pkthdr.csum_flags & CAS_CSUM_FEATURES) != 0) {
1209 		if (M_WRITABLE(*m_head) == 0) {
1210 			m = m_dup(*m_head, M_NOWAIT);
1211 			m_freem(*m_head);
1212 			*m_head = m;
1213 			if (m == NULL)
1214 				return (ENOBUFS);
1215 		}
1216 		offset = sizeof(struct ether_header);
1217 		m = m_pullup(*m_head, offset + sizeof(struct ip));
1218 		if (m == NULL) {
1219 			*m_head = NULL;
1220 			return (ENOBUFS);
1221 		}
1222 		ip = (struct ip *)(mtod(m, caddr_t) + offset);
1223 		offset += (ip->ip_hl << 2);
1224 		cflags = (offset << CAS_TD_CKSUM_START_SHFT) |
1225 		    ((offset + m->m_pkthdr.csum_data) <<
1226 		    CAS_TD_CKSUM_STUFF_SHFT) | CAS_TD_CKSUM_EN;
1227 		*m_head = m;
1228 	}
1229 
1230 	error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap,
1231 	    *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT);
1232 	if (error == EFBIG) {
1233 		m = m_collapse(*m_head, M_NOWAIT, CAS_NTXSEGS);
1234 		if (m == NULL) {
1235 			m_freem(*m_head);
1236 			*m_head = NULL;
1237 			return (ENOBUFS);
1238 		}
1239 		*m_head = m;
1240 		error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag,
1241 		    txs->txs_dmamap, *m_head, txsegs, &nsegs,
1242 		    BUS_DMA_NOWAIT);
1243 		if (error != 0) {
1244 			m_freem(*m_head);
1245 			*m_head = NULL;
1246 			return (error);
1247 		}
1248 	} else if (error != 0)
1249 		return (error);
1250 	/* If nsegs is wrong then the stack is corrupt. */
1251 	KASSERT(nsegs <= CAS_NTXSEGS,
1252 	    ("%s: too many DMA segments (%d)", __func__, nsegs));
1253 	if (nsegs == 0) {
1254 		m_freem(*m_head);
1255 		*m_head = NULL;
1256 		return (EIO);
1257 	}
1258 
1259 	/*
1260 	 * Ensure we have enough descriptors free to describe
1261 	 * the packet.  Note, we always reserve one descriptor
1262 	 * at the end of the ring as a termination point, in
1263 	 * order to prevent wrap-around.
1264 	 */
1265 	if (nsegs > sc->sc_txfree - 1) {
1266 		txs->txs_ndescs = 0;
1267 		bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
1268 		return (ENOBUFS);
1269 	}
1270 
1271 	txs->txs_ndescs = nsegs;
1272 	txs->txs_firstdesc = sc->sc_txnext;
1273 	nexttx = txs->txs_firstdesc;
1274 	for (seg = 0; seg < nsegs; seg++, nexttx = CAS_NEXTTX(nexttx)) {
1275 #ifdef CAS_DEBUG
1276 		CTR6(KTR_CAS,
1277 		    "%s: mapping seg %d (txd %d), len %lx, addr %#lx (%#lx)",
1278 		    __func__, seg, nexttx, txsegs[seg].ds_len,
1279 		    txsegs[seg].ds_addr, htole64(txsegs[seg].ds_addr));
1280 #endif
1281 		sc->sc_txdescs[nexttx].cd_buf_ptr =
1282 		    htole64(txsegs[seg].ds_addr);
1283 		KASSERT(txsegs[seg].ds_len <
1284 		    CAS_TD_BUF_LEN_MASK >> CAS_TD_BUF_LEN_SHFT,
1285 		    ("%s: segment size too large!", __func__));
1286 		sc->sc_txdescs[nexttx].cd_flags =
1287 		    htole64(txsegs[seg].ds_len << CAS_TD_BUF_LEN_SHFT);
1288 		txs->txs_lastdesc = nexttx;
1289 	}
1290 
1291 	/* Set EOF on the last descriptor. */
1292 #ifdef CAS_DEBUG
1293 	CTR3(KTR_CAS, "%s: end of frame at segment %d, TX %d",
1294 	    __func__, seg, nexttx);
1295 #endif
1296 	sc->sc_txdescs[txs->txs_lastdesc].cd_flags |=
1297 	    htole64(CAS_TD_END_OF_FRAME);
1298 
1299 	/* Lastly set SOF on the first descriptor. */
1300 #ifdef CAS_DEBUG
1301 	CTR3(KTR_CAS, "%s: start of frame at segment %d, TX %d",
1302 	    __func__, seg, nexttx);
1303 #endif
1304 	if (sc->sc_txwin += nsegs > CAS_MAXTXFREE * 2 / 3) {
1305 		sc->sc_txwin = 0;
1306 		sc->sc_txdescs[txs->txs_firstdesc].cd_flags |=
1307 		    htole64(cflags | CAS_TD_START_OF_FRAME | CAS_TD_INT_ME);
1308 	} else
1309 		sc->sc_txdescs[txs->txs_firstdesc].cd_flags |=
1310 		    htole64(cflags | CAS_TD_START_OF_FRAME);
1311 
1312 	/* Sync the DMA map. */
1313 	bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
1314 	    BUS_DMASYNC_PREWRITE);
1315 
1316 #ifdef CAS_DEBUG
1317 	CTR4(KTR_CAS, "%s: setting firstdesc=%d, lastdesc=%d, ndescs=%d",
1318 	    __func__, txs->txs_firstdesc, txs->txs_lastdesc,
1319 	    txs->txs_ndescs);
1320 #endif
1321 	STAILQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q);
1322 	STAILQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q);
1323 	txs->txs_mbuf = *m_head;
1324 
1325 	sc->sc_txnext = CAS_NEXTTX(txs->txs_lastdesc);
1326 	sc->sc_txfree -= txs->txs_ndescs;
1327 
1328 	return (0);
1329 }
1330 
/*
 * Program the MAC registers.
 *
 * Must be called with the softc lock held.  The first block is executed
 * only once per softc (tracked via the CAS_INITED flag) and covers the
 * registers that survive a chip reset; the counters, the PAUSE time, the
 * station address and the XIF configuration are rewritten on every call.
 */
static void
cas_init_regs(struct cas_softc *sc)
{
	int i;
	const u_char *laddr = IF_LLADDR(sc->sc_ifp);

	CAS_LOCK_ASSERT(sc, MA_OWNED);

	/* These registers are not cleared on reset. */
	if ((sc->sc_flags & CAS_INITED) == 0) {
		/* magic values */
		CAS_WRITE_4(sc, CAS_MAC_IPG0, 0);
		CAS_WRITE_4(sc, CAS_MAC_IPG1, 8);
		CAS_WRITE_4(sc, CAS_MAC_IPG2, 4);

		/* min frame length */
		CAS_WRITE_4(sc, CAS_MAC_MIN_FRAME, ETHER_MIN_LEN);
		/* max frame length and max burst size */
		CAS_WRITE_4(sc, CAS_MAC_MAX_BF,
		    ((ETHER_MAX_LEN_JUMBO + ETHER_VLAN_ENCAP_LEN) <<
		    CAS_MAC_MAX_BF_FRM_SHFT) |
		    (0x2000 << CAS_MAC_MAX_BF_BST_SHFT));

		/* more magic values */
		CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7);
		CAS_WRITE_4(sc, CAS_MAC_JAM_SIZE, 0x4);
		CAS_WRITE_4(sc, CAS_MAC_ATTEMPT_LIMIT, 0x10);
		CAS_WRITE_4(sc, CAS_MAC_CTRL_TYPE, 0x8808);

		/*
		 * random number seed: the low 10 bits of the last two
		 * bytes of the link-level address
		 */
		CAS_WRITE_4(sc, CAS_MAC_RANDOM_SEED,
		    ((laddr[5] << 8) | laddr[4]) & 0x3ff);

		/* secondary MAC addresses: 0:0:0:0:0:0 */
		for (i = CAS_MAC_ADDR3; i <= CAS_MAC_ADDR41;
		    i += CAS_MAC_ADDR4 - CAS_MAC_ADDR3)
			CAS_WRITE_4(sc, i, 0);

		/* MAC control address: 01:80:c2:00:00:01 */
		CAS_WRITE_4(sc, CAS_MAC_ADDR42, 0x0001);
		CAS_WRITE_4(sc, CAS_MAC_ADDR43, 0xc200);
		CAS_WRITE_4(sc, CAS_MAC_ADDR44, 0x0180);

		/* MAC filter address: 0:0:0:0:0:0 */
		CAS_WRITE_4(sc, CAS_MAC_AFILTER0, 0);
		CAS_WRITE_4(sc, CAS_MAC_AFILTER1, 0);
		CAS_WRITE_4(sc, CAS_MAC_AFILTER2, 0);
		CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK1_2, 0);
		CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK0, 0);

		/* Zero the hash table. */
		for (i = CAS_MAC_HASH0; i <= CAS_MAC_HASH15;
		    i += CAS_MAC_HASH1 - CAS_MAC_HASH0)
			CAS_WRITE_4(sc, i, 0);

		/* Remember that the one-time setup has been done. */
		sc->sc_flags |= CAS_INITED;
	}

	/* Counters need to be zeroed. */
	CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_DEFER_TMR_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_PEAK_ATTEMPTS, 0);
	CAS_WRITE_4(sc, CAS_MAC_RX_FRAME_COUNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0);
	CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0);
	CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0);

	/* Set XOFF PAUSE time. */
	CAS_WRITE_4(sc, CAS_MAC_SPC, 0x1BF0 << CAS_MAC_SPC_TIME_SHFT);

	/*
	 * Set the station address.  It is written 16 bits at a time, with
	 * the last two address bytes going into the first register.
	 */
	CAS_WRITE_4(sc, CAS_MAC_ADDR0, (laddr[4] << 8) | laddr[5]);
	CAS_WRITE_4(sc, CAS_MAC_ADDR1, (laddr[2] << 8) | laddr[3]);
	CAS_WRITE_4(sc, CAS_MAC_ADDR2, (laddr[0] << 8) | laddr[1]);

	/* Enable MII outputs. */
	CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, CAS_MAC_XIF_CONF_TX_OE);
}
1413 
1414 static void
1415 cas_tx_task(void *arg, int pending __unused)
1416 {
1417 	struct ifnet *ifp;
1418 
1419 	ifp = (struct ifnet *)arg;
1420 	cas_start(ifp);
1421 }
1422 
1423 static inline void
1424 cas_txkick(struct cas_softc *sc)
1425 {
1426 
1427 	/*
1428 	 * Update the TX kick register.  This register has to point to the
1429 	 * descriptor after the last valid one and for optimum performance
1430 	 * should be incremented in multiples of 4 (the DMA engine fetches/
1431 	 * updates descriptors in batches of 4).
1432 	 */
1433 #ifdef CAS_DEBUG
1434 	CTR3(KTR_CAS, "%s: %s: kicking TX %d",
1435 	    device_get_name(sc->sc_dev), __func__, sc->sc_txnext);
1436 #endif
1437 	CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1438 	CAS_WRITE_4(sc, CAS_TX_KICK3, sc->sc_txnext);
1439 }
1440 
1441 static void
1442 cas_start(struct ifnet *ifp)
1443 {
1444 	struct cas_softc *sc = ifp->if_softc;
1445 	struct mbuf *m;
1446 	int kicked, ntx;
1447 
1448 	CAS_LOCK(sc);
1449 
1450 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1451 	    IFF_DRV_RUNNING || (sc->sc_flags & CAS_LINK) == 0) {
1452 		CAS_UNLOCK(sc);
1453 		return;
1454 	}
1455 
1456 	if (sc->sc_txfree < CAS_MAXTXFREE / 4)
1457 		cas_tint(sc);
1458 
1459 #ifdef CAS_DEBUG
1460 	CTR4(KTR_CAS, "%s: %s: txfree %d, txnext %d",
1461 	    device_get_name(sc->sc_dev), __func__, sc->sc_txfree,
1462 	    sc->sc_txnext);
1463 #endif
1464 	ntx = 0;
1465 	kicked = 0;
1466 	for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->sc_txfree > 1;) {
1467 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1468 		if (m == NULL)
1469 			break;
1470 		if (cas_load_txmbuf(sc, &m) != 0) {
1471 			if (m == NULL)
1472 				break;
1473 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1474 			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1475 			break;
1476 		}
1477 		if ((sc->sc_txnext % 4) == 0) {
1478 			cas_txkick(sc);
1479 			kicked = 1;
1480 		} else
1481 			kicked = 0;
1482 		ntx++;
1483 		BPF_MTAP(ifp, m);
1484 	}
1485 
1486 	if (ntx > 0) {
1487 		if (kicked == 0)
1488 			cas_txkick(sc);
1489 #ifdef CAS_DEBUG
1490 		CTR2(KTR_CAS, "%s: packets enqueued, OWN on %d",
1491 		    device_get_name(sc->sc_dev), sc->sc_txnext);
1492 #endif
1493 
1494 		/* Set a watchdog timer in case the chip flakes out. */
1495 		sc->sc_wdog_timer = 5;
1496 #ifdef CAS_DEBUG
1497 		CTR3(KTR_CAS, "%s: %s: watchdog %d",
1498 		    device_get_name(sc->sc_dev), __func__,
1499 		    sc->sc_wdog_timer);
1500 #endif
1501 	}
1502 
1503 	CAS_UNLOCK(sc);
1504 }
1505 
1506 static void
1507 cas_tint(struct cas_softc *sc)
1508 {
1509 	struct ifnet *ifp = sc->sc_ifp;
1510 	struct cas_txsoft *txs;
1511 	int progress;
1512 	uint32_t txlast;
1513 #ifdef CAS_DEBUG
1514 	int i;
1515 
1516 	CAS_LOCK_ASSERT(sc, MA_OWNED);
1517 
1518 	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
1519 #endif
1520 
1521 	/*
1522 	 * Go through our TX list and free mbufs for those
1523 	 * frames that have been transmitted.
1524 	 */
1525 	progress = 0;
1526 	CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD);
1527 	while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) {
1528 #ifdef CAS_DEBUG
1529 		if ((ifp->if_flags & IFF_DEBUG) != 0) {
1530 			printf("    txsoft %p transmit chain:\n", txs);
1531 			for (i = txs->txs_firstdesc;; i = CAS_NEXTTX(i)) {
1532 				printf("descriptor %d: ", i);
1533 				printf("cd_flags: 0x%016llx\t",
1534 				    (long long)le64toh(
1535 				    sc->sc_txdescs[i].cd_flags));
1536 				printf("cd_buf_ptr: 0x%016llx\n",
1537 				    (long long)le64toh(
1538 				    sc->sc_txdescs[i].cd_buf_ptr));
1539 				if (i == txs->txs_lastdesc)
1540 					break;
1541 			}
1542 		}
1543 #endif
1544 
1545 		/*
1546 		 * In theory, we could harvest some descriptors before
1547 		 * the ring is empty, but that's a bit complicated.
1548 		 *
1549 		 * CAS_TX_COMPn points to the last descriptor
1550 		 * processed + 1.
1551 		 */
1552 		txlast = CAS_READ_4(sc, CAS_TX_COMP3);
1553 #ifdef CAS_DEBUG
1554 		CTR4(KTR_CAS, "%s: txs->txs_firstdesc = %d, "
1555 		    "txs->txs_lastdesc = %d, txlast = %d",
1556 		    __func__, txs->txs_firstdesc, txs->txs_lastdesc, txlast);
1557 #endif
1558 		if (txs->txs_firstdesc <= txs->txs_lastdesc) {
1559 			if ((txlast >= txs->txs_firstdesc) &&
1560 			    (txlast <= txs->txs_lastdesc))
1561 				break;
1562 		} else {
1563 			/* Ick -- this command wraps. */
1564 			if ((txlast >= txs->txs_firstdesc) ||
1565 			    (txlast <= txs->txs_lastdesc))
1566 				break;
1567 		}
1568 
1569 #ifdef CAS_DEBUG
1570 		CTR1(KTR_CAS, "%s: releasing a descriptor", __func__);
1571 #endif
1572 		STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q);
1573 
1574 		sc->sc_txfree += txs->txs_ndescs;
1575 
1576 		bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap,
1577 		    BUS_DMASYNC_POSTWRITE);
1578 		bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap);
1579 		if (txs->txs_mbuf != NULL) {
1580 			m_freem(txs->txs_mbuf);
1581 			txs->txs_mbuf = NULL;
1582 		}
1583 
1584 		STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q);
1585 
1586 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1587 		progress = 1;
1588 	}
1589 
1590 #ifdef CAS_DEBUG
1591 	CTR5(KTR_CAS, "%s: CAS_TX_SM1 %x CAS_TX_SM2 %x CAS_TX_DESC_BASE %llx "
1592 	    "CAS_TX_COMP3 %x",
1593 	    __func__, CAS_READ_4(sc, CAS_TX_SM1), CAS_READ_4(sc, CAS_TX_SM2),
1594 	    ((long long)CAS_READ_4(sc, CAS_TX_DESC3_BASE_HI) << 32) |
1595 	    CAS_READ_4(sc, CAS_TX_DESC3_BASE_LO),
1596 	    CAS_READ_4(sc, CAS_TX_COMP3));
1597 #endif
1598 
1599 	if (progress) {
1600 		/* We freed some descriptors, so reset IFF_DRV_OACTIVE. */
1601 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1602 		if (STAILQ_EMPTY(&sc->sc_txdirtyq))
1603 			sc->sc_wdog_timer = 0;
1604 	}
1605 
1606 #ifdef CAS_DEBUG
1607 	CTR3(KTR_CAS, "%s: %s: watchdog %d",
1608 	    device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer);
1609 #endif
1610 }
1611 
1612 static void
1613 cas_rint_timeout(void *arg)
1614 {
1615 	struct cas_softc *sc = arg;
1616 
1617 	CAS_LOCK_ASSERT(sc, MA_OWNED);
1618 
1619 	cas_rint(sc);
1620 }
1621 
/*
 * RX completion processing.
 *
 * Must be called with the softc lock held.  Walks the RX completion ring
 * from sc_rxcptr up to the completion head read from the chip at entry.
 * For each completion the referenced header or data buffer is attached to
 * a freshly allocated mbuf as external storage (no copy) and passed to
 * the stack; the softc lock is dropped around the if_input call.  Buffers
 * are reference counted and handed back to the chip via cas_add_rxdesc()
 * once the completion says they may be released and the last reference
 * is gone.  Finally the completion tail register is updated.
 */
static void
cas_rint(struct cas_softc *sc)
{
	struct cas_rxdsoft *rxds, *rxds2;
	struct ifnet *ifp = sc->sc_ifp;
	struct mbuf *m, *m2;
	uint64_t word1, word2, word3, word4;
	uint32_t rxhead;
	u_int idx, idx2, len, off, skip;

	CAS_LOCK_ASSERT(sc, MA_OWNED);

	/* Cancel a pending retry; it is re-armed below if still needed. */
	callout_stop(&sc->sc_rx_ch);

#ifdef CAS_DEBUG
	CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__);
#endif

/*
 * Helpers operating on the current completion entry; n selects one of
 * its 64-bit words.
 * NOTE(review): "##" inside the string literals is not expanded by the
 * preprocessor, so the messages contain the text "word ## n" literally.
 */
#define	PRINTWORD(n, delimiter)						\
	printf("word ## n: 0x%016llx%c", (long long)word ## n, delimiter)

#define	SKIPASSERT(n)							\
	KASSERT(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n == 0,	\
	    ("%s: word ## n not 0", __func__))

#define	WORDTOH(n)							\
	word ## n = le64toh(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n)

	/*
	 * Read the completion head register once.  This limits
	 * how long the following loop can execute.
	 */
	rxhead = CAS_READ_4(sc, CAS_RX_COMP_HEAD);
#ifdef CAS_DEBUG
	CTR4(KTR_CAS, "%s: sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d",
	    __func__, sc->sc_rxcptr, sc->sc_rxdptr, rxhead);
#endif
	skip = 0;
	CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	for (; sc->sc_rxcptr != rxhead;
	    sc->sc_rxcptr = CAS_NEXTRXCOMP(sc->sc_rxcptr)) {
		/*
		 * The previous completion announced entries to skip; these
		 * are expected to be empty and are only re-initialized.
		 */
		if (skip != 0) {
			SKIPASSERT(1);
			SKIPASSERT(2);
			SKIPASSERT(3);

			--skip;
			goto skip;
		}

		/* Fetch the completion words in host byte order. */
		WORDTOH(1);
		WORDTOH(2);
		WORDTOH(3);
		WORDTOH(4);

#ifdef CAS_DEBUG
		if ((ifp->if_flags & IFF_DEBUG) != 0) {
			printf("    completion %d: ", sc->sc_rxcptr);
			PRINTWORD(1, '\t');
			PRINTWORD(2, '\t');
			PRINTWORD(3, '\t');
			PRINTWORD(4, '\n');
		}
#endif

		if (__predict_false(
		    (word1 & CAS_RC1_TYPE_MASK) == CAS_RC1_TYPE_HW ||
		    (word4 & CAS_RC4_ZERO) != 0)) {
			/*
			 * The descriptor is still marked as owned, although
			 * it is supposed to have completed.  This has been
			 * observed on some machines.  Just exiting here
			 * might leave the packet sitting around until another
			 * one arrives to trigger a new interrupt, which is
			 * generally undesirable, so set up a timeout.
			 */
			callout_reset(&sc->sc_rx_ch, CAS_RXOWN_TICKS,
			    cas_rint_timeout, sc);
			break;
		}

		/*
		 * Bad frame or length mismatch: count it and move on.
		 * NOTE(review): the continue bypasses the buffer release,
		 * the skip count update and cas_rxcompinit() below for this
		 * completion -- confirm that this is intended.
		 */
		if (__predict_false(
		    (word4 & (CAS_RC4_BAD | CAS_RC4_LEN_MMATCH)) != 0)) {
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
			device_printf(sc->sc_dev,
			    "receive error: CRC error\n");
			continue;
		}

		/*
		 * Sanity checks: a completion describes either a header or
		 * a data buffer, never both.
		 */
		KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 ||
		    CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0,
		    ("%s: data and header present", __func__));
		KASSERT((word1 & CAS_RC1_SPLIT_PKT) == 0 ||
		    CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0,
		    ("%s: split and header present", __func__));
		KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 ||
		    (word1 & CAS_RC1_RELEASE_HDR) == 0,
		    ("%s: data present but header release", __func__));
		KASSERT(CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0 ||
		    (word1 & CAS_RC1_RELEASE_DATA) == 0,
		    ("%s: header present but data release", __func__));

		if ((len = CAS_GET(word2, CAS_RC2_HDR_SIZE)) != 0) {
			/* The frame is located in a header buffer. */
			idx = CAS_GET(word2, CAS_RC2_HDR_INDEX);
			off = CAS_GET(word2, CAS_RC2_HDR_OFF);
#ifdef CAS_DEBUG
			CTR4(KTR_CAS, "%s: hdr at idx %d, off %d, len %d",
			    __func__, idx, off, len);
#endif
			rxds = &sc->sc_rxdsoft[idx];
			MGETHDR(m, M_NOWAIT, MT_DATA);
			if (m != NULL) {
				/*
				 * Attach the buffer as read-only external
				 * storage; the reference taken here is
				 * dropped by cas_free().  The header offset
				 * is in units of 256 bytes.
				 */
				refcount_acquire(&rxds->rxds_refcount);
				bus_dmamap_sync(sc->sc_rdmatag,
				    rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD);
				m_extadd(m, (char *)rxds->rxds_buf +
				    off * 256 + ETHER_ALIGN, len, cas_free,
				    sc, (void *)(uintptr_t)idx,
				    M_RDONLY, EXT_NET_DRV);
				if ((m->m_flags & M_EXT) == 0) {
					m_freem(m);
					m = NULL;
				}
			}
			if (m != NULL) {
				m->m_pkthdr.rcvif = ifp;
				m->m_pkthdr.len = m->m_len = len;
				if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
				if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
					cas_rxcksum(m, CAS_GET(word4,
					    CAS_RC4_TCP_CSUM));
				/* Pass it on. */
				CAS_UNLOCK(sc);
				(*ifp->if_input)(ifp, m);
				CAS_LOCK(sc);
			} else
				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);

			/*
			 * Give the buffer back to the chip if it is released
			 * by this completion and nobody else references it.
			 */
			if ((word1 & CAS_RC1_RELEASE_HDR) != 0 &&
			    refcount_release(&rxds->rxds_refcount) != 0)
				cas_add_rxdesc(sc, idx);
		} else if ((len = CAS_GET(word1, CAS_RC1_DATA_SIZE)) != 0) {
			/* The frame is located in one or two data buffers. */
			idx = CAS_GET(word1, CAS_RC1_DATA_INDEX);
			off = CAS_GET(word1, CAS_RC1_DATA_OFF);
#ifdef CAS_DEBUG
			CTR4(KTR_CAS, "%s: data at idx %d, off %d, len %d",
			    __func__, idx, off, len);
#endif
			rxds = &sc->sc_rxdsoft[idx];
			MGETHDR(m, M_NOWAIT, MT_DATA);
			if (m != NULL) {
				refcount_acquire(&rxds->rxds_refcount);
				off += ETHER_ALIGN;
				/*
				 * The first mbuf covers at most the rest of
				 * this page; anything beyond that is taken
				 * from the next buffer below.
				 */
				m->m_len = min(CAS_PAGE_SIZE - off, len);
				bus_dmamap_sync(sc->sc_rdmatag,
				    rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD);
				m_extadd(m, (char *)rxds->rxds_buf + off,
				    m->m_len, cas_free, sc,
				    (void *)(uintptr_t)idx, M_RDONLY,
				    EXT_NET_DRV);
				if ((m->m_flags & M_EXT) == 0) {
					m_freem(m);
					m = NULL;
				}
			}
			idx2 = 0;
			m2 = NULL;
			rxds2 = NULL;
			if ((word1 & CAS_RC1_SPLIT_PKT) != 0) {
				/*
				 * The frame continues at the start of a
				 * second buffer; chain a second mbuf for
				 * the remainder.
				 */
				KASSERT((word1 & CAS_RC1_RELEASE_NEXT) != 0,
				    ("%s: split but no release next",
				    __func__));

				idx2 = CAS_GET(word2, CAS_RC2_NEXT_INDEX);
#ifdef CAS_DEBUG
				CTR2(KTR_CAS, "%s: split at idx %d",
				    __func__, idx2);
#endif
				rxds2 = &sc->sc_rxdsoft[idx2];
				if (m != NULL) {
					MGET(m2, M_NOWAIT, MT_DATA);
					if (m2 != NULL) {
						refcount_acquire(
						    &rxds2->rxds_refcount);
						m2->m_len = len - m->m_len;
						bus_dmamap_sync(
						    sc->sc_rdmatag,
						    rxds2->rxds_dmamap,
						    BUS_DMASYNC_POSTREAD);
						m_extadd(m2,
						    (char *)rxds2->rxds_buf,
						    m2->m_len, cas_free, sc,
						    (void *)(uintptr_t)idx2,
						    M_RDONLY, EXT_NET_DRV);
						if ((m2->m_flags & M_EXT) ==
						    0) {
							m_freem(m2);
							m2 = NULL;
						}
					}
				}
				/*
				 * Without the second part the frame is
				 * incomplete, so drop the first part, too.
				 */
				if (m2 != NULL)
					m->m_next = m2;
				else if (m != NULL) {
					m_freem(m);
					m = NULL;
				}
			}
			if (m != NULL) {
				m->m_pkthdr.rcvif = ifp;
				m->m_pkthdr.len = len;
				if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
				if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
					cas_rxcksum(m, CAS_GET(word4,
					    CAS_RC4_TCP_CSUM));
				/* Pass it on. */
				CAS_UNLOCK(sc);
				(*ifp->if_input)(ifp, m);
				CAS_LOCK(sc);
			} else
				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);

			/* Recycle the buffer(s) released by this completion. */
			if ((word1 & CAS_RC1_RELEASE_DATA) != 0 &&
			    refcount_release(&rxds->rxds_refcount) != 0)
				cas_add_rxdesc(sc, idx);
			if ((word1 & CAS_RC1_SPLIT_PKT) != 0 &&
			    refcount_release(&rxds2->rxds_refcount) != 0)
				cas_add_rxdesc(sc, idx2);
		}

		/* Number of following completion entries to skip. */
		skip = CAS_GET(word1, CAS_RC1_SKIP);

 skip:
		cas_rxcompinit(&sc->sc_rxcomps[sc->sc_rxcptr]);
		/*
		 * The lock was dropped while passing the packet up, so the
		 * interface may have been stopped in the meantime.
		 */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}
	CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* Tell the chip how far the completion ring has been processed. */
	CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, sc->sc_rxcptr);

#undef PRINTWORD
#undef SKIPASSERT
#undef WORDTOH

#ifdef CAS_DEBUG
	CTR4(KTR_CAS, "%s: done sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d",
	    __func__, sc->sc_rxcptr, sc->sc_rxdptr,
	    CAS_READ_4(sc, CAS_RX_COMP_HEAD));
#endif
}
1872 
1873 static void
1874 cas_free(struct mbuf *m)
1875 {
1876 	struct cas_rxdsoft *rxds;
1877 	struct cas_softc *sc;
1878 	u_int idx, locked;
1879 
1880 	sc = m->m_ext.ext_arg1;
1881 	idx = (uintptr_t)m->m_ext.ext_arg2;
1882 	rxds = &sc->sc_rxdsoft[idx];
1883 	if (refcount_release(&rxds->rxds_refcount) == 0)
1884 		return;
1885 
1886 	/*
1887 	 * NB: this function can be called via m_freem(9) within
1888 	 * this driver!
1889 	 */
1890 	if ((locked = CAS_LOCK_OWNED(sc)) == 0)
1891 		CAS_LOCK(sc);
1892 	cas_add_rxdesc(sc, idx);
1893 	if (locked == 0)
1894 		CAS_UNLOCK(sc);
1895 }
1896 
1897 static inline void
1898 cas_add_rxdesc(struct cas_softc *sc, u_int idx)
1899 {
1900 
1901 	CAS_LOCK_ASSERT(sc, MA_OWNED);
1902 
1903 	bus_dmamap_sync(sc->sc_rdmatag, sc->sc_rxdsoft[idx].rxds_dmamap,
1904 	    BUS_DMASYNC_PREREAD);
1905 	CAS_UPDATE_RXDESC(sc, sc->sc_rxdptr, idx);
1906 	sc->sc_rxdptr = CAS_NEXTRXDESC(sc->sc_rxdptr);
1907 
1908 	/*
1909 	 * Update the RX kick register.  This register has to point to the
1910 	 * descriptor after the last valid one (before the current batch)
1911 	 * and for optimum performance should be incremented in multiples
1912 	 * of 4 (the DMA engine fetches/updates descriptors in batches of 4).
1913 	 */
1914 	if ((sc->sc_rxdptr % 4) == 0) {
1915 		CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1916 		CAS_WRITE_4(sc, CAS_RX_KICK,
1917 		    (sc->sc_rxdptr + CAS_NRXDESC - 4) & CAS_NRXDESC_MASK);
1918 	}
1919 }
1920 
1921 static void
1922 cas_eint(struct cas_softc *sc, u_int status)
1923 {
1924 	struct ifnet *ifp = sc->sc_ifp;
1925 
1926 	CAS_LOCK_ASSERT(sc, MA_OWNED);
1927 
1928 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1929 
1930 	device_printf(sc->sc_dev, "%s: status 0x%x", __func__, status);
1931 	if ((status & CAS_INTR_PCI_ERROR_INT) != 0) {
1932 		status = CAS_READ_4(sc, CAS_ERROR_STATUS);
1933 		printf(", PCI bus error 0x%x", status);
1934 		if ((status & CAS_ERROR_OTHER) != 0) {
1935 			status = pci_read_config(sc->sc_dev, PCIR_STATUS, 2);
1936 			printf(", PCI status 0x%x", status);
1937 			pci_write_config(sc->sc_dev, PCIR_STATUS, status, 2);
1938 		}
1939 	}
1940 	printf("\n");
1941 
1942 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1943 	cas_init_locked(sc);
1944 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1945 		taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
1946 }
1947 
1948 static int
1949 cas_intr(void *v)
1950 {
1951 	struct cas_softc *sc = v;
1952 
1953 	if (__predict_false((CAS_READ_4(sc, CAS_STATUS_ALIAS) &
1954 	    CAS_INTR_SUMMARY) == 0))
1955 		return (FILTER_STRAY);
1956 
1957 	/* Disable interrupts. */
1958 	CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff);
1959 	taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task);
1960 
1961 	return (FILTER_HANDLED);
1962 }
1963 
/*
 * Deferred interrupt processing (presumably the handler behind
 * sc_intr_task, which cas_intr() enqueues after masking all interrupts).
 * Reads the interrupt status, dispatches error, MAC, RX and TX handling
 * under the softc lock and finally either re-enqueues itself when more
 * work is pending or unmasks the interrupts of interest.
 *
 * arg is the softc; pending is unused.  Must be entered without the
 * softc lock held.
 */
static void
cas_intr_task(void *arg, int pending __unused)
{
	struct cas_softc *sc = arg;
	struct ifnet *ifp = sc->sc_ifp;
	uint32_t status, status2;

	CAS_LOCK_ASSERT(sc, MA_NOTOWNED);

	/* Interface not running: return and leave the interrupt mask as is. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	status = CAS_READ_4(sc, CAS_STATUS);
	/* Nothing pending after all; just unmask the interrupts again. */
	if (__predict_false((status & CAS_INTR_SUMMARY) == 0))
		goto done;

	CAS_LOCK(sc);
#ifdef CAS_DEBUG
	CTR4(KTR_CAS, "%s: %s: cplt %x, status %x",
	    device_get_name(sc->sc_dev), __func__,
	    (status >> CAS_STATUS_TX_COMP3_SHFT), (u_int)status);

	/*
	 * PCS interrupts must be cleared, otherwise no traffic is passed!
	 */
	if ((status & CAS_INTR_PCS_INT) != 0) {
		/* The register is deliberately read twice and the results ORed. */
		status2 =
		    CAS_READ_4(sc, CAS_PCS_INTR_STATUS) |
		    CAS_READ_4(sc, CAS_PCS_INTR_STATUS);
		if ((status2 & CAS_PCS_INTR_LINK) != 0)
			device_printf(sc->sc_dev,
			    "%s: PCS link status changed\n", __func__);
	}
	/*
	 * NOTE(review): CAS_MAC_CTRL_STATUS is used as a register offset on
	 * the next-but-one line, yet here it is used as a mask on the
	 * interrupt status; this looks like it wants an interrupt bit
	 * constant instead -- confirm against if_casreg.h.
	 */
	if ((status & CAS_MAC_CTRL_STATUS) != 0) {
		status2 = CAS_READ_4(sc, CAS_MAC_CTRL_STATUS);
		if ((status2 & CAS_MAC_CTRL_PAUSE) != 0)
			device_printf(sc->sc_dev,
			    "%s: PAUSE received (PAUSE time %d slots)\n",
			    __func__,
			    (status2 & CAS_MAC_CTRL_STATUS_PT_MASK) >>
			    CAS_MAC_CTRL_STATUS_PT_SHFT);
		/*
		 * NOTE(review): same bit as tested above, so both messages
		 * are always printed together; a distinct "PAUSE state" bit
		 * was probably intended -- confirm.
		 */
		if ((status2 & CAS_MAC_CTRL_PAUSE) != 0)
			device_printf(sc->sc_dev,
			    "%s: transited to PAUSE state\n", __func__);
		if ((status2 & CAS_MAC_CTRL_NON_PAUSE) != 0)
			device_printf(sc->sc_dev,
			    "%s: transited to non-PAUSE state\n", __func__);
	}
	if ((status & CAS_INTR_MIF) != 0)
		device_printf(sc->sc_dev, "%s: MIF interrupt\n", __func__);
#endif

	/*
	 * Fatal errors: cas_eint() logs and reinitializes the interface.
	 * Interrupts are not unmasked here (presumably the reinitialization
	 * takes care of the interrupt mask -- confirm in cas_init_locked()).
	 */
	if (__predict_false((status &
	    (CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_TAG_ERR |
	    CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT)) != 0)) {
		cas_eint(sc, status);
		CAS_UNLOCK(sc);
		return;
	}

	/* TX MAC events: count underruns/oversized frames, log the rest. */
	if (__predict_false(status & CAS_INTR_TX_MAC_INT)) {
		status2 = CAS_READ_4(sc, CAS_MAC_TX_STATUS);
		if ((status2 &
		    (CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR)) != 0)
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		else if ((status2 & ~CAS_MAC_TX_FRAME_XMTD) != 0)
			device_printf(sc->sc_dev,
			    "MAC TX fault, status %x\n", status2);
	}

	/* RX MAC events: count overflows, log the rest. */
	if (__predict_false(status & CAS_INTR_RX_MAC_INT)) {
		status2 = CAS_READ_4(sc, CAS_MAC_RX_STATUS);
		if ((status2 & CAS_MAC_RX_OVERFLOW) != 0)
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		else if ((status2 & ~CAS_MAC_RX_FRAME_RCVD) != 0)
			device_printf(sc->sc_dev,
			    "MAC RX fault, status %x\n", status2);
	}

	/* Any RX completion or RX resource condition is handled by cas_rint(). */
	if ((status &
	    (CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL |
	    CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0) {
		cas_rint(sc);
#ifdef CAS_DEBUG
		if (__predict_false((status &
		    (CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL |
		    CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0))
			device_printf(sc->sc_dev,
			    "RX fault, status %x\n", status);
#endif
	}

	/* TX completions. */
	if ((status &
	    (CAS_INTR_TX_INT_ME | CAS_INTR_TX_ALL | CAS_INTR_TX_DONE)) != 0)
		cas_tint(sc);

	/*
	 * If the interface stopped running while we were processing, bail
	 * out without unmasking; otherwise kick the TX task when packets
	 * are waiting in the send queue.
	 */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		CAS_UNLOCK(sc);
		return;
	} else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
	CAS_UNLOCK(sc);

	/* More work arrived in the meantime: run again, keep interrupts masked. */
	status = CAS_READ_4(sc, CAS_STATUS_ALIAS);
	if (__predict_false((status & CAS_INTR_SUMMARY) != 0)) {
		taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task);
		return;
	}

 done:
	/* Re-enable interrupts. */
	CAS_WRITE_4(sc, CAS_INTMASK,
	    ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR |
	    CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR |
	    CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY |
	    CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH |
	    CAS_INTR_PCI_ERROR_INT
#ifdef CAS_DEBUG
	    | CAS_INTR_PCS_INT | CAS_INTR_MIF
#endif
	));
}
2086 
2087 static void
2088 cas_watchdog(struct cas_softc *sc)
2089 {
2090 	struct ifnet *ifp = sc->sc_ifp;
2091 
2092 	CAS_LOCK_ASSERT(sc, MA_OWNED);
2093 
2094 #ifdef CAS_DEBUG
2095 	CTR4(KTR_CAS,
2096 	    "%s: CAS_RX_CONF %x CAS_MAC_RX_STATUS %x CAS_MAC_RX_CONF %x",
2097 	    __func__, CAS_READ_4(sc, CAS_RX_CONF),
2098 	    CAS_READ_4(sc, CAS_MAC_RX_STATUS),
2099 	    CAS_READ_4(sc, CAS_MAC_RX_CONF));
2100 	CTR4(KTR_CAS,
2101 	    "%s: CAS_TX_CONF %x CAS_MAC_TX_STATUS %x CAS_MAC_TX_CONF %x",
2102 	    __func__, CAS_READ_4(sc, CAS_TX_CONF),
2103 	    CAS_READ_4(sc, CAS_MAC_TX_STATUS),
2104 	    CAS_READ_4(sc, CAS_MAC_TX_CONF));
2105 #endif
2106 
2107 	if (sc->sc_wdog_timer == 0 || --sc->sc_wdog_timer != 0)
2108 		return;
2109 
2110 	if ((sc->sc_flags & CAS_LINK) != 0)
2111 		device_printf(sc->sc_dev, "device timeout\n");
2112 	else if (bootverbose)
2113 		device_printf(sc->sc_dev, "device timeout (no link)\n");
2114 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2115 
2116 	/* Try to get more packets going. */
2117 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2118 	cas_init_locked(sc);
2119 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2120 		taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task);
2121 }
2122 
2123 static void
2124 cas_mifinit(struct cas_softc *sc)
2125 {
2126 
2127 	/* Configure the MIF in frame mode. */
2128 	CAS_WRITE_4(sc, CAS_MIF_CONF,
2129 	    CAS_READ_4(sc, CAS_MIF_CONF) & ~CAS_MIF_CONF_BB_MODE);
2130 	CAS_BARRIER(sc, CAS_MIF_CONF, 4,
2131 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2132 }
2133 
2134 /*
2135  * MII interface
2136  *
2137  * The MII interface supports at least three different operating modes:
2138  *
2139  * Bitbang mode is implemented using data, clock and output enable registers.
2140  *
2141  * Frame mode is implemented by loading a complete frame into the frame
2142  * register and polling the valid bit for completion.
2143  *
2144  * Polling mode uses the frame register but completion is indicated by
2145  * an interrupt.
2146  *
2147  */
2148 static int
2149 cas_mii_readreg(device_t dev, int phy, int reg)
2150 {
2151 	struct cas_softc *sc;
2152 	int n;
2153 	uint32_t v;
2154 
2155 #ifdef CAS_DEBUG_PHY
2156 	printf("%s: phy %d reg %d\n", __func__, phy, reg);
2157 #endif
2158 
2159 	sc = device_get_softc(dev);
2160 	if ((sc->sc_flags & CAS_SERDES) != 0) {
2161 		switch (reg) {
2162 		case MII_BMCR:
2163 			reg = CAS_PCS_CTRL;
2164 			break;
2165 		case MII_BMSR:
2166 			reg = CAS_PCS_STATUS;
2167 			break;
2168 		case MII_PHYIDR1:
2169 		case MII_PHYIDR2:
2170 			return (0);
2171 		case MII_ANAR:
2172 			reg = CAS_PCS_ANAR;
2173 			break;
2174 		case MII_ANLPAR:
2175 			reg = CAS_PCS_ANLPAR;
2176 			break;
2177 		case MII_EXTSR:
2178 			return (EXTSR_1000XFDX | EXTSR_1000XHDX);
2179 		default:
2180 			device_printf(sc->sc_dev,
2181 			    "%s: unhandled register %d\n", __func__, reg);
2182 			return (0);
2183 		}
2184 		return (CAS_READ_4(sc, reg));
2185 	}
2186 
2187 	/* Construct the frame command. */
2188 	v = CAS_MIF_FRAME_READ |
2189 	    (phy << CAS_MIF_FRAME_PHY_SHFT) |
2190 	    (reg << CAS_MIF_FRAME_REG_SHFT);
2191 
2192 	CAS_WRITE_4(sc, CAS_MIF_FRAME, v);
2193 	CAS_BARRIER(sc, CAS_MIF_FRAME, 4,
2194 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2195 	for (n = 0; n < 100; n++) {
2196 		DELAY(1);
2197 		v = CAS_READ_4(sc, CAS_MIF_FRAME);
2198 		if (v & CAS_MIF_FRAME_TA_LSB)
2199 			return (v & CAS_MIF_FRAME_DATA);
2200 	}
2201 
2202 	device_printf(sc->sc_dev, "%s: timed out\n", __func__);
2203 	return (0);
2204 }
2205 
2206 static int
2207 cas_mii_writereg(device_t dev, int phy, int reg, int val)
2208 {
2209 	struct cas_softc *sc;
2210 	int n;
2211 	uint32_t v;
2212 
2213 #ifdef CAS_DEBUG_PHY
2214 	printf("%s: phy %d reg %d val %x\n", phy, reg, val, __func__);
2215 #endif
2216 
2217 	sc = device_get_softc(dev);
2218 	if ((sc->sc_flags & CAS_SERDES) != 0) {
2219 		switch (reg) {
2220 		case MII_BMSR:
2221 			reg = CAS_PCS_STATUS;
2222 			break;
2223 		case MII_BMCR:
2224 			reg = CAS_PCS_CTRL;
2225 			if ((val & CAS_PCS_CTRL_RESET) == 0)
2226 				break;
2227 			CAS_WRITE_4(sc, CAS_PCS_CTRL, val);
2228 			CAS_BARRIER(sc, CAS_PCS_CTRL, 4,
2229 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2230 			if (!cas_bitwait(sc, CAS_PCS_CTRL,
2231 			    CAS_PCS_CTRL_RESET, 0))
2232 				device_printf(sc->sc_dev,
2233 				    "cannot reset PCS\n");
2234 			/* FALLTHROUGH */
2235 		case MII_ANAR:
2236 			CAS_WRITE_4(sc, CAS_PCS_CONF, 0);
2237 			CAS_BARRIER(sc, CAS_PCS_CONF, 4,
2238 			    BUS_SPACE_BARRIER_WRITE);
2239 			CAS_WRITE_4(sc, CAS_PCS_ANAR, val);
2240 			CAS_BARRIER(sc, CAS_PCS_ANAR, 4,
2241 			    BUS_SPACE_BARRIER_WRITE);
2242 			CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL,
2243 			    CAS_PCS_SERDES_CTRL_ESD);
2244 			CAS_BARRIER(sc, CAS_PCS_CONF, 4,
2245 			    BUS_SPACE_BARRIER_WRITE);
2246 			CAS_WRITE_4(sc, CAS_PCS_CONF,
2247 			    CAS_PCS_CONF_EN);
2248 			CAS_BARRIER(sc, CAS_PCS_CONF, 4,
2249 			    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2250 			return (0);
2251 		case MII_ANLPAR:
2252 			reg = CAS_PCS_ANLPAR;
2253 			break;
2254 		default:
2255 			device_printf(sc->sc_dev,
2256 			    "%s: unhandled register %d\n", __func__, reg);
2257 			return (0);
2258 		}
2259 		CAS_WRITE_4(sc, reg, val);
2260 		CAS_BARRIER(sc, reg, 4,
2261 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2262 		return (0);
2263 	}
2264 
2265 	/* Construct the frame command. */
2266 	v = CAS_MIF_FRAME_WRITE |
2267 	    (phy << CAS_MIF_FRAME_PHY_SHFT) |
2268 	    (reg << CAS_MIF_FRAME_REG_SHFT) |
2269 	    (val & CAS_MIF_FRAME_DATA);
2270 
2271 	CAS_WRITE_4(sc, CAS_MIF_FRAME, v);
2272 	CAS_BARRIER(sc, CAS_MIF_FRAME, 4,
2273 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2274 	for (n = 0; n < 100; n++) {
2275 		DELAY(1);
2276 		v = CAS_READ_4(sc, CAS_MIF_FRAME);
2277 		if (v & CAS_MIF_FRAME_TA_LSB)
2278 			return (1);
2279 	}
2280 
2281 	device_printf(sc->sc_dev, "%s: timed out\n", __func__);
2282 	return (0);
2283 }
2284 
2285 static void
2286 cas_mii_statchg(device_t dev)
2287 {
2288 	struct cas_softc *sc;
2289 	struct ifnet *ifp;
2290 	int gigabit;
2291 	uint32_t rxcfg, txcfg, v;
2292 
2293 	sc = device_get_softc(dev);
2294 	ifp = sc->sc_ifp;
2295 
2296 	CAS_LOCK_ASSERT(sc, MA_OWNED);
2297 
2298 #ifdef CAS_DEBUG
2299 	if ((ifp->if_flags & IFF_DEBUG) != 0)
2300 		device_printf(sc->sc_dev, "%s: status changen", __func__);
2301 #endif
2302 
2303 	if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) != 0 &&
2304 	    IFM_SUBTYPE(sc->sc_mii->mii_media_active) != IFM_NONE)
2305 		sc->sc_flags |= CAS_LINK;
2306 	else
2307 		sc->sc_flags &= ~CAS_LINK;
2308 
2309 	switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) {
2310 	case IFM_1000_SX:
2311 	case IFM_1000_LX:
2312 	case IFM_1000_CX:
2313 	case IFM_1000_T:
2314 		gigabit = 1;
2315 		break;
2316 	default:
2317 		gigabit = 0;
2318 	}
2319 
2320 	/*
2321 	 * The configuration done here corresponds to the steps F) and
2322 	 * G) and as far as enabling of RX and TX MAC goes also step H)
2323 	 * of the initialization sequence outlined in section 11.2.1 of
2324 	 * the Cassini+ ASIC Specification.
2325 	 */
2326 
2327 	rxcfg = sc->sc_mac_rxcfg;
2328 	rxcfg &= ~CAS_MAC_RX_CONF_CARR;
2329 	txcfg = CAS_MAC_TX_CONF_EN_IPG0 | CAS_MAC_TX_CONF_NGU |
2330 	    CAS_MAC_TX_CONF_NGUL;
2331 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0)
2332 		txcfg |= CAS_MAC_TX_CONF_ICARR | CAS_MAC_TX_CONF_ICOLLIS;
2333 	else if (gigabit != 0) {
2334 		rxcfg |= CAS_MAC_RX_CONF_CARR;
2335 		txcfg |= CAS_MAC_TX_CONF_CARR;
2336 	}
2337 	(void)cas_disable_tx(sc);
2338 	CAS_WRITE_4(sc, CAS_MAC_TX_CONF, txcfg);
2339 	(void)cas_disable_rx(sc);
2340 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, rxcfg);
2341 
2342 	v = CAS_READ_4(sc, CAS_MAC_CTRL_CONF) &
2343 	    ~(CAS_MAC_CTRL_CONF_TXP | CAS_MAC_CTRL_CONF_RXP);
2344 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
2345 	    IFM_ETH_RXPAUSE) != 0)
2346 		v |= CAS_MAC_CTRL_CONF_RXP;
2347 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) &
2348 	    IFM_ETH_TXPAUSE) != 0)
2349 		v |= CAS_MAC_CTRL_CONF_TXP;
2350 	CAS_WRITE_4(sc, CAS_MAC_CTRL_CONF, v);
2351 
2352 	/*
2353 	 * All supported chips have a bug causing incorrect checksum
2354 	 * to be calculated when letting them strip the FCS in half-
2355 	 * duplex mode.  In theory we could disable FCS stripping and
2356 	 * manually adjust the checksum accordingly.  It seems to make
2357 	 * more sense to optimze for the common case and just disable
2358 	 * hardware checksumming in half-duplex mode though.
2359 	 */
2360 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) {
2361 		ifp->if_capenable &= ~IFCAP_HWCSUM;
2362 		ifp->if_hwassist = 0;
2363 	} else if ((sc->sc_flags & CAS_NO_CSUM) == 0) {
2364 		ifp->if_capenable = ifp->if_capabilities;
2365 		ifp->if_hwassist = CAS_CSUM_FEATURES;
2366 	}
2367 
2368 	if (sc->sc_variant == CAS_SATURN) {
2369 		if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0)
2370 			/* silicon bug workaround */
2371 			CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x41);
2372 		else
2373 			CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7);
2374 	}
2375 
2376 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0 &&
2377 	    gigabit != 0)
2378 		CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME,
2379 		    CAS_MAC_SLOT_TIME_CARR);
2380 	else
2381 		CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME,
2382 		    CAS_MAC_SLOT_TIME_NORM);
2383 
2384 	/* XIF Configuration */
2385 	v = CAS_MAC_XIF_CONF_TX_OE | CAS_MAC_XIF_CONF_LNKLED;
2386 	if ((sc->sc_flags & CAS_SERDES) == 0) {
2387 		if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0)
2388 			v |= CAS_MAC_XIF_CONF_NOECHO;
2389 		v |= CAS_MAC_XIF_CONF_BUF_OE;
2390 	}
2391 	if (gigabit != 0)
2392 		v |= CAS_MAC_XIF_CONF_GMII;
2393 	if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0)
2394 		v |= CAS_MAC_XIF_CONF_FDXLED;
2395 	CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, v);
2396 
2397 	sc->sc_mac_rxcfg = rxcfg;
2398 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
2399 	    (sc->sc_flags & CAS_LINK) != 0) {
2400 		CAS_WRITE_4(sc, CAS_MAC_TX_CONF,
2401 		    txcfg | CAS_MAC_TX_CONF_EN);
2402 		CAS_WRITE_4(sc, CAS_MAC_RX_CONF,
2403 		    rxcfg | CAS_MAC_RX_CONF_EN);
2404 	}
2405 }
2406 
2407 static int
2408 cas_mediachange(struct ifnet *ifp)
2409 {
2410 	struct cas_softc *sc = ifp->if_softc;
2411 	int error;
2412 
2413 	/* XXX add support for serial media. */
2414 
2415 	CAS_LOCK(sc);
2416 	error = mii_mediachg(sc->sc_mii);
2417 	CAS_UNLOCK(sc);
2418 	return (error);
2419 }
2420 
2421 static void
2422 cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr)
2423 {
2424 	struct cas_softc *sc = ifp->if_softc;
2425 
2426 	CAS_LOCK(sc);
2427 	if ((ifp->if_flags & IFF_UP) == 0) {
2428 		CAS_UNLOCK(sc);
2429 		return;
2430 	}
2431 
2432 	mii_pollstat(sc->sc_mii);
2433 	ifmr->ifm_active = sc->sc_mii->mii_media_active;
2434 	ifmr->ifm_status = sc->sc_mii->mii_media_status;
2435 	CAS_UNLOCK(sc);
2436 }
2437 
2438 static int
2439 cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2440 {
2441 	struct cas_softc *sc = ifp->if_softc;
2442 	struct ifreq *ifr = (struct ifreq *)data;
2443 	int error;
2444 
2445 	error = 0;
2446 	switch (cmd) {
2447 	case SIOCSIFFLAGS:
2448 		CAS_LOCK(sc);
2449 		if ((ifp->if_flags & IFF_UP) != 0) {
2450 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
2451 			    ((ifp->if_flags ^ sc->sc_ifflags) &
2452 			    (IFF_ALLMULTI | IFF_PROMISC)) != 0)
2453 				cas_setladrf(sc);
2454 			else
2455 				cas_init_locked(sc);
2456 		} else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2457 			cas_stop(ifp);
2458 		sc->sc_ifflags = ifp->if_flags;
2459 		CAS_UNLOCK(sc);
2460 		break;
2461 	case SIOCSIFCAP:
2462 		CAS_LOCK(sc);
2463 		if ((sc->sc_flags & CAS_NO_CSUM) != 0) {
2464 			error = EINVAL;
2465 			CAS_UNLOCK(sc);
2466 			break;
2467 		}
2468 		ifp->if_capenable = ifr->ifr_reqcap;
2469 		if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
2470 			ifp->if_hwassist = CAS_CSUM_FEATURES;
2471 		else
2472 			ifp->if_hwassist = 0;
2473 		CAS_UNLOCK(sc);
2474 		break;
2475 	case SIOCADDMULTI:
2476 	case SIOCDELMULTI:
2477 		CAS_LOCK(sc);
2478 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2479 			cas_setladrf(sc);
2480 		CAS_UNLOCK(sc);
2481 		break;
2482 	case SIOCSIFMTU:
2483 		if ((ifr->ifr_mtu < ETHERMIN) ||
2484 		    (ifr->ifr_mtu > ETHERMTU_JUMBO))
2485 			error = EINVAL;
2486 		else
2487 			ifp->if_mtu = ifr->ifr_mtu;
2488 		break;
2489 	case SIOCGIFMEDIA:
2490 	case SIOCSIFMEDIA:
2491 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
2492 		break;
2493 	default:
2494 		error = ether_ioctl(ifp, cmd, data);
2495 		break;
2496 	}
2497 
2498 	return (error);
2499 }
2500 
2501 static void
2502 cas_setladrf(struct cas_softc *sc)
2503 {
2504 	struct ifnet *ifp = sc->sc_ifp;
2505 	struct ifmultiaddr *inm;
2506 	int i;
2507 	uint32_t hash[16];
2508 	uint32_t crc, v;
2509 
2510 	CAS_LOCK_ASSERT(sc, MA_OWNED);
2511 
2512 	/*
2513 	 * Turn off the RX MAC and the hash filter as required by the Sun
2514 	 * Cassini programming restrictions.
2515 	 */
2516 	v = sc->sc_mac_rxcfg & ~(CAS_MAC_RX_CONF_HFILTER |
2517 	    CAS_MAC_RX_CONF_EN);
2518 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v);
2519 	CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4,
2520 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2521 	if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_HFILTER |
2522 	    CAS_MAC_RX_CONF_EN, 0))
2523 		device_printf(sc->sc_dev,
2524 		    "cannot disable RX MAC or hash filter\n");
2525 
2526 	v &= ~(CAS_MAC_RX_CONF_PROMISC | CAS_MAC_RX_CONF_PGRP);
2527 	if ((ifp->if_flags & IFF_PROMISC) != 0) {
2528 		v |= CAS_MAC_RX_CONF_PROMISC;
2529 		goto chipit;
2530 	}
2531 	if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
2532 		v |= CAS_MAC_RX_CONF_PGRP;
2533 		goto chipit;
2534 	}
2535 
2536 	/*
2537 	 * Set up multicast address filter by passing all multicast
2538 	 * addresses through a crc generator, and then using the high
2539 	 * order 8 bits as an index into the 256 bit logical address
2540 	 * filter.  The high order 4 bits selects the word, while the
2541 	 * other 4 bits select the bit within the word (where bit 0
2542 	 * is the MSB).
2543 	 */
2544 
2545 	/* Clear the hash table. */
2546 	memset(hash, 0, sizeof(hash));
2547 
2548 	if_maddr_rlock(ifp);
2549 	CK_STAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) {
2550 		if (inm->ifma_addr->sa_family != AF_LINK)
2551 			continue;
2552 		crc = ether_crc32_le(LLADDR((struct sockaddr_dl *)
2553 		    inm->ifma_addr), ETHER_ADDR_LEN);
2554 
2555 		/* We just want the 8 most significant bits. */
2556 		crc >>= 24;
2557 
2558 		/* Set the corresponding bit in the filter. */
2559 		hash[crc >> 4] |= 1 << (15 - (crc & 15));
2560 	}
2561 	if_maddr_runlock(ifp);
2562 
2563 	v |= CAS_MAC_RX_CONF_HFILTER;
2564 
2565 	/* Now load the hash table into the chip (if we are using it). */
2566 	for (i = 0; i < 16; i++)
2567 		CAS_WRITE_4(sc,
2568 		    CAS_MAC_HASH0 + i * (CAS_MAC_HASH1 - CAS_MAC_HASH0),
2569 		    hash[i]);
2570 
2571  chipit:
2572 	sc->sc_mac_rxcfg = v;
2573 	CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v | CAS_MAC_RX_CONF_EN);
2574 }
2575 
/* PCI front-end entry points, defined below. */
static int	cas_pci_attach(device_t dev);
static int	cas_pci_detach(device_t dev);
static int	cas_pci_probe(device_t dev);
static int	cas_pci_resume(device_t dev);
static int	cas_pci_suspend(device_t dev);

/* Method table binding the device and miibus interfaces to this driver. */
static device_method_t cas_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		cas_pci_probe),
	DEVMETHOD(device_attach,	cas_pci_attach),
	DEVMETHOD(device_detach,	cas_pci_detach),
	DEVMETHOD(device_suspend,	cas_pci_suspend),
	DEVMETHOD(device_resume,	cas_pci_resume),
	/* Use the suspend handler here, it is all that is required. */
	DEVMETHOD(device_shutdown,	cas_pci_suspend),

	/* MII interface */
	DEVMETHOD(miibus_readreg,	cas_mii_readreg),
	DEVMETHOD(miibus_writereg,	cas_mii_writereg),
	DEVMETHOD(miibus_statchg,	cas_mii_statchg),

	DEVMETHOD_END
};

static driver_t cas_pci_driver = {
	"cas",
	cas_pci_methods,
	sizeof(struct cas_softc)
};

/*
 * Supported devices.  Probe and attach take the first entry whose device
 * ID matches and whose cpd_revid is less than or equal to the device's
 * revision ID, so for a shared device ID the entry with the higher
 * minimum revision (Cassini+) must precede the catch-all one (Cassini).
 * The table is terminated by an entry with a NULL description.
 */
static const struct cas_pci_dev {
	uint32_t	cpd_devid;	/* combined ID compared to pci_get_devid() */
	uint8_t		cpd_revid;	/* minimum revision ID */
	int		cpd_variant;	/* CAS_* chip variant */
	const char	*cpd_desc;	/* device description */
} cas_pci_devlist[] = {
	{ 0x0035100b, 0x0, CAS_SATURN, "NS DP83065 Saturn Gigabit Ethernet" },
	{ 0xabba108e, 0x10, CAS_CASPLUS, "Sun Cassini+ Gigabit Ethernet" },
	{ 0xabba108e, 0x0, CAS_CAS, "Sun Cassini Gigabit Ethernet" },
	{ 0, 0, 0, NULL }
};

DRIVER_MODULE(cas, pci, cas_pci_driver, cas_devclass, 0, 0);
/* The PNP table excludes the terminating entry. */
MODULE_PNP_INFO("W32:vendor/device", pci, cas, cas_pci_devlist,
    nitems(cas_pci_devlist) - 1);
DRIVER_MODULE(miibus, cas, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(cas, pci, 1, 1, 1);
2623 
2624 static int
2625 cas_pci_probe(device_t dev)
2626 {
2627 	int i;
2628 
2629 	for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) {
2630 		if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid &&
2631 		    pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) {
2632 			device_set_desc(dev, cas_pci_devlist[i].cpd_desc);
2633 			return (BUS_PROBE_DEFAULT);
2634 		}
2635 	}
2636 
2637 	return (ENXIO);
2638 }
2639 
/*
 * Bus resources allocated in attach: a shareable interrupt and the memory
 * BAR 0 register window.  The order corresponds to the CAS_RES_* indices
 * into sc_res[].
 */
static struct resource_spec cas_pci_res_spec[] = {
	{ SYS_RES_IRQ, 0, RF_SHAREABLE | RF_ACTIVE },	/* CAS_RES_INTR */
	{ SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE },	/* CAS_RES_MEM */
	{ -1, 0 }
};

/*
 * Property names and value looked up in attach (in the Open Firmware
 * device node or in the Expansion ROM VPD, depending on the platform)
 * to obtain the MAC address and to detect a PCS/SERDES PHY.
 */
#define	CAS_LOCAL_MAC_ADDRESS	"local-mac-address"
#define	CAS_PHY_INTERFACE	"phy-interface"
#define	CAS_PHY_TYPE		"phy-type"
#define	CAS_PHY_TYPE_PCS	"pcs"
2650 
2651 static int
2652 cas_pci_attach(device_t dev)
2653 {
2654 	char buf[sizeof(CAS_LOCAL_MAC_ADDRESS)];
2655 	struct cas_softc *sc;
2656 	int i;
2657 #if !(defined(__powerpc__) || defined(__sparc64__))
2658 	u_char enaddr[4][ETHER_ADDR_LEN];
2659 	u_int j, k, lma, pcs[4], phy;
2660 #endif
2661 
2662 	sc = device_get_softc(dev);
2663 	sc->sc_variant = CAS_UNKNOWN;
2664 	for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) {
2665 		if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid &&
2666 		    pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) {
2667 			sc->sc_variant = cas_pci_devlist[i].cpd_variant;
2668 			break;
2669 		}
2670 	}
2671 	if (sc->sc_variant == CAS_UNKNOWN) {
2672 		device_printf(dev, "unknown adaptor\n");
2673 		return (ENXIO);
2674 	}
2675 
2676 	/* PCI configuration */
2677 	pci_write_config(dev, PCIR_COMMAND,
2678 	    pci_read_config(dev, PCIR_COMMAND, 2) | PCIM_CMD_BUSMASTEREN |
2679 	    PCIM_CMD_MWRICEN | PCIM_CMD_PERRESPEN | PCIM_CMD_SERRESPEN, 2);
2680 
2681 	sc->sc_dev = dev;
2682 	if (sc->sc_variant == CAS_CAS && pci_get_devid(dev) < 0x02)
2683 		/* Hardware checksumming may hang TX. */
2684 		sc->sc_flags |= CAS_NO_CSUM;
2685 	if (sc->sc_variant == CAS_CASPLUS || sc->sc_variant == CAS_SATURN)
2686 		sc->sc_flags |= CAS_REG_PLUS;
2687 	if (sc->sc_variant == CAS_CAS ||
2688 	    (sc->sc_variant == CAS_CASPLUS && pci_get_revid(dev) < 0x11))
2689 		sc->sc_flags |= CAS_TABORT;
2690 	if (bootverbose)
2691 		device_printf(dev, "flags=0x%x\n", sc->sc_flags);
2692 
2693 	if (bus_alloc_resources(dev, cas_pci_res_spec, sc->sc_res)) {
2694 		device_printf(dev, "failed to allocate resources\n");
2695 		bus_release_resources(dev, cas_pci_res_spec, sc->sc_res);
2696 		return (ENXIO);
2697 	}
2698 
2699 	CAS_LOCK_INIT(sc, device_get_nameunit(dev));
2700 
2701 #if defined(__powerpc__) || defined(__sparc64__)
2702 	OF_getetheraddr(dev, sc->sc_enaddr);
2703 	if (OF_getprop(ofw_bus_get_node(dev), CAS_PHY_INTERFACE, buf,
2704 	    sizeof(buf)) > 0 || OF_getprop(ofw_bus_get_node(dev),
2705 	    CAS_PHY_TYPE, buf, sizeof(buf)) > 0) {
2706 		buf[sizeof(buf) - 1] = '\0';
2707 		if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0)
2708 			sc->sc_flags |= CAS_SERDES;
2709 	}
2710 #else
2711 	/*
2712 	 * Dig out VPD (vital product data) and read the MAC address as well
2713 	 * as the PHY type.  The VPD resides in the PCI Expansion ROM (PCI
2714 	 * FCode) and can't be accessed via the PCI capability pointer.
2715 	 * SUNW,pci-ce and SUNW,pci-qge use the Enhanced VPD format described
2716 	 * in the free US Patent 7149820.
2717 	 */
2718 
2719 #define	PCI_ROMHDR_SIZE			0x1c
2720 #define	PCI_ROMHDR_SIG			0x00
2721 #define	PCI_ROMHDR_SIG_MAGIC		0xaa55		/* little endian */
2722 #define	PCI_ROMHDR_PTR_DATA		0x18
2723 #define	PCI_ROM_SIZE			0x18
2724 #define	PCI_ROM_SIG			0x00
2725 #define	PCI_ROM_SIG_MAGIC		0x52494350	/* "PCIR", endian */
2726 							/* reversed */
2727 #define	PCI_ROM_VENDOR			0x04
2728 #define	PCI_ROM_DEVICE			0x06
2729 #define	PCI_ROM_PTR_VPD			0x08
2730 #define	PCI_VPDRES_BYTE0		0x00
2731 #define	PCI_VPDRES_ISLARGE(x)		((x) & 0x80)
2732 #define	PCI_VPDRES_LARGE_NAME(x)	((x) & 0x7f)
2733 #define	PCI_VPDRES_LARGE_LEN_LSB	0x01
2734 #define	PCI_VPDRES_LARGE_LEN_MSB	0x02
2735 #define	PCI_VPDRES_LARGE_SIZE		0x03
2736 #define	PCI_VPDRES_TYPE_ID_STRING	0x02		/* large */
2737 #define	PCI_VPDRES_TYPE_VPD		0x10		/* large */
2738 #define	PCI_VPD_KEY0			0x00
2739 #define	PCI_VPD_KEY1			0x01
2740 #define	PCI_VPD_LEN			0x02
2741 #define	PCI_VPD_SIZE			0x03
2742 
2743 #define	CAS_ROM_READ_1(sc, offs)					\
2744 	CAS_READ_1((sc), CAS_PCI_ROM_OFFSET + (offs))
2745 #define	CAS_ROM_READ_2(sc, offs)					\
2746 	CAS_READ_2((sc), CAS_PCI_ROM_OFFSET + (offs))
2747 #define	CAS_ROM_READ_4(sc, offs)					\
2748 	CAS_READ_4((sc), CAS_PCI_ROM_OFFSET + (offs))
2749 
2750 	lma = phy = 0;
2751 	memset(enaddr, 0, sizeof(enaddr));
2752 	memset(pcs, 0, sizeof(pcs));
2753 
2754 	/* Enable PCI Expansion ROM access. */
2755 	CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN,
2756 	    CAS_BIM_LDEV_OEN_PAD | CAS_BIM_LDEV_OEN_PROM);
2757 
2758 	/* Read PCI Expansion ROM header. */
2759 	if (CAS_ROM_READ_2(sc, PCI_ROMHDR_SIG) != PCI_ROMHDR_SIG_MAGIC ||
2760 	    (i = CAS_ROM_READ_2(sc, PCI_ROMHDR_PTR_DATA)) <
2761 	    PCI_ROMHDR_SIZE) {
2762 		device_printf(dev, "unexpected PCI Expansion ROM header\n");
2763 		goto fail_prom;
2764 	}
2765 
2766 	/* Read PCI Expansion ROM data. */
2767 	if (CAS_ROM_READ_4(sc, i + PCI_ROM_SIG) != PCI_ROM_SIG_MAGIC ||
2768 	    CAS_ROM_READ_2(sc, i + PCI_ROM_VENDOR) != pci_get_vendor(dev) ||
2769 	    CAS_ROM_READ_2(sc, i + PCI_ROM_DEVICE) != pci_get_device(dev) ||
2770 	    (j = CAS_ROM_READ_2(sc, i + PCI_ROM_PTR_VPD)) <
2771 	    i + PCI_ROM_SIZE) {
2772 		device_printf(dev, "unexpected PCI Expansion ROM data\n");
2773 		goto fail_prom;
2774 	}
2775 
2776 	/* Read PCI VPD. */
2777  next:
2778 	if (PCI_VPDRES_ISLARGE(CAS_ROM_READ_1(sc,
2779 	    j + PCI_VPDRES_BYTE0)) == 0) {
2780 		device_printf(dev, "no large PCI VPD\n");
2781 		goto fail_prom;
2782 	}
2783 
2784 	i = (CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_MSB) << 8) |
2785 	    CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_LSB);
2786 	switch (PCI_VPDRES_LARGE_NAME(CAS_ROM_READ_1(sc,
2787 	    j + PCI_VPDRES_BYTE0))) {
2788 	case PCI_VPDRES_TYPE_ID_STRING:
2789 		/* Skip identifier string. */
2790 		j += PCI_VPDRES_LARGE_SIZE + i;
2791 		goto next;
2792 	case PCI_VPDRES_TYPE_VPD:
2793 		for (j += PCI_VPDRES_LARGE_SIZE; i > 0;
2794 		    i -= PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN),
2795 		    j += PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN)) {
2796 			if (CAS_ROM_READ_1(sc, j + PCI_VPD_KEY0) != 'Z')
2797 				/* no Enhanced VPD */
2798 				continue;
2799 			if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE) != 'I')
2800 				/* no instance property */
2801 				continue;
2802 			if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'B') {
2803 				/* byte array */
2804 				if (CAS_ROM_READ_1(sc,
2805 				    j + PCI_VPD_SIZE + 4) != ETHER_ADDR_LEN)
2806 					continue;
2807 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
2808 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5,
2809 				    buf, sizeof(buf));
2810 				buf[sizeof(buf) - 1] = '\0';
2811 				if (strcmp(buf, CAS_LOCAL_MAC_ADDRESS) != 0)
2812 					continue;
2813 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
2814 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE +
2815 				    5 + sizeof(CAS_LOCAL_MAC_ADDRESS),
2816 				    enaddr[lma], sizeof(enaddr[lma]));
2817 				lma++;
2818 				if (lma == 4 && phy == 4)
2819 					break;
2820 			} else if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) ==
2821 			   'S') {
2822 				/* string */
2823 				if (CAS_ROM_READ_1(sc,
2824 				    j + PCI_VPD_SIZE + 4) !=
2825 				    sizeof(CAS_PHY_TYPE_PCS))
2826 					continue;
2827 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
2828 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5,
2829 				    buf, sizeof(buf));
2830 				buf[sizeof(buf) - 1] = '\0';
2831 				if (strcmp(buf, CAS_PHY_INTERFACE) == 0)
2832 					k = sizeof(CAS_PHY_INTERFACE);
2833 				else if (strcmp(buf, CAS_PHY_TYPE) == 0)
2834 					k = sizeof(CAS_PHY_TYPE);
2835 				else
2836 					continue;
2837 				bus_read_region_1(sc->sc_res[CAS_RES_MEM],
2838 				    CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE +
2839 				    5 + k, buf, sizeof(buf));
2840 				buf[sizeof(buf) - 1] = '\0';
2841 				if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0)
2842 					pcs[phy] = 1;
2843 				phy++;
2844 				if (lma == 4 && phy == 4)
2845 					break;
2846 			}
2847 		}
2848 		break;
2849 	default:
2850 		device_printf(dev, "unexpected PCI VPD\n");
2851 		goto fail_prom;
2852 	}
2853 
2854  fail_prom:
2855 	CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, 0);
2856 
2857 	if (lma == 0) {
2858 		device_printf(dev, "could not determine Ethernet address\n");
2859 		goto fail;
2860 	}
2861 	i = 0;
2862 	if (lma > 1 && pci_get_slot(dev) < nitems(enaddr))
2863 		i = pci_get_slot(dev);
2864 	memcpy(sc->sc_enaddr, enaddr[i], ETHER_ADDR_LEN);
2865 
2866 	if (phy == 0) {
2867 		device_printf(dev, "could not determine PHY type\n");
2868 		goto fail;
2869 	}
2870 	i = 0;
2871 	if (phy > 1 && pci_get_slot(dev) < nitems(pcs))
2872 		i = pci_get_slot(dev);
2873 	if (pcs[i] != 0)
2874 		sc->sc_flags |= CAS_SERDES;
2875 #endif
2876 
2877 	if (cas_attach(sc) != 0) {
2878 		device_printf(dev, "could not be attached\n");
2879 		goto fail;
2880 	}
2881 
2882 	if (bus_setup_intr(dev, sc->sc_res[CAS_RES_INTR], INTR_TYPE_NET |
2883 	    INTR_MPSAFE, cas_intr, NULL, sc, &sc->sc_ih) != 0) {
2884 		device_printf(dev, "failed to set up interrupt\n");
2885 		cas_detach(sc);
2886 		goto fail;
2887 	}
2888 	return (0);
2889 
2890  fail:
2891 	CAS_LOCK_DESTROY(sc);
2892 	bus_release_resources(dev, cas_pci_res_spec, sc->sc_res);
2893 	return (ENXIO);
2894 }
2895 
2896 static int
2897 cas_pci_detach(device_t dev)
2898 {
2899 	struct cas_softc *sc;
2900 
2901 	sc = device_get_softc(dev);
2902 	bus_teardown_intr(dev, sc->sc_res[CAS_RES_INTR], sc->sc_ih);
2903 	cas_detach(sc);
2904 	CAS_LOCK_DESTROY(sc);
2905 	bus_release_resources(dev, cas_pci_res_spec, sc->sc_res);
2906 	return (0);
2907 }
2908 
2909 static int
2910 cas_pci_suspend(device_t dev)
2911 {
2912 
2913 	cas_suspend(device_get_softc(dev));
2914 	return (0);
2915 }
2916 
2917 static int
2918 cas_pci_resume(device_t dev)
2919 {
2920 
2921 	cas_resume(device_get_softc(dev));
2922 	return (0);
2923 }
2924