/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/types.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include "dmfe_impl.h"

/*
 * This is the string displayed by modinfo, etc.
 */
static char dmfe_ident[] = "Davicom DM9102 Ethernet";


/*
 * NOTES:
 *
 * #defines:
 *
 *	DMFE_PCI_RNUMBER is the register-set number to use for the operating
 *	registers.  On an OBP-based machine, regset 0 refers to CONFIG space,
 *	regset 1 will be the operating registers in I/O space, and regset 2
 *	will be the operating registers in MEMORY space (preferred).  If an
 *	expansion ROM is fitted, it may appear as a further register set.
 *
 *	DMFE_SLOP defines the amount by which the chip may read beyond
 *	the end of a buffer or descriptor, apparently 6-8 dwords :(
 *	We have to make sure this doesn't cause it to access unallocated
 *	or unmapped memory.
 *
 *	DMFE_BUF_SIZE must be at least (ETHERMAX + ETHERFCSL + DMFE_SLOP)
 *	rounded up to a multiple of 4.  Here we choose a power of two for
 *	speed & simplicity at the cost of a bit more memory.
 *
 *	However, the buffer length field in the TX/RX descriptors is only
 *	eleven bits, so even though we allocate DMFE_BUF_SIZE (2048) bytes
 *	per buffer, we tell the chip that they're only DMFE_BUF_SIZE_1
 *	(2000) bytes each.
 *
 *	DMFE_DMA_MODE defines the mode (STREAMING/CONSISTENT) used for
 *	the data buffers.  The descriptors are always set up in CONSISTENT
 *	mode.
 *
 *	DMFE_HEADROOM defines how much space we'll leave in allocated
 *	mblks before the first valid data byte.  This should be chosen
 *	to be 2 modulo 4, so that once the ethernet header (14 bytes)
 *	has been stripped off, the packet data will be 4-byte aligned.
 *	The remaining space can be used by upstream modules to prepend
 *	any headers required.
 *
 * Patchable globals:
 *
 *	dmfe_bus_modes: the bus mode bits to be put into CSR0.
 *		Setting READ_MULTIPLE in this register seems to cause
 *		the chip to generate a READ LINE command with a parity
 *		error!  Don't do it!
 *
 *	dmfe_setup_desc1: the value to be put into descriptor word 1
 *		when sending a SETUP packet.
 *
 *		Setting TX_LAST_DESC in desc1 in a setup packet seems
 *		to make the chip spontaneously reset internally - it
 *		attempts to give back the setup packet descriptor by
 *		writing to PCI address 00000000 - which may or may not
 *		get a MASTER ABORT - after which most of its registers
 *		seem to have either default values or garbage!
 *
 *		TX_FIRST_DESC doesn't seem to have the same effect but
 *		it isn't needed on a setup packet so we'll leave it out
 *		too, just in case it has some other weird side-effect.
 *
 *		The default hardware packet filtering mode is now
 *		HASH_AND_PERFECT (imperfect filtering of multicast
 *		packets and perfect filtering of unicast packets).
 *		If this is found not to work reliably, setting the
 *		TX_FILTER_TYPE1 bit will cause a switchover to using
 *		HASH_ONLY mode (imperfect filtering of *all* packets).
 *		Software will then perform the additional filtering
 *		as required.
 */

#define	DMFE_PCI_RNUMBER	2
#define	DMFE_SLOP		(8*sizeof (uint32_t))
#define	DMFE_BUF_SIZE		2048
#define	DMFE_BUF_SIZE_1		2000
#define	DMFE_DMA_MODE		DDI_DMA_STREAMING
#define	DMFE_HEADROOM		34
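
/*
 * Note that DMFE_HEADROOM (34) is indeed 2 modulo 4: the frame is
 * copied in at offset 34, so once the 14-byte ethernet header has been
 * stripped the payload starts at offset 48, a multiple of 4 (assuming
 * allocb() returns at least 4-byte-aligned data blocks).
 */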

static uint32_t dmfe_bus_modes = TX_POLL_INTVL | CACHE_ALIGN;
static uint32_t dmfe_setup_desc1 = TX_SETUP_PACKET | SETUPBUF_SIZE |
					TX_FILTER_TYPE0;

/*
 * Some tunable parameters ...
 *	Number of RX/TX ring entries (128/128)
 *	Minimum number of TX ring slots to keep free (1)
 *	Low-water mark at which to try to reclaim TX ring slots (1)
 *	How often to take a TX-done interrupt (twice per ring cycle)
 *	Whether to reclaim TX ring entries on a TX-done interrupt (no)
 */

#define	DMFE_TX_DESC		128	/* Should be a multiple of 4 <= 256 */
#define	DMFE_RX_DESC		128	/* Should be a multiple of 4 <= 256 */

static uint32_t dmfe_rx_desc = DMFE_RX_DESC;
static uint32_t dmfe_tx_desc = DMFE_TX_DESC;
static uint32_t dmfe_tx_min_free = 1;
static uint32_t dmfe_tx_reclaim_level = 1;
static uint32_t dmfe_tx_int_factor = (DMFE_TX_DESC / 2) - 1;
static boolean_t dmfe_reclaim_on_done = B_FALSE;
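
/*
 * Note that dmfe_tx_int_factor is used as a bit mask in dmfe_send_msg()
 * ("(index & dmfe_tx_int_factor) == 0"), which only works because it is
 * of the form (2^n)-1.  With a 128-entry ring it is 63, so the
 * TX_INT_ON_COMP flag is requested on descriptors 0 and 64: twice per
 * ring cycle, as promised above.
 */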

/*
 * Time-related parameters:
 *
 *	We use a cyclic to provide a periodic callback; this is then used
 * 	to check for TX-stall and poll the link status register.
 *
 *	DMFE_TICK is the interval between cyclic callbacks, in microseconds.
 *
 *	TX_STALL_TIME_100 is the timeout in microseconds between passing
 *	a packet to the chip for transmission and seeing that it's gone,
 *	when running at 100Mb/s.  If we haven't reclaimed at least one
 *	descriptor in this time we assume the transmitter has stalled
 *	and reset the chip.
 *
 *	TX_STALL_TIME_10 is the equivalent timeout when running at 10Mb/s.
 *
 * Patchable globals:
 *
 *	dmfe_tick_us:		DMFE_TICK
 *	dmfe_tx100_stall_us:	TX_STALL_TIME_100
 *	dmfe_tx10_stall_us:	TX_STALL_TIME_10
 *
 * These are then used in _init() to calculate:
 *
 *	stall_100_tix[]: number of consecutive cyclic callbacks without a
 *			 reclaim before the TX process is considered stalled,
 *			 when running at 100Mb/s.  The elements are indexed
 *			 by transmit-engine-state.
 *	stall_10_tix[]:	 number of consecutive cyclic callbacks without a
 *			 reclaim before the TX process is considered stalled,
 *			 when running at 10Mb/s.  The elements are indexed
 *			 by transmit-engine-state.
 */

#define	DMFE_TICK		25000		/* microseconds		*/
#define	TX_STALL_TIME_100	50000		/* microseconds		*/
#define	TX_STALL_TIME_10	200000		/* microseconds		*/

static uint32_t dmfe_tick_us = DMFE_TICK;
static uint32_t dmfe_tx100_stall_us = TX_STALL_TIME_100;
static uint32_t dmfe_tx10_stall_us = TX_STALL_TIME_10;

/*
 * Calculated from above in _init()
 */

static uint32_t stall_100_tix[TX_PROCESS_MAX_STATE+1];
static uint32_t stall_10_tix[TX_PROCESS_MAX_STATE+1];
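
/*
 * A minimal sketch of that conversion (the real code lives in _init(),
 * outside this section; any per-state weighting it applies is an
 * assumption not shown here):
 *
 *	for (i = 0; i <= TX_PROCESS_MAX_STATE; ++i) {
 *		stall_100_tix[i] = dmfe_tx100_stall_us / dmfe_tick_us;
 *		stall_10_tix[i] = dmfe_tx10_stall_us / dmfe_tick_us;
 *	}
 */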

/*
 * Property names
 */
static char localmac_propname[] = "local-mac-address";
static char opmode_propname[] = "opmode-reg-value";

static int		dmfe_m_start(void *);
static void		dmfe_m_stop(void *);
static int		dmfe_m_promisc(void *, boolean_t);
static int		dmfe_m_multicst(void *, boolean_t, const uint8_t *);
static int		dmfe_m_unicst(void *, const uint8_t *);
static void		dmfe_m_ioctl(void *, queue_t *, mblk_t *);
static mblk_t		*dmfe_m_tx(void *, mblk_t *);
static int 		dmfe_m_stat(void *, uint_t, uint64_t *);
static int		dmfe_m_getprop(void *, const char *, mac_prop_id_t,
    uint_t, uint_t, void *, uint_t *);
static int		dmfe_m_setprop(void *, const char *, mac_prop_id_t,
    uint_t,  const void *);

static mac_callbacks_t dmfe_m_callbacks = {
	(MC_IOCTL | MC_SETPROP | MC_GETPROP),
	dmfe_m_stat,
	dmfe_m_start,
	dmfe_m_stop,
	dmfe_m_promisc,
	dmfe_m_multicst,
	dmfe_m_unicst,
	dmfe_m_tx,
	dmfe_m_ioctl,
	NULL,	/* getcapab */
	NULL,	/* open */
	NULL,	/* close */
	dmfe_m_setprop,
	dmfe_m_getprop
};


/*
 * Describes the chip's DMA engine
 */
static ddi_dma_attr_t dma_attr = {
	DMA_ATTR_V0,		/* dma_attr version */
	0,			/* dma_attr_addr_lo */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_addr_hi */
	0x0FFFFFF,		/* dma_attr_count_max */
	0x20,			/* dma_attr_align */
	0x7F,			/* dma_attr_burstsizes */
	1,			/* dma_attr_minxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_maxxfer */
	(uint32_t)0xFFFFFFFF,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen */
	1,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};

/*
 * DMA access attributes for registers and descriptors
 */
static ddi_device_acc_attr_t dmfe_reg_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * DMA access attributes for data: NOT to be byte swapped.
 */
static ddi_device_acc_attr_t dmfe_data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

static uchar_t dmfe_broadcast_addr[ETHERADDRL] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};


/*
 * ========== Lowest-level chip register & ring access routines ==========
 */

/*
 * I/O register get/put routines
 */
uint32_t
dmfe_chip_get32(dmfe_t *dmfep, off_t offset)
{
	uint32_t *addr;

	addr = (void *)(dmfep->io_reg + offset);
	return (ddi_get32(dmfep->io_handle, addr));
}

void
dmfe_chip_put32(dmfe_t *dmfep, off_t offset, uint32_t value)
{
	uint32_t *addr;

	addr = (void *)(dmfep->io_reg + offset);
	ddi_put32(dmfep->io_handle, addr, value);
}

/*
 * TX/RX ring get/put routines
 */
static uint32_t
dmfe_ring_get32(dma_area_t *dma_p, uint_t index, uint_t offset)
{
	uint32_t *addr;

	addr = (void *)dma_p->mem_va;
	return (ddi_get32(dma_p->acc_hdl, addr + index*DESC_SIZE + offset));
}

static void
dmfe_ring_put32(dma_area_t *dma_p, uint_t index, uint_t offset, uint32_t value)
{
	uint32_t *addr;

	addr = (void *)dma_p->mem_va;
	ddi_put32(dma_p->acc_hdl, addr + index*DESC_SIZE + offset, value);
}

/*
 * Setup buffer get/put routines
 */
static uint32_t
dmfe_setup_get32(dma_area_t *dma_p, uint_t index)
{
	uint32_t *addr;

	addr = (void *)dma_p->setup_va;
	return (ddi_get32(dma_p->acc_hdl, addr + index));
}

static void
dmfe_setup_put32(dma_area_t *dma_p, uint_t index, uint32_t value)
{
	uint32_t *addr;

	addr = (void *)dma_p->setup_va;
	ddi_put32(dma_p->acc_hdl, addr + index, value);
}


/*
 * ========== Low-level chip & ring buffer manipulation ==========
 */

/*
 * dmfe_set_opmode() -- function to set operating mode
 */
static void
dmfe_set_opmode(dmfe_t *dmfep)
{
	ASSERT(mutex_owned(dmfep->oplock));

	dmfe_chip_put32(dmfep, OPN_MODE_REG, dmfep->opmode);
	drv_usecwait(10);
}

/*
 * dmfe_stop_chip() -- stop all chip processing & optionally reset the h/w
 */
static void
dmfe_stop_chip(dmfe_t *dmfep, enum chip_state newstate)
{
	ASSERT(mutex_owned(dmfep->oplock));

	/*
	 * Stop the chip:
	 *	disable all interrupts
	 *	stop TX/RX processes
	 *	clear the status bits for TX/RX stopped
	 * If required, reset the chip
	 * Record the new state
	 */
	dmfe_chip_put32(dmfep, INT_MASK_REG, 0);
	dmfep->opmode &= ~(START_TRANSMIT | START_RECEIVE);
	dmfe_set_opmode(dmfep);
	dmfe_chip_put32(dmfep, STATUS_REG, TX_STOPPED_INT | RX_STOPPED_INT);

	switch (newstate) {
	default:
		ASSERT(!"can't get here");
		return;

	case CHIP_STOPPED:
	case CHIP_ERROR:
		break;

	case CHIP_RESET:
		dmfe_chip_put32(dmfep, BUS_MODE_REG, SW_RESET);
		drv_usecwait(10);
		dmfe_chip_put32(dmfep, BUS_MODE_REG, 0);
		drv_usecwait(10);
		dmfe_chip_put32(dmfep, BUS_MODE_REG, dmfe_bus_modes);
		break;
	}

	dmfep->chip_state = newstate;
}

/*
 * Initialize transmit and receive descriptor rings, and
 * set the chip to point to the first entry in each ring
 */
static void
dmfe_init_rings(dmfe_t *dmfep)
{
	dma_area_t *descp;
	uint32_t pstart;
	uint32_t pnext;
	uint32_t pbuff;
	uint32_t desc1;
	int i;

	/*
	 * You need all the locks in order to rewrite the descriptor rings
	 */
	ASSERT(mutex_owned(dmfep->oplock));
	ASSERT(mutex_owned(dmfep->rxlock));
	ASSERT(mutex_owned(dmfep->txlock));

	/*
	 * Program the RX ring entries
	 */
	descp = &dmfep->rx_desc;
	pstart = descp->mem_dvma;
	pnext = pstart + sizeof (struct rx_desc_type);
	pbuff = dmfep->rx_buff.mem_dvma;
	desc1 = RX_CHAINING | DMFE_BUF_SIZE_1;

	for (i = 0; i < dmfep->rx.n_desc; ++i) {
		dmfe_ring_put32(descp, i, RD_NEXT, pnext);
		dmfe_ring_put32(descp, i, BUFFER1, pbuff);
		dmfe_ring_put32(descp, i, DESC1, desc1);
		dmfe_ring_put32(descp, i, DESC0, RX_OWN);

		pnext += sizeof (struct rx_desc_type);
		pbuff += DMFE_BUF_SIZE;
	}

	/*
	 * Fix up last entry & sync
	 */
	dmfe_ring_put32(descp, --i, RD_NEXT, pstart);
	DMA_SYNC(descp, DDI_DMA_SYNC_FORDEV);
	dmfep->rx.next_free = 0;

	/*
	 * Set the base address of the RX descriptor list in CSR3
	 */
	dmfe_chip_put32(dmfep, RX_BASE_ADDR_REG, descp->mem_dvma);

	/*
	 * Program the TX ring entries
	 */
	descp = &dmfep->tx_desc;
	pstart = descp->mem_dvma;
	pnext = pstart + sizeof (struct tx_desc_type);
	pbuff = dmfep->tx_buff.mem_dvma;
	desc1 = TX_CHAINING;

	for (i = 0; i < dmfep->tx.n_desc; ++i) {
		dmfe_ring_put32(descp, i, TD_NEXT, pnext);
		dmfe_ring_put32(descp, i, BUFFER1, pbuff);
		dmfe_ring_put32(descp, i, DESC1, desc1);
		dmfe_ring_put32(descp, i, DESC0, 0);

		pnext += sizeof (struct tx_desc_type);
		pbuff += DMFE_BUF_SIZE;
	}

	/*
	 * Fix up last entry & sync
	 */
	dmfe_ring_put32(descp, --i, TD_NEXT, pstart);
	DMA_SYNC(descp, DDI_DMA_SYNC_FORDEV);
	dmfep->tx.n_free = dmfep->tx.n_desc;
	dmfep->tx.next_free = dmfep->tx.next_busy = 0;

	/*
	 * Set the base address of the TX descriptor list in CSR4
	 */
	dmfe_chip_put32(dmfep, TX_BASE_ADDR_REG, descp->mem_dvma);
}

/*
 * dmfe_start_chip() -- start the chip transmitting and/or receiving
 */
static void
dmfe_start_chip(dmfe_t *dmfep, int mode)
{
	ASSERT(mutex_owned(dmfep->oplock));

	dmfep->opmode |= mode;
	dmfe_set_opmode(dmfep);

	dmfe_chip_put32(dmfep, W_J_TIMER_REG, 0);
	/*
	 * Enable VLAN length mode (allows packets to be 4 bytes longer).
	 */
	dmfe_chip_put32(dmfep, W_J_TIMER_REG, VLAN_ENABLE);

	/*
	 * Clear any pending process-stopped interrupts
	 */
	dmfe_chip_put32(dmfep, STATUS_REG, TX_STOPPED_INT | RX_STOPPED_INT);
	dmfep->chip_state = mode & START_RECEIVE ? CHIP_TX_RX :
	    mode & START_TRANSMIT ? CHIP_TX_ONLY : CHIP_STOPPED;
}

/*
 * dmfe_enable_interrupts() -- enable our favourite set of interrupts.
 *
 * Normal interrupts:
 *	We always enable:
 *		RX_PKTDONE_INT		(packet received)
 *		TX_PKTDONE_INT		(TX complete)
 *	We never enable:
 *		TX_ALLDONE_INT		(next TX buffer not ready)
 *
 * Abnormal interrupts:
 *	We always enable:
 *		RX_STOPPED_INT
 *		TX_STOPPED_INT
 *		SYSTEM_ERR_INT
 *		RX_UNAVAIL_INT
 *	We never enable:
 *		RX_EARLY_INT
 *		RX_WATCHDOG_INT
 *		TX_JABBER_INT
 *		TX_EARLY_INT
 *		TX_UNDERFLOW_INT
 *		GP_TIMER_INT		(not valid in -9 chips)
 *		LINK_STATUS_INT		(not valid in -9 chips)
 */
static void
dmfe_enable_interrupts(dmfe_t *dmfep)
{
	ASSERT(mutex_owned(dmfep->oplock));

	/*
	 * Put 'the standard set of interrupts' in the interrupt mask register
	 */
	dmfep->imask =	RX_PKTDONE_INT | TX_PKTDONE_INT |
	    RX_STOPPED_INT | TX_STOPPED_INT | RX_UNAVAIL_INT | SYSTEM_ERR_INT;

	dmfe_chip_put32(dmfep, INT_MASK_REG,
	    NORMAL_SUMMARY_INT | ABNORMAL_SUMMARY_INT | dmfep->imask);
	dmfep->chip_state = CHIP_RUNNING;
}

/*
 * ========== RX side routines ==========
 */

/*
 * Function to update receive statistics on various errors
 */
static void
dmfe_update_rx_stats(dmfe_t *dmfep, uint32_t desc0)
{
	ASSERT(mutex_owned(dmfep->rxlock));

	/*
	 * The error summary bit and the error bits that it summarises
	 * are only valid if this is the last fragment.  Therefore, a
	 * fragment only contributes to the error statistics if both
	 * the last-fragment and error summary bits are set.
	 */
	if (((RX_LAST_DESC | RX_ERR_SUMMARY) & ~desc0) == 0) {
		dmfep->rx_stats_ierrors += 1;

		/*
		 * There are some other error bits in the descriptor for
		 * which there don't seem to be appropriate MAC statistics,
		 * notably RX_COLLISION and perhaps RX_DESC_ERR.  The
		 * latter may not be possible if it is supposed to indicate
		 * that one buffer has been filled with a partial packet
		 * and the next buffer required for the rest of the packet
		 * was not available, as all our buffers are more than large
		 * enough for a whole packet without fragmenting.
		 */

		if (desc0 & RX_OVERFLOW) {
			dmfep->rx_stats_overflow += 1;

		} else if (desc0 & RX_RUNT_FRAME)
			dmfep->rx_stats_short += 1;

		if (desc0 & RX_CRC)
			dmfep->rx_stats_fcs += 1;

		if (desc0 & RX_FRAME2LONG)
			dmfep->rx_stats_toolong += 1;
	}

	/*
	 * A receive watchdog timeout is counted as a MAC-level receive
	 * error.  Strangely, it doesn't set the packet error summary bit,
	 * according to the chip data sheet :-?
	 */
	if (desc0 & RX_RCV_WD_TO)
		dmfep->rx_stats_macrcv_errors += 1;

	if (desc0 & RX_DRIBBLING)
		dmfep->rx_stats_align += 1;

	if (desc0 & RX_MII_ERR)
		dmfep->rx_stats_macrcv_errors += 1;
}

/*
 * Receive incoming packet(s) and pass them up ...
 */
static mblk_t *
dmfe_getp(dmfe_t *dmfep)
{
	dma_area_t *descp;
	mblk_t **tail;
	mblk_t *head;
	mblk_t *mp;
	char *rxb;
	uchar_t *dp;
	uint32_t desc0;
	uint32_t misses;
	int packet_length;
	int index;

	mutex_enter(dmfep->rxlock);

	/*
	 * Update the missed frame statistic from the on-chip counter.
	 */
	misses = dmfe_chip_get32(dmfep, MISSED_FRAME_REG);
	dmfep->rx_stats_norcvbuf += (misses & MISSED_FRAME_MASK);

	/*
	 * sync (all) receive descriptors before inspecting them
	 */
	descp = &dmfep->rx_desc;
	DMA_SYNC(descp, DDI_DMA_SYNC_FORKERNEL);

	/*
	 * We should own at least one RX entry, since we've had a
	 * receive interrupt, but let's not be dogmatic about it.
	 */
	index = dmfep->rx.next_free;
	desc0 = dmfe_ring_get32(descp, index, DESC0);

	DTRACE_PROBE1(rx__start, uint32_t, desc0);
	for (head = NULL, tail = &head; (desc0 & RX_OWN) == 0; ) {
		/*
		 * Maintain statistics for every descriptor returned
		 * to us by the chip ...
		 */
		dmfe_update_rx_stats(dmfep, desc0);

		/*
		 * Check that the entry has both "packet start" and
		 * "packet end" flags.  We really shouldn't get packet
		 * fragments, 'cos all the RX buffers are bigger than
		 * the largest valid packet.  So we'll just drop any
		 * fragments we find & skip on to the next entry.
		 */
		if (((RX_FIRST_DESC | RX_LAST_DESC) & ~desc0) != 0) {
			DTRACE_PROBE1(rx__frag, uint32_t, desc0);
			goto skip;
		}

		/*
		 * A whole packet in one buffer.  We have to check error
		 * status and packet length before forwarding it upstream.
		 */
		if (desc0 & RX_ERR_SUMMARY) {
			DTRACE_PROBE1(rx__err, uint32_t, desc0);
			goto skip;
		}

		packet_length = (desc0 >> 16) & 0x3fff;
		if (packet_length > DMFE_MAX_PKT_SIZE) {
			DTRACE_PROBE1(rx__toobig, int, packet_length);
			goto skip;
		} else if (packet_length < ETHERMIN) {
			/*
			 * Note that VLAN packet would be even larger,
			 * but we don't worry about dropping runt VLAN
			 * frames.
			 *
			 * This check is probably redundant, as well,
			 * since the hardware should drop RUNT frames.
			 */
			DTRACE_PROBE1(rx__runt, int, packet_length);
			goto skip;
		}

		/*
		 * Sync the data, so we can examine it; then check that
		 * the packet is really intended for us (remember that
		 * if we're using Imperfect Filtering, then the chip will
		 * receive unicast packets sent to stations whose addresses
		 * just happen to hash to the same value as our own; we
		 * discard these here so they don't get sent upstream ...)
		 */
		(void) ddi_dma_sync(dmfep->rx_buff.dma_hdl,
		    index * DMFE_BUF_SIZE, DMFE_BUF_SIZE,
		    DDI_DMA_SYNC_FORKERNEL);
		rxb = &dmfep->rx_buff.mem_va[index*DMFE_BUF_SIZE];


		/*
		 * We do not bother to check that the packet is really for
		 * us, we let the MAC framework make that check instead.
		 * This is especially important if we ever want to support
		 * multiple MAC addresses.
		 */

		/*
		 * Packet looks good; get a buffer to copy it into.  We
		 * allow some space at the front of the allocated buffer
		 * (HEADROOM) in case any upstream modules want to prepend
		 * some sort of header.  The value has been carefully chosen
		 * so that it also has the side-effect of making the packet
		 * *contents* 4-byte aligned, as required by NCA!
		 */
		mp = allocb(DMFE_HEADROOM + packet_length, 0);
		if (mp == NULL) {
			DTRACE_PROBE(rx__no__buf);
			dmfep->rx_stats_norcvbuf += 1;
			goto skip;
		}

		/*
		 * Account for statistics of good packets.
		 */
		dmfep->rx_stats_ipackets += 1;
		dmfep->rx_stats_rbytes += packet_length;
		if (desc0 & RX_MULTI_FRAME) {
			if (bcmp(rxb, dmfe_broadcast_addr, ETHERADDRL)) {
				dmfep->rx_stats_multi += 1;
			} else {
				dmfep->rx_stats_bcast += 1;
			}
		}

		/*
		 * Copy the packet into the STREAMS buffer
		 */
		dp = mp->b_rptr += DMFE_HEADROOM;
		mp->b_cont = mp->b_next = NULL;

		/*
		 * Don't worry about stripping the vlan tag, the MAC
		 * layer will take care of that for us.
		 */
		bcopy(rxb, dp, packet_length);

		/*
		 * Fix up the packet length, and link it to the chain
		 */
		mp->b_wptr = mp->b_rptr + packet_length - ETHERFCSL;
		*tail = mp;
		tail = &mp->b_next;

	skip:
		/*
		 * Return ownership of ring entry & advance to next
		 */
		dmfe_ring_put32(descp, index, DESC0, RX_OWN);
		index = NEXT(index, dmfep->rx.n_desc);
		desc0 = dmfe_ring_get32(descp, index, DESC0);
	}

	/*
	 * Remember where to start looking next time ...
	 */
	dmfep->rx.next_free = index;

	/*
	 * sync the receive descriptors that we've given back
	 * (actually, we sync all of them for simplicity), and
	 * wake the chip in case it had suspended receive
	 */
	DMA_SYNC(descp, DDI_DMA_SYNC_FORDEV);
	dmfe_chip_put32(dmfep, RX_POLL_REG, 0);

	mutex_exit(dmfep->rxlock);
	return (head);
}

/*
 * ========== Primary TX side routines ==========
 */

/*
 *	TX ring management:
 *
 *	There are <tx.n_desc> entries in the ring, of which those from
 *	<tx.next_free> round to but not including <tx.next_busy> must
 *	be owned by the CPU.  The number of such entries should equal
 *	<tx.n_free>; but there may also be some more entries which the
 *	chip has given back but which we haven't yet accounted for.
 *	The routine dmfe_reclaim_tx_desc() adjusts the indexes & counts
 *	as it discovers such entries.
 *
 *	Initially, or when the ring is entirely free:
 *		C = Owned by CPU
 *		D = Owned by Davicom (DMFE) chip
 *
 *	tx.next_free					tx.n_desc = 16
 *	  |
 *	  v
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *	| C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *	  ^
 *	  |
 *	tx.next_busy					tx.n_free = 16
 *
 *	On entry to reclaim() during normal use:
 *
 *					tx.next_free	tx.n_desc = 16
 *					      |
 *					      v
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *	| C | C | C | C | C | C | D | D | D | C | C | C | C | C | C | C |
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *		  ^
 *		  |
 *		tx.next_busy				tx.n_free = 9
 *
 *	On exit from reclaim():
 *
 *					tx.next_free	tx.n_desc = 16
 *					      |
 *					      v
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *	| C | C | C | C | C | C | D | D | D | C | C | C | C | C | C | C |
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *				  ^
 *				  |
 *			     tx.next_busy		tx.n_free = 13
 *
 *	The ring is considered "full" when only one entry is owned by
 *	the CPU; thus <tx.n_free> should always be >= 1.
 *
 *			tx.next_free			tx.n_desc = 16
 *			      |
 *			      v
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *	| D | D | D | D | D | C | D | D | D | D | D | D | D | D | D | D |
 *	+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 *				  ^
 *				  |
 *			     tx.next_busy		tx.n_free = 1
 */

/*
 * Function to update transmit statistics on various errors
 */
static void
dmfe_update_tx_stats(dmfe_t *dmfep, int index, uint32_t desc0, uint32_t desc1)
{
	uint32_t collisions;
	uint32_t errbits;
	uint32_t errsum;

	ASSERT(mutex_owned(dmfep->txlock));

	collisions = ((desc0 >> 3) & 0x0f);
	errsum = desc0 & TX_ERR_SUMMARY;
	errbits = desc0 & (TX_UNDERFLOW | TX_LATE_COLL | TX_CARRIER_LOSS |
	    TX_NO_CARRIER | TX_EXCESS_COLL | TX_JABBER_TO);
	if ((errsum == 0) != (errbits == 0)) {
		dmfe_log(dmfep, "dubious TX error status 0x%x", desc0);
		desc0 |= TX_ERR_SUMMARY;
	}

	if (desc0 & TX_ERR_SUMMARY) {
		dmfep->tx_stats_oerrors += 1;

		/*
		 * If we ever see a transmit jabber timeout, we count it
		 * as a MAC-level transmit error; but we probably won't
		 * see it as it causes an Abnormal interrupt and we reset
		 * the chip in order to recover
		 */
		if (desc0 & TX_JABBER_TO) {
			dmfep->tx_stats_macxmt_errors += 1;
			dmfep->tx_stats_jabber += 1;
		}

		if (desc0 & TX_UNDERFLOW)
			dmfep->tx_stats_underflow += 1;
		else if (desc0 & TX_LATE_COLL)
			dmfep->tx_stats_xmtlatecoll += 1;

		if (desc0 & (TX_CARRIER_LOSS | TX_NO_CARRIER))
			dmfep->tx_stats_nocarrier += 1;

		if (desc0 & TX_EXCESS_COLL) {
			dmfep->tx_stats_excoll += 1;
			collisions = 16;
		}
	} else {
		int	bit = index % NBBY;
		int	byt = index / NBBY;

		if (dmfep->tx_mcast[byt] & (1 << bit)) {
			dmfep->tx_mcast[byt] &= ~(1 << bit);
			dmfep->tx_stats_multi += 1;

		} else if (dmfep->tx_bcast[byt] & (1 << bit)) {
			dmfep->tx_bcast[byt] &= ~(1 << bit);
			dmfep->tx_stats_bcast += 1;
		}

		dmfep->tx_stats_opackets += 1;
		dmfep->tx_stats_obytes += desc1 & TX_BUFFER_SIZE1;
	}

	if (collisions == 1)
		dmfep->tx_stats_first_coll += 1;
	else if (collisions != 0)
		dmfep->tx_stats_multi_coll += 1;
	dmfep->tx_stats_collisions += collisions;

	if (desc0 & TX_DEFERRED)
		dmfep->tx_stats_defer += 1;
}

/*
 * Reclaim all the ring entries that the chip has returned to us ...
 *
 * Returns B_FALSE if no entries could be reclaimed.  Otherwise, reclaims
 * as many as possible, restarts the TX stall timeout, and returns B_TRUE.
 */
static boolean_t
dmfe_reclaim_tx_desc(dmfe_t *dmfep)
{
	dma_area_t *descp;
	uint32_t desc0;
	uint32_t desc1;
	int i;

	ASSERT(mutex_owned(dmfep->txlock));

	/*
	 * sync transmit descriptor ring before looking at it
	 */
	descp = &dmfep->tx_desc;
	DMA_SYNC(descp, DDI_DMA_SYNC_FORKERNEL);

	/*
	 * Early exit if there are no descriptors to reclaim, either
	 * because they're all reclaimed already, or because the next
	 * one is still owned by the chip ...
	 */
	i = dmfep->tx.next_busy;
	if (i == dmfep->tx.next_free)
		return (B_FALSE);
	desc0 = dmfe_ring_get32(descp, i, DESC0);
	if (desc0 & TX_OWN)
		return (B_FALSE);

	/*
	 * Reclaim as many descriptors as possible ...
	 */
	for (;;) {
		desc1 = dmfe_ring_get32(descp, i, DESC1);
		ASSERT((desc1 & (TX_SETUP_PACKET | TX_LAST_DESC)) != 0);

		if ((desc1 & TX_SETUP_PACKET) == 0) {
			/*
			 * Regular packet - just update stats
			 */
			dmfe_update_tx_stats(dmfep, i, desc0, desc1);
		}

		/*
		 * Update count & index; we're all done if the ring is
		 * now fully reclaimed, or the next entry if still owned
		 * by the chip ...
		 */
		dmfep->tx.n_free += 1;
		i = NEXT(i, dmfep->tx.n_desc);
		if (i == dmfep->tx.next_free)
			break;
		desc0 = dmfe_ring_get32(descp, i, DESC0);
		if (desc0 & TX_OWN)
			break;
	}

	dmfep->tx.next_busy = i;
	dmfep->tx_pending_tix = 0;
	return (B_TRUE);
}

/*
 * Send the message in the message block chain <mp>.
 *
 * The message is freed if and only if its contents are successfully copied
 * and queued for transmission (so that the return value is B_TRUE).
 * If we can't queue the message, the return value is B_FALSE and
 * the message is *not* freed.
 *
 * This routine handles the special case of <mp> == NULL, which indicates
 * that we want to "send" the special "setup packet" allocated during
 * startup.  We have to use some different flags in the packet descriptor
 * to say its a setup packet (from the global <dmfe_setup_desc1>), and the
 * setup packet *isn't* freed after use.
 */
static boolean_t
dmfe_send_msg(dmfe_t *dmfep, mblk_t *mp)
{
	dma_area_t *descp;
	mblk_t *bp;
	char *txb;
	uint32_t desc1;
	uint32_t index;
	size_t totlen;
	size_t mblen;
	uint32_t paddr;

	/*
	 * If the number of free slots is below the reclaim threshold
	 * (soft limit), we'll try to reclaim some.  If we fail, and
	 * the number of free slots is also below the minimum required
	 * (the hard limit, usually 1), then we can't send the packet.
	 */
	mutex_enter(dmfep->txlock);
	if (dmfep->suspended) {
		mutex_exit(dmfep->txlock);
		return (B_FALSE);
	}

	if (dmfep->tx.n_free <= dmfe_tx_reclaim_level &&
	    dmfe_reclaim_tx_desc(dmfep) == B_FALSE &&
	    dmfep->tx.n_free <= dmfe_tx_min_free) {
		/*
		 * Resource shortage - return B_FALSE so the packet
		 * will be queued for retry after the next TX-done
		 * interrupt.
		 */
		mutex_exit(dmfep->txlock);
		DTRACE_PROBE(tx__no__desc);
		return (B_FALSE);
	}

	/*
	 * There's a slot available, so claim it by incrementing
	 * the next-free index and decrementing the free count.
	 * If the ring is currently empty, we also restart the
	 * stall-detect timer.  The ASSERTions check that our
	 * invariants still hold:
	 *	the next-free index must not match the next-busy index
	 *	there must still be at least one free entry
	 * After this, we now have exclusive ownership of the ring
	 * entry (and matching buffer) indicated by <index>, so we
	 * don't need to hold the TX lock any longer
	 */
	index = dmfep->tx.next_free;
	dmfep->tx.next_free = NEXT(index, dmfep->tx.n_desc);
	ASSERT(dmfep->tx.next_free != dmfep->tx.next_busy);
	if (dmfep->tx.n_free-- == dmfep->tx.n_desc)
		dmfep->tx_pending_tix = 0;
	ASSERT(dmfep->tx.n_free >= 1);
	mutex_exit(dmfep->txlock);

	/*
	 * Check the ownership of the ring entry ...
	 */
	descp = &dmfep->tx_desc;
	ASSERT((dmfe_ring_get32(descp, index, DESC0) & TX_OWN) == 0);

	if (mp == NULL) {
		/*
		 * Indicates we should send a SETUP packet, which we do by
		 * temporarily switching the BUFFER1 pointer in the ring
		 * entry.  The reclaim routine will restore BUFFER1 to its
		 * usual value.
		 *
		 * Note that as the setup packet is tagged on the end of
		 * the TX ring, when we sync the descriptor we're also
		 * implicitly syncing the setup packet - hence, we don't
		 * need a separate ddi_dma_sync() call here.
		 */
		desc1 = dmfe_setup_desc1;
		paddr = descp->setup_dvma;
	} else {
		/*
		 * A regular packet; we copy the data into a pre-mapped
		 * buffer, which avoids the overhead (and complication)
		 * of mapping/unmapping STREAMS buffers and keeping hold
		 * of them until the DMA has completed.
		 *
		 * Because all buffers are the same size, and larger
		 * than the longest single valid message, we don't have
		 * to bother about splitting the message across multiple
		 * buffers.
		 */
		txb = &dmfep->tx_buff.mem_va[index*DMFE_BUF_SIZE];
		totlen = 0;
		bp = mp;

		/*
		 * Copy all (remaining) mblks in the message ...
		 */
		for (; bp != NULL; bp = bp->b_cont) {
			mblen = MBLKL(bp);
			if ((totlen += mblen) <= DMFE_MAX_PKT_SIZE) {
				bcopy(bp->b_rptr, txb, mblen);
				txb += mblen;
			}
		}

		/*
		 * Is this a multicast or broadcast packet?  We do
		 * this so that we can track statistics accurately
		 * when we reclaim it.
		 */
		txb = &dmfep->tx_buff.mem_va[index*DMFE_BUF_SIZE];
		if (txb[0] & 0x1) {
			if (bcmp(txb, dmfe_broadcast_addr, ETHERADDRL) == 0) {
				dmfep->tx_bcast[index / NBBY] |=
				    (1 << (index % NBBY));
			} else {
				dmfep->tx_mcast[index / NBBY] |=
				    (1 << (index % NBBY));
			}
		}

		/*
		 * We've reached the end of the chain, and we should have
		 * collected no more than DMFE_MAX_PKT_SIZE bytes into our
		 * buffer.  Note that the <size> field in the descriptor is
		 * only 11 bits, so bigger packets would be a problem!
		 */
		ASSERT(bp == NULL);
		ASSERT(totlen <= DMFE_MAX_PKT_SIZE);
		totlen &= TX_BUFFER_SIZE1;
		desc1 = TX_FIRST_DESC | TX_LAST_DESC | totlen;
		paddr = dmfep->tx_buff.mem_dvma + index*DMFE_BUF_SIZE;

		(void) ddi_dma_sync(dmfep->tx_buff.dma_hdl,
		    index * DMFE_BUF_SIZE, DMFE_BUF_SIZE, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Update ring descriptor entries, sync them, and wake up the
	 * transmit process
	 */
	if ((index & dmfe_tx_int_factor) == 0)
		desc1 |= TX_INT_ON_COMP;
	desc1 |= TX_CHAINING;
	dmfe_ring_put32(descp, index, BUFFER1, paddr);
	dmfe_ring_put32(descp, index, DESC1, desc1);
	dmfe_ring_put32(descp, index, DESC0, TX_OWN);
	DMA_SYNC(descp, DDI_DMA_SYNC_FORDEV);
	dmfe_chip_put32(dmfep, TX_POLL_REG, 0);

	/*
	 * Finally, free the message & return success
	 */
	if (mp)
		freemsg(mp);
	return (B_TRUE);
}

/*
 *	dmfe_m_tx() -- send a chain of packets
 *
 *	Called when packet(s) are ready to be transmitted. A pointer to an
 *	M_DATA message that contains the packet is passed to this routine.
 *	The complete LLC header is contained in the message's first message
 *	block, and the remainder of the packet is contained within
 *	additional M_DATA message blocks linked to the first message block.
 *
 *	Additional messages may be passed by linking with b_next.
 */
static mblk_t *
dmfe_m_tx(void *arg, mblk_t *mp)
{
	dmfe_t *dmfep = arg;			/* private device info	*/
	mblk_t *next;

	ASSERT(mp != NULL);
	ASSERT(dmfep->mac_state == DMFE_MAC_STARTED);

	if (dmfep->chip_state != CHIP_RUNNING)
		return (mp);

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;
		if (!dmfe_send_msg(dmfep, mp)) {
			mp->b_next = next;
			break;
		}
		mp = next;
	}

	return (mp);
}

/*
 * ========== Address-setting routines (TX-side) ==========
 */

/*
 * Find the index of the relevant bit in the setup packet.
 * This must mirror the way the hardware will actually calculate it!
 */
static uint32_t
dmfe_hash_index(const uint8_t *address)
{
	uint32_t const POLY = HASH_POLY;
	uint32_t crc = HASH_CRC;
	uint32_t index;
	uint32_t msb;
	uchar_t currentbyte;
	int byteslength;
	int shift;
	int bit;

	for (byteslength = 0; byteslength < ETHERADDRL; ++byteslength) {
		currentbyte = address[byteslength];
		for (bit = 0; bit < 8; ++bit) {
			msb = crc >> 31;
			crc <<= 1;
			if (msb ^ (currentbyte & 1)) {
				crc ^= POLY;
				crc |= 0x00000001;
			}
			currentbyte >>= 1;
		}
	}

	for (index = 0, bit = 23, shift = 8; shift >= 0; ++bit, --shift)
		index |= (((crc >> bit) & 1) << shift);

	return (index);
}
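
/*
 * Usage sketch: the 9-bit index returned above (0-511) selects one bit
 * of the 512-bit hash table held in the setup packet, which
 * dmfe_update_hash() below addresses as 16-bit words:
 *
 *	index = dmfe_hash_index(mca);
 *	word = index/16;  bit within word = index%16;
 */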

/*
 * Find and set/clear the relevant bit in the setup packet hash table
 * This must mirror the way the hardware will actually interpret it!
 */
static void
dmfe_update_hash(dmfe_t *dmfep, uint32_t index, boolean_t val)
{
	dma_area_t *descp;
	uint32_t tmp;

	ASSERT(mutex_owned(dmfep->oplock));

	descp = &dmfep->tx_desc;
	tmp = dmfe_setup_get32(descp, index/16);
	if (val)
		tmp |= 1 << (index%16);
	else
		tmp &= ~(1 << (index%16));
	dmfe_setup_put32(descp, index/16, tmp);
}

/*
 * Update the refcount for the bit in the setup packet corresponding
 * to the specified address; if it changes between zero & nonzero,
 * also update the bitmap itself & return B_TRUE, so that the caller
 * knows to re-send the setup packet.  Otherwise (only the refcount
 * changed), return B_FALSE
 */
static boolean_t
dmfe_update_mcast(dmfe_t *dmfep, const uint8_t *mca, boolean_t val)
{
	uint32_t index;
	uint8_t *refp;
	boolean_t change;

	index = dmfe_hash_index(mca);
	refp = &dmfep->mcast_refs[index];
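
	/*
	 * When adding (val == B_TRUE), the bitmap changes if the old
	 * refcount was zero (hence the post-increment below); when
	 * removing, it changes if the new refcount is zero (hence the
	 * pre-decrement).
	 */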
	change = (val ? (*refp)++ : --(*refp)) == 0;

	if (change)
		dmfe_update_hash(dmfep, index, val);

	return (change);
}

/*
 * "Transmit" the (possibly updated) magic setup packet
 */
static int
dmfe_send_setup(dmfe_t *dmfep)
{
	int status;

	ASSERT(mutex_owned(dmfep->oplock));

	if (dmfep->suspended)
		return (0);

	/*
	 * If the chip isn't running, we can't really send the setup frame
	 * now but it doesn't matter, 'cos it will be sent when the transmit
	 * process is restarted (see dmfe_start()).
	 */
	if ((dmfep->opmode & START_TRANSMIT) == 0)
		return (0);

	/*
	 * "Send" the setup frame.  If it fails (e.g. no resources),
	 * set a flag; then the factotum will retry the "send".  Once
	 * it works, we can clear the flag no matter how many attempts
	 * had previously failed.  We tell the caller that it worked
	 * whether it did or not; after all, it *will* work eventually.
	 */
	status = dmfe_send_msg(dmfep, NULL);
	dmfep->need_setup = status ? B_FALSE : B_TRUE;
	return (0);
}

/*
 *	dmfe_m_unicst() -- set the physical network address
 */
static int
dmfe_m_unicst(void *arg, const uint8_t *macaddr)
{
	dmfe_t *dmfep = arg;
	int status;
	int index;

	/*
	 * Update our current address and send out a new setup packet
	 *
	 * Here we accommodate the use of HASH_ONLY or HASH_AND_PERFECT
	 * filtering modes (we don't support PERFECT_ONLY or INVERSE modes).
	 *
	 * It is said that there is a bug in the 21140 where it fails to
	 * receive packets addressed to the specified perfect filter address.
	 * If the same bug is present in the DM9102A, the TX_FILTER_TYPE1
	 * bit should be set in the module variable dmfe_setup_desc1.
	 *
	 * If TX_FILTER_TYPE1 is set, we will use HASH_ONLY filtering.
	 * In this mode, *all* incoming addresses are hashed and looked
	 * up in the bitmap described by the setup packet.  Therefore,
	 * the bit representing the station address has to be added to
	 * the table before sending it out.  If the address is changed,
	 * the old entry should be removed before the new entry is made.
	 *
	 * NOTE: in this mode, unicast packets that are not intended for
	 * this station may be received; it is up to software to filter
	 * them out afterwards!
	 *
	 * If TX_FILTER_TYPE1 is *not* set, we will use HASH_AND_PERFECT
	 * filtering.  In this mode, multicast addresses are hashed and
	 * checked against the bitmap, while unicast addresses are simply
	 * matched against the one physical address specified in the setup
	 * packet.  This means that we shouldn't receive unicast packets
	 * that aren't intended for us (but software still has to filter
	 * multicast packets just the same).
	 *
	 * Whichever mode we're using, we have to enter the broadcast
	 * address into the multicast filter map too, so we do this on
	 * the first time through after attach or reset.
	 */
	mutex_enter(dmfep->oplock);

	if (dmfep->addr_set && dmfe_setup_desc1 & TX_FILTER_TYPE1)
		(void) dmfe_update_mcast(dmfep, dmfep->curr_addr, B_FALSE);
	if (dmfe_setup_desc1 & TX_FILTER_TYPE1)
		(void) dmfe_update_mcast(dmfep, macaddr, B_TRUE);
	if (!dmfep->addr_set)
		(void) dmfe_update_mcast(dmfep, dmfe_broadcast_addr, B_TRUE);

	/*
	 * Remember the new current address
	 */
	ethaddr_copy(macaddr, dmfep->curr_addr);
	dmfep->addr_set = B_TRUE;

	/*
	 * Install the new physical address into the proper position in
	 * the setup frame; this is only used if we select hash+perfect
	 * filtering, but we'll put it in anyway.  The ugliness here is
	 * down to the byte-pair packing the setup frame requires :(
	 */
	for (index = 0; index < ETHERADDRL; index += 2)
		dmfe_setup_put32(&dmfep->tx_desc, SETUPBUF_PHYS+index/2,
		    (macaddr[index+1] << 8) | macaddr[index]);

	/*
	 * Finally, we're ready to "transmit" the setup frame
	 */
	status = dmfe_send_setup(dmfep);
	mutex_exit(dmfep->oplock);

	return (status);
}

/*
 *	dmfe_m_multicst() -- enable or disable a multicast address
 *
 *	Program the hardware to enable/disable the multicast address
 *	in "mca" (enable if add is true, otherwise disable it.)
 *	We keep a refcount for each bit in the map, so that it still
 *	works out properly if multiple addresses hash to the same bit.
 *	dmfe_update_mcast() tells us whether the map actually changed;
 *	if so, we have to re-"transmit" the magic setup packet.
 */
static int
dmfe_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	dmfe_t *dmfep = arg;			/* private device info	*/
	int status = 0;

	mutex_enter(dmfep->oplock);
	if (dmfe_update_mcast(dmfep, mca, add))
		status = dmfe_send_setup(dmfep);
	mutex_exit(dmfep->oplock);

	return (status);
}


/*
 * ========== Internal state management entry points ==========
 */

/*
 * These routines provide all the functionality required by the
 * corresponding MAC layer entry points, but don't update the MAC layer
 * state, so they can be called internally without disturbing our record
 * of what the MAC layer thinks we should be doing ...
 */

/*
 *	dmfe_stop() -- stop processing, don't reset h/w or rings
 */
static void
dmfe_stop(dmfe_t *dmfep)
{
	ASSERT(mutex_owned(dmfep->oplock));

	dmfe_stop_chip(dmfep, CHIP_STOPPED);
}

/*
 *	dmfe_reset() -- stop processing, reset h/w & rings to initial state
 */
static void
dmfe_reset(dmfe_t *dmfep)
{
	ASSERT(mutex_owned(dmfep->oplock));
	ASSERT(mutex_owned(dmfep->rxlock));
	ASSERT(mutex_owned(dmfep->txlock));

	dmfe_stop_chip(dmfep, CHIP_RESET);
	dmfe_init_rings(dmfep);
}

/*
 *	dmfe_start() -- start transmitting/receiving
 */
static void
dmfe_start(dmfe_t *dmfep)
{
	uint32_t gpsr;

	ASSERT(mutex_owned(dmfep->oplock));

	ASSERT(dmfep->chip_state == CHIP_RESET ||
	    dmfep->chip_state == CHIP_STOPPED);

	/*
	 * Make opmode consistent with PHY duplex setting
	 */
	gpsr = dmfe_chip_get32(dmfep, PHY_STATUS_REG);
	if (gpsr & GPS_FULL_DUPLEX)
		dmfep->opmode |= FULL_DUPLEX;
	else
		dmfep->opmode &= ~FULL_DUPLEX;

	/*
	 * Start transmit processing
	 * Set up the address filters
	 * Start receive processing
	 * Enable interrupts
	 */
	dmfe_start_chip(dmfep, START_TRANSMIT);
	(void) dmfe_send_setup(dmfep);
	drv_usecwait(10);
	dmfe_start_chip(dmfep, START_RECEIVE);
	dmfe_enable_interrupts(dmfep);
}

/*
 * dmfe_restart - restart transmitting/receiving after error or suspend
 */
static void
dmfe_restart(dmfe_t *dmfep)
{
	ASSERT(mutex_owned(dmfep->oplock));

	/*
	 * You need not only <oplock>, but also <rxlock> AND <txlock>
	 * in order to reset the rings, but then <txlock> *mustn't*
	 * be held across the call to dmfe_start()
	 */
	mutex_enter(dmfep->rxlock);
	mutex_enter(dmfep->txlock);
	dmfe_reset(dmfep);
	mutex_exit(dmfep->txlock);
	mutex_exit(dmfep->rxlock);
	if (dmfep->mac_state == DMFE_MAC_STARTED) {
		dmfe_start(dmfep);
	}
}


/*
 * ========== MAC-required management entry points ==========
 */

/*
 *	dmfe_m_stop() -- stop transmitting/receiving
 */
static void
dmfe_m_stop(void *arg)
{
	dmfe_t *dmfep = arg;			/* private device info	*/

	/*
	 * Just stop processing, then record new MAC state
	 */
	mii_stop(dmfep->mii);

	mutex_enter(dmfep->oplock);
	if (!dmfep->suspended)
		dmfe_stop(dmfep);
	dmfep->mac_state = DMFE_MAC_STOPPED;
	mutex_exit(dmfep->oplock);
}

/*
 *	dmfe_m_start() -- start transmitting/receiving
 */
static int
dmfe_m_start(void *arg)
{
	dmfe_t *dmfep = arg;			/* private device info	*/

	/*
	 * Start processing and record new MAC state
	 */
	mutex_enter(dmfep->oplock);
	if (!dmfep->suspended)
		dmfe_start(dmfep);
	dmfep->mac_state = DMFE_MAC_STARTED;
	mutex_exit(dmfep->oplock);

	mii_start(dmfep->mii);

	return (0);
}

/*
 * dmfe_m_promisc() -- set or reset promiscuous mode on the board
 *
 *	Program the hardware to enable/disable promiscuous and/or
 *	receive-all-multicast modes.  Davicom don't document this
 *	clearly, but it looks like we can do this on-the-fly (i.e.
 *	without stopping & restarting the TX/RX processes).
 */
static int
dmfe_m_promisc(void *arg, boolean_t on)
{
	dmfe_t *dmfep = arg;

	mutex_enter(dmfep->oplock);
	dmfep->opmode &= ~(PROMISC_MODE | PASS_MULTICAST);
	if (on)
		dmfep->opmode |= PROMISC_MODE;
	if (!dmfep->suspended)
		dmfe_set_opmode(dmfep);
	mutex_exit(dmfep->oplock);

	return (0);
}

/*
 * ========== Factotum, implemented as a softint handler ==========
 */

/*
 * The factotum is woken up when there's something to do that we'd rather
 * not do from inside a (high-level?) hardware interrupt handler.  Its
 * two main tasks are:
 *	reset & restart the chip after an error
 *	update & restart the chip after a link status change
 */
static uint_t
dmfe_factotum(caddr_t arg)
{
	dmfe_t *dmfep;

	dmfep = (void *)arg;
	ASSERT(dmfep->dmfe_guard == DMFE_GUARD);

	mutex_enter(dmfep->oplock);
	if (dmfep->suspended) {
		mutex_exit(dmfep->oplock);
		return (DDI_INTR_CLAIMED);
	}

	dmfep->factotum_flag = 0;
	DRV_KS_INC(dmfep, KS_FACTOTUM_RUN);

	/*
	 * Check for chip error ...
	 */
	if (dmfep->chip_state == CHIP_ERROR) {
		/*
		 * Error recovery required: reset the chip and the rings,
		 * then, if it's supposed to be running, kick it off again.
		 */
		DRV_KS_INC(dmfep, KS_RECOVERY);
		dmfe_restart(dmfep);
		mutex_exit(dmfep->oplock);

		mii_reset(dmfep->mii);

	} else if (dmfep->need_setup) {
		(void) dmfe_send_setup(dmfep);
		mutex_exit(dmfep->oplock);
	}

	return (DDI_INTR_CLAIMED);
}

static void
dmfe_wake_factotum(dmfe_t *dmfep, int ks_id, const char *why)
{
	_NOTE(ARGUNUSED(why));
	ASSERT(mutex_owned(dmfep->oplock));
	DRV_KS_INC(dmfep, ks_id);

	if (dmfep->factotum_flag++ == 0)
		ddi_trigger_softintr(dmfep->factotum_id);
}


/*
 * ========== Periodic Tasks (Cyclic handler & friends) ==========
 */

/*
 * Periodic tick tasks, run from the cyclic handler
 *
 * Check for TX stall; flag an error and wake the factotum if so.
 */
static void
dmfe_tick_stall_check(dmfe_t *dmfep, uint32_t gpsr, uint32_t istat)
{
	boolean_t tx_stall;
	uint32_t tx_state;
	uint32_t limit;

	ASSERT(mutex_owned(dmfep->oplock));

	/*
	 * Check for transmit stall ...
	 *
	 * IF there's at least one packet in the ring, AND the timeout
	 * has elapsed, AND we can't reclaim any descriptors, THEN we've
 *	stalled; we flag this to trigger a reset-and-recover cycle.
	 *
	 * Note that the timeout limit is based on the transmit engine
	 * state; we allow the transmitter longer to make progress in
	 * some states than in others, based on observations of this
	 * chip's actual behaviour in the lab.
	 *
	 * By observation, we find that on about 1 in 10000 passes through
	 * here, the TX lock is already held.  In that case, we'll skip
	 * the check on this pass rather than wait.  Most likely, the send
	 * routine was holding the lock when the interrupt happened, and
	 * we'll succeed next time through.  In the event of a real stall,
	 * the TX ring will fill up, after which the send routine won't be
	 * called any more and then we're sure to get in.
	 */
	tx_stall = B_FALSE;
	if (mutex_tryenter(dmfep->txlock)) {
		if (dmfep->tx.n_free < dmfep->tx.n_desc) {
			tx_state = TX_PROCESS_STATE(istat);
			if (gpsr & GPS_LINK_100)
				limit = stall_100_tix[tx_state];
			else
				limit = stall_10_tix[tx_state];
			if (++dmfep->tx_pending_tix >= limit &&
			    dmfe_reclaim_tx_desc(dmfep) == B_FALSE) {
				dmfe_log(dmfep, "TX stall detected "
				    "after %d ticks in state %d; "
				    "automatic recovery initiated",
				    dmfep->tx_pending_tix, tx_state);
				tx_stall = B_TRUE;
			}
		}
		mutex_exit(dmfep->txlock);
	}

	if (tx_stall) {
		dmfe_stop_chip(dmfep, CHIP_ERROR);
		dmfe_wake_factotum(dmfep, KS_TX_STALL, "tick (TX stall)");
	}
}

/*
 * Cyclic callback handler
 */
static void
dmfe_cyclic(void *arg)
{
	dmfe_t *dmfep = arg;			/* private device info */
	uint32_t istat;
	uint32_t gpsr;

	/*
	 * If the chip's not RUNNING, there's nothing to do.
	 * If we can't get the mutex straight away, we'll just
	 * skip this pass; we'll be back soon enough anyway.
	 */
	if (mutex_tryenter(dmfep->oplock) == 0)
		return;
	if ((dmfep->suspended) || (dmfep->chip_state != CHIP_RUNNING)) {
		mutex_exit(dmfep->oplock);
		return;
	}

	/*
	 * Recheck chip state (it might have been stopped since we
	 * checked above).  If still running, call each of the *tick*
	 * tasks.  They will check for link change, TX stall, etc ...
	 */
	if (dmfep->chip_state == CHIP_RUNNING) {
		istat = dmfe_chip_get32(dmfep, STATUS_REG);
		gpsr = dmfe_chip_get32(dmfep, PHY_STATUS_REG);
		dmfe_tick_stall_check(dmfep, gpsr, istat);
	}

	DRV_KS_INC(dmfep, KS_CYCLIC_RUN);
	mutex_exit(dmfep->oplock);
}

/*
 * ========== Hardware interrupt handler ==========
 */

/*
 *	dmfe_interrupt() -- handle chip interrupts
 */
static uint_t
dmfe_interrupt(caddr_t arg)
{
	dmfe_t *dmfep;			/* private device info */
	uint32_t interrupts;
	uint32_t istat;
	const char *msg;
	mblk_t *mp;
	boolean_t warning_msg = B_TRUE;

	dmfep = (void *)arg;

	mutex_enter(dmfep->oplock);
	if (dmfep->suspended) {
		mutex_exit(dmfep->oplock);
		return (DDI_INTR_UNCLAIMED);
	}

	/*
	 * A quick check as to whether the interrupt was from this
	 * device, before we even finish setting up all our local
	 * variables.  Note that reading the interrupt status register
	 * doesn't have any unpleasant side effects such as clearing
	 * the bits read, so it's quite OK to re-read it once we have
	 * determined that we are going to service this interrupt and
	 * grabbed the mutexen.
	 */
	istat = dmfe_chip_get32(dmfep, STATUS_REG);
	if ((istat & (NORMAL_SUMMARY_INT | ABNORMAL_SUMMARY_INT)) == 0) {

		mutex_exit(dmfep->oplock);
		return (DDI_INTR_UNCLAIMED);
	}

	DRV_KS_INC(dmfep, KS_INTERRUPT);

	/*
	 * Identify bits that represent enabled interrupts ...
	 */
	istat |= dmfe_chip_get32(dmfep, STATUS_REG);
	interrupts = istat & dmfep->imask;
	ASSERT(interrupts != 0);

	DTRACE_PROBE1(intr, uint32_t, istat);

	/*
	 * Check for any interrupts other than TX/RX done.
	 * If there are any, they are considered Abnormal
	 * and will cause the chip to be reset.
	 */
	if (interrupts & ~(RX_PKTDONE_INT | TX_PKTDONE_INT)) {
		if (istat & ABNORMAL_SUMMARY_INT) {
			/*
			 * Any Abnormal interrupts will lead to us
			 * resetting the chip, so we don't bother
			 * to clear each interrupt individually.
			 *
			 * Our main task here is to identify the problem,
			 * by pointing out the most significant unexpected
			 * bit.  Additional bits may well be consequences
			 * of the first problem, so we consider the possible
			 * causes in order of severity.
			 */
			if (interrupts & SYSTEM_ERR_INT) {
				switch (istat & SYSTEM_ERR_BITS) {
				case SYSTEM_ERR_M_ABORT:
					msg = "Bus Master Abort";
					break;

				case SYSTEM_ERR_T_ABORT:
					msg = "Bus Target Abort";
					break;

				case SYSTEM_ERR_PARITY:
					msg = "Parity Error";
					break;

				default:
					msg = "Unknown System Bus Error";
					break;
				}
			} else if (interrupts & RX_STOPPED_INT) {
				msg = "RX process stopped";
			} else if (interrupts & RX_UNAVAIL_INT) {
				msg = "RX buffer unavailable";
				warning_msg = B_FALSE;
			} else if (interrupts & RX_WATCHDOG_INT) {
				msg = "RX watchdog timeout?";
			} else if (interrupts & RX_EARLY_INT) {
				msg = "RX early interrupt?";
			} else if (interrupts & TX_STOPPED_INT) {
				msg = "TX process stopped";
			} else if (interrupts & TX_JABBER_INT) {
				msg = "TX jabber timeout";
			} else if (interrupts & TX_UNDERFLOW_INT) {
				msg = "TX underflow?";
			} else if (interrupts & TX_EARLY_INT) {
				msg = "TX early interrupt?";

			} else if (interrupts & LINK_STATUS_INT) {
				msg = "Link status change?";
			} else if (interrupts & GP_TIMER_INT) {
				msg = "Timer expired?";
			}

			if (warning_msg)
				dmfe_warning(dmfep, "abnormal interrupt, "
				    "status 0x%x: %s", istat, msg);

			/*
			 * We don't want to run the entire reinitialisation
			 * code out of this (high-level?) interrupt, so we
			 * simply STOP the chip, and wake up the factotum
			 * to reinitialise it ...
			 */
			dmfe_stop_chip(dmfep, CHIP_ERROR);
			dmfe_wake_factotum(dmfep, KS_CHIP_ERROR,
			    "interrupt (error)");
		} else {
			/*
			 * We shouldn't really get here (it would mean
			 * there were some unprocessed enabled bits but
			 * they weren't Abnormal?), but we'll check just
			 * in case ...
			 */
			DTRACE_PROBE1(intr__unexpected, uint32_t, istat);
		}
	}

	/*
	 * Acknowledge all the original bits - except in the case of an
	 * error, when we leave them unacknowledged so that the recovery
	 * code can see what was going on when the problem occurred ...
	 */
	if (dmfep->chip_state != CHIP_ERROR) {
		(void) dmfe_chip_put32(dmfep, STATUS_REG, istat);
		/*
		 * Read-after-write forces completion on the PCI bus.
		 */
		(void) dmfe_chip_get32(dmfep, STATUS_REG);
	}


	/*
	 * We've finished talking to the chip, so we can drop <oplock>
	 * before handling the normal interrupts, which only involve
	 * manipulation of descriptors ...
	 */
	mutex_exit(dmfep->oplock);

	if (interrupts & RX_PKTDONE_INT)
		if ((mp = dmfe_getp(dmfep)) != NULL)
			mac_rx(dmfep->mh, NULL, mp);

	if (interrupts & TX_PKTDONE_INT) {
		/*
		 * The only reason for taking this interrupt is to give
		 * MAC a chance to schedule queued packets after a
		 * ring-full condition.  To minimise the number of
		 * redundant TX-Done interrupts, we only mark two of the
		 * ring descriptors as 'interrupt-on-complete' - all the
		 * others are simply handed back without an interrupt.
		 */
		if (dmfe_reclaim_on_done && mutex_tryenter(dmfep->txlock)) {
			(void) dmfe_reclaim_tx_desc(dmfep);
			mutex_exit(dmfep->txlock);
		}
		mac_tx_update(dmfep->mh);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * ========== Statistics update handler ==========
 */

static int
dmfe_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	dmfe_t *dmfep = arg;
	int rv = 0;

	/* Let MII handle its own stats. */
	if (mii_m_getstat(dmfep->mii, stat, val) == 0) {
		return (0);
	}

	mutex_enter(dmfep->oplock);
	mutex_enter(dmfep->rxlock);
	mutex_enter(dmfep->txlock);

	/* make sure we have all the stats collected */
	(void) dmfe_reclaim_tx_desc(dmfep);

	switch (stat) {

	case MAC_STAT_IPACKETS:
		*val = dmfep->rx_stats_ipackets;
		break;

	case MAC_STAT_MULTIRCV:
		*val = dmfep->rx_stats_multi;
		break;

	case MAC_STAT_BRDCSTRCV:
		*val = dmfep->rx_stats_bcast;
		break;

	case MAC_STAT_RBYTES:
		*val = dmfep->rx_stats_rbytes;
		break;

	case MAC_STAT_IERRORS:
		*val = dmfep->rx_stats_ierrors;
		break;

	case MAC_STAT_NORCVBUF:
		*val = dmfep->rx_stats_norcvbuf;
		break;

	case MAC_STAT_COLLISIONS:
		*val = dmfep->tx_stats_collisions;
		break;

	case MAC_STAT_OERRORS:
		*val = dmfep->tx_stats_oerrors;
		break;

	case MAC_STAT_OPACKETS:
		*val = dmfep->tx_stats_opackets;
		break;

	case MAC_STAT_MULTIXMT:
		*val = dmfep->tx_stats_multi;
		break;

	case MAC_STAT_BRDCSTXMT:
		*val = dmfep->tx_stats_bcast;
		break;

	case MAC_STAT_OBYTES:
		*val = dmfep->tx_stats_obytes;
		break;

	case MAC_STAT_OVERFLOWS:
		*val = dmfep->rx_stats_overflow;
		break;

	case MAC_STAT_UNDERFLOWS:
		*val = dmfep->tx_stats_underflow;
		break;

	case ETHER_STAT_ALIGN_ERRORS:
		*val = dmfep->rx_stats_align;
		break;

	case ETHER_STAT_FCS_ERRORS:
		*val = dmfep->rx_stats_fcs;
		break;

	case ETHER_STAT_TOOLONG_ERRORS:
		*val = dmfep->rx_stats_toolong;
		break;

	case ETHER_STAT_TOOSHORT_ERRORS:
		*val = dmfep->rx_stats_short;
		break;

	case ETHER_STAT_MACRCV_ERRORS:
		*val = dmfep->rx_stats_macrcv_errors;
		break;

	case ETHER_STAT_MACXMT_ERRORS:
		*val = dmfep->tx_stats_macxmt_errors;
		break;

	case ETHER_STAT_JABBER_ERRORS:
		*val = dmfep->tx_stats_jabber;
		break;

	case ETHER_STAT_CARRIER_ERRORS:
		*val = dmfep->tx_stats_nocarrier;
		break;

	case ETHER_STAT_TX_LATE_COLLISIONS:
		*val = dmfep->tx_stats_xmtlatecoll;
		break;

	case ETHER_STAT_EX_COLLISIONS:
		*val = dmfep->tx_stats_excoll;
		break;

	case ETHER_STAT_DEFER_XMTS:
		*val = dmfep->tx_stats_defer;
		break;

	case ETHER_STAT_FIRST_COLLISIONS:
		*val = dmfep->tx_stats_first_coll;
		break;

	case ETHER_STAT_MULTI_COLLISIONS:
		*val = dmfep->tx_stats_multi_coll;
		break;

	default:
		rv = ENOTSUP;
	}

	mutex_exit(dmfep->txlock);
	mutex_exit(dmfep->rxlock);
	mutex_exit(dmfep->oplock);

	return (rv);
}

/*
 * ========== Ioctl handler & subfunctions ==========
 */

static lb_property_t dmfe_loopmodes[] = {
	{ normal,	"normal",	0 },
	{ internal,	"Internal",	1 },
	{ external,	"External",	2 },
};

/*
 * Specific dmfe IOCTLs; the mac module handles the generic ones.
 * Unfortunately, the DM9102 doesn't seem to work well with MII-based
 * loopback, so we have to do something special for it.
 */

static void
dmfe_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	dmfe_t		*dmfep = arg;
	struct iocblk	*iocp;
	int		rv = 0;
	lb_info_sz_t	sz;
	int		cmd;
	uint32_t	mode;

	iocp = (void *)mp->b_rptr;
	cmd = iocp->ioc_cmd;

	if (mp->b_cont == NULL) {
		/*
		 * All of these ioctls need data!
		 */
		miocnak(wq, mp, 0, EINVAL);
		return;
	}

	switch (cmd) {
	case LB_GET_INFO_SIZE:
		if (iocp->ioc_count != sizeof (sz)) {
			rv = EINVAL;
		} else {
			sz = sizeof (dmfe_loopmodes);
			bcopy(&sz, mp->b_cont->b_rptr, sizeof (sz));
		}
		break;

	case LB_GET_INFO:
		if (iocp->ioc_count != sizeof (dmfe_loopmodes)) {
			rv = EINVAL;
		} else {
			bcopy(dmfe_loopmodes, mp->b_cont->b_rptr,
			    iocp->ioc_count);
		}
		break;

	case LB_GET_MODE:
		if (iocp->ioc_count != sizeof (mode)) {
			rv = EINVAL;
		} else {
			mutex_enter(dmfep->oplock);
			switch (dmfep->opmode & LOOPBACK_MODE_MASK) {
			case LOOPBACK_OFF:
				mode = 0;
				break;
			case LOOPBACK_INTERNAL:
				mode = 1;
				break;
			default:
				mode = 2;
				break;
			}
			mutex_exit(dmfep->oplock);
			bcopy(&mode, mp->b_cont->b_rptr, sizeof (mode));
		}
		break;

	case LB_SET_MODE:
		rv = secpolicy_net_config(iocp->ioc_cr, B_FALSE);
		if (rv != 0)
			break;
		if (iocp->ioc_count != sizeof (mode)) {
			rv = EINVAL;
			break;
		}
		bcopy(mp->b_cont->b_rptr, &mode, sizeof (mode));

		mutex_enter(dmfep->oplock);
		dmfep->opmode &= ~LOOPBACK_MODE_MASK;
		switch (mode) {
		case 2:
			dmfep->opmode |= LOOPBACK_PHY_D;
			break;
		case 1:
			dmfep->opmode |= LOOPBACK_INTERNAL;
			break;
		default:
			break;
		}
		if (!dmfep->suspended) {
			dmfe_restart(dmfep);
		}
		mutex_exit(dmfep->oplock);
		break;

	default:
		rv = EINVAL;
		break;
	}

	if (rv == 0) {
		miocack(wq, mp, iocp->ioc_count, 0);
	} else {
		miocnak(wq, mp, 0, rv);
	}
}
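
/*
 * For reference, a userland test harness might drive the loopback
 * ioctls above roughly as follows (a sketch only: error handling is
 * omitted, and the LB_* definitions from <sys/netlb.h> are assumed):
 *
 *	uint32_t mode = 1;		(1 == internal loopback)
 *	struct strioctl sioc;
 *
 *	sioc.ic_cmd = LB_SET_MODE;
 *	sioc.ic_timout = 0;
 *	sioc.ic_len = sizeof (mode);
 *	sioc.ic_dp = (char *)&mode;
 *	(void) ioctl(fd, I_STR, &sioc);
 */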

int
dmfe_m_getprop(void *arg, const char *name, mac_prop_id_t num, uint_t flags,
    uint_t sz, void *val, uint_t *perm)
{
	dmfe_t		*dmfep = arg;

	return (mii_m_getprop(dmfep->mii, name, num, flags, sz, val, perm));
}

int
dmfe_m_setprop(void *arg, const char *name, mac_prop_id_t num, uint_t sz,
    const void *val)
{
	dmfe_t		*dmfep = arg;

	return (mii_m_setprop(dmfep->mii, name, num, sz, val));
}


/*
 * ========== Per-instance setup/teardown code ==========
 */

/*
 * Determine local MAC address & broadcast address for this interface
 */
static void
dmfe_find_mac_address(dmfe_t *dmfep)
{
	uchar_t *prop;
	uint_t propsize;
	int err;

	/*
	 * We have to find the "vendor's factory-set address".  This is
	 * the value of the property "local-mac-address", as set by OBP
	 * (or a .conf file!)
	 *
	 * If the property is not there, then we try to find the factory
	 * MAC address in the device's serial EEPROM.
	 */
	bzero(dmfep->curr_addr, sizeof (dmfep->curr_addr));
	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dmfep->devinfo,
	    DDI_PROP_DONTPASS, localmac_propname, &prop, &propsize);
	if (err == DDI_PROP_SUCCESS) {
		if (propsize == ETHERADDRL)
			ethaddr_copy(prop, dmfep->curr_addr);
		ddi_prop_free(prop);
	} else {
		/* no property set... check eeprom */
		dmfe_read_eeprom(dmfep, EEPROM_EN_ADDR, dmfep->curr_addr,
		    ETHERADDRL);
	}
}

static int
dmfe_alloc_dma_mem(dmfe_t *dmfep, size_t memsize,
	size_t setup, size_t slop, ddi_device_acc_attr_t *attr_p,
	uint_t dma_flags, dma_area_t *dma_p)
{
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int err;

	/*
	 * Allocate handle
	 */
	err = ddi_dma_alloc_handle(dmfep->devinfo, &dma_attr,
	    DDI_DMA_SLEEP, NULL, &dma_p->dma_hdl);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "DMA handle allocation failed");
		return (DDI_FAILURE);
	}

	/*
	 * Allocate memory
	 */
	err = ddi_dma_mem_alloc(dma_p->dma_hdl, memsize + setup + slop,
	    attr_p, dma_flags & (DDI_DMA_CONSISTENT | DDI_DMA_STREAMING),
	    DDI_DMA_SLEEP, NULL,
	    &dma_p->mem_va, &dma_p->alength, &dma_p->acc_hdl);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "DMA memory allocation failed: %d", err);
		return (DDI_FAILURE);
	}

	/*
	 * Bind the two together
	 */
	err = ddi_dma_addr_bind_handle(dma_p->dma_hdl, NULL,
	    dma_p->mem_va, dma_p->alength, dma_flags,
	    DDI_DMA_SLEEP, NULL, &dma_cookie, &ncookies);
	if (err != DDI_DMA_MAPPED) {
		dmfe_error(dmfep, "DMA mapping failed: %d", err);
		return (DDI_FAILURE);
	}
	if ((dma_p->ncookies = ncookies) != 1) {
		dmfe_error(dmfep, "Too many DMA cookies: %d", ncookies);
		return (DDI_FAILURE);
	}

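	/*
	 * Save the VA/DVMA base addresses.  The optional setup area
	 * shares the single DMA binding, starting <memsize> bytes in;
	 * the <slop> bytes beyond that exist only to absorb the chip's
	 * read-ahead (see DMFE_SLOP) and are never addressed directly.
	 */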
	dma_p->mem_dvma = dma_cookie.dmac_address;
	if (setup > 0) {
		dma_p->setup_dvma = dma_p->mem_dvma + memsize;
		dma_p->setup_va = dma_p->mem_va + memsize;
	} else {
		dma_p->setup_dvma = 0;
		dma_p->setup_va = NULL;
	}

	return (DDI_SUCCESS);
}

/*
 * This function allocates the transmit and receive buffers and descriptors.
 */
static int
dmfe_alloc_bufs(dmfe_t *dmfep)
{
	size_t memsize;
	int err;

	/*
	 * Allocate memory & handles for TX descriptor ring
	 */
	memsize = dmfep->tx.n_desc * sizeof (struct tx_desc_type);
	err = dmfe_alloc_dma_mem(dmfep, memsize, SETUPBUF_SIZE, DMFE_SLOP,
	    &dmfe_reg_accattr, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    &dmfep->tx_desc);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "TX descriptor allocation failed");
		return (DDI_FAILURE);
	}

	/*
	 * Allocate memory & handles for TX buffers
	 */
	memsize = dmfep->tx.n_desc * DMFE_BUF_SIZE;
	err = dmfe_alloc_dma_mem(dmfep, memsize, 0, 0,
	    &dmfe_data_accattr, DDI_DMA_WRITE | DMFE_DMA_MODE,
	    &dmfep->tx_buff);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "TX buffer allocation failed");
		return (DDI_FAILURE);
	}

	/*
	 * Allocate memory & handles for RX descriptor ring
	 */
	memsize = dmfep->rx.n_desc * sizeof (struct rx_desc_type);
	err = dmfe_alloc_dma_mem(dmfep, memsize, 0, DMFE_SLOP,
	    &dmfe_reg_accattr, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    &dmfep->rx_desc);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "RX descriptor allocation failed");
		return (DDI_FAILURE);
	}

	/*
	 * Allocate memory & handles for RX buffers
	 */
	memsize = dmfep->rx.n_desc * DMFE_BUF_SIZE;
	err = dmfe_alloc_dma_mem(dmfep, memsize, 0, 0,
	    &dmfe_data_accattr, DDI_DMA_READ | DMFE_DMA_MODE, &dmfep->rx_buff);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "RX buffer allocation failed");
		return (DDI_FAILURE);
	}

	/*
	 * Allocate bitmasks for tx packet type tracking: one bit per
	 * TX descriptor.  (n_desc is assumed to be a multiple of NBBY;
	 * otherwise the divisions here and in dmfe_free_bufs() would
	 * truncate.)
	 */
	dmfep->tx_mcast = kmem_zalloc(dmfep->tx.n_desc / NBBY, KM_SLEEP);
	dmfep->tx_bcast = kmem_zalloc(dmfep->tx.n_desc / NBBY, KM_SLEEP);

	return (DDI_SUCCESS);
}

static void
dmfe_free_dma_mem(dma_area_t *dma_p)
{
	if (dma_p->dma_hdl != NULL) {
		if (dma_p->ncookies) {
			(void) ddi_dma_unbind_handle(dma_p->dma_hdl);
			dma_p->ncookies = 0;
		}
		ddi_dma_free_handle(&dma_p->dma_hdl);
		dma_p->dma_hdl = NULL;
		dma_p->mem_dvma = 0;
		dma_p->setup_dvma = 0;
	}

	if (dma_p->acc_hdl != NULL) {
		ddi_dma_mem_free(&dma_p->acc_hdl);
		dma_p->acc_hdl = NULL;
		dma_p->mem_va = NULL;
		dma_p->setup_va = NULL;
	}
}

/*
 * This routine frees the transmit and receive buffers and descriptors.
 * Make sure the chip is stopped before calling it!  It is safe to call
 * on a partially-allocated instance, as dmfe_free_dma_mem() checks its
 * handles for NULL and the bitmask frees are guarded.
 */
static void
dmfe_free_bufs(dmfe_t *dmfep)
{
	dmfe_free_dma_mem(&dmfep->rx_buff);
	dmfe_free_dma_mem(&dmfep->rx_desc);
	dmfe_free_dma_mem(&dmfep->tx_buff);
	dmfe_free_dma_mem(&dmfep->tx_desc);
	if (dmfep->tx_mcast)
		kmem_free(dmfep->tx_mcast, dmfep->tx.n_desc / NBBY);
	if (dmfep->tx_bcast)
		kmem_free(dmfep->tx_bcast, dmfep->tx.n_desc / NBBY);
}

static void
dmfe_unattach(dmfe_t *dmfep)
{
	/*
	 * Clean up and free all DMFE data structures.  This is called
	 * both from detach() and from the attach() failure path, so
	 * each teardown step is guarded by the relevant progress flag
	 * or a NULL check.
	 */
	if (dmfep->cycid != NULL) {
		ddi_periodic_delete(dmfep->cycid);
		dmfep->cycid = NULL;
	}

	if (dmfep->ksp_drv != NULL)
		kstat_delete(dmfep->ksp_drv);
	if (dmfep->progress & PROGRESS_HWINT) {
		ddi_remove_intr(dmfep->devinfo, 0, dmfep->iblk);
	}
	if (dmfep->progress & PROGRESS_SOFTINT)
		ddi_remove_softintr(dmfep->factotum_id);
	if (dmfep->mii != NULL)
		mii_free(dmfep->mii);
	if (dmfep->progress & PROGRESS_MUTEX) {
		mutex_destroy(dmfep->txlock);
		mutex_destroy(dmfep->rxlock);
		mutex_destroy(dmfep->oplock);
	}
	dmfe_free_bufs(dmfep);
	if (dmfep->io_handle != NULL)
		ddi_regs_map_free(&dmfep->io_handle);

	kmem_free(dmfep, sizeof (*dmfep));
}

static int
dmfe_config_init(dmfe_t *dmfep, chip_id_t *idp)
{
	ddi_acc_handle_t handle;
	uint32_t regval;

	if (pci_config_setup(dmfep->devinfo, &handle) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/*
	 * Get vendor/device/revision.  We expect (but don't check) that
	 * (vendorid == DAVICOM_VENDOR_ID) && (deviceid == DEVICE_ID_9102)
	 */
	idp->vendor = pci_config_get16(handle, PCI_CONF_VENID);
	idp->device = pci_config_get16(handle, PCI_CONF_DEVID);
	idp->revision = pci_config_get8(handle, PCI_CONF_REVID);

	/*
	 * Turn on Bus Master Enable bit and ensure the device is not asleep
	 */
	regval = pci_config_get32(handle, PCI_CONF_COMM);
	pci_config_put32(handle, PCI_CONF_COMM, (regval | PCI_COMM_ME));

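	/* clear the SLEEP and SNOOZE bits in Davicom's CFDD register */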
	regval = pci_config_get32(handle, PCI_DMFE_CONF_CFDD);
	pci_config_put32(handle, PCI_DMFE_CONF_CFDD,
	    regval & ~(CFDD_SLEEP | CFDD_SNOOZE));

	pci_config_teardown(&handle);
	return (DDI_SUCCESS);
}

struct ks_index {
	int index;
	char *name;
};

static const struct ks_index ks_drv_names[] = {
	{	KS_INTERRUPT,			"intr"			},
	{	KS_CYCLIC_RUN,			"cyclic_run"		},

	{	KS_TX_STALL,			"tx_stall_detect"	},
	{	KS_CHIP_ERROR,			"chip_error_interrupt"	},

	{	KS_FACTOTUM_RUN,		"factotum_run"		},
	{	KS_RECOVERY,			"factotum_recover"	},

	{	-1,				NULL			}
};
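
/*
 * Note that the table above is terminated by a { -1, NULL } sentinel,
 * and that each KS_* index must be a distinct value in the range
 * [0, KS_DRV_COUNT): dmfe_init_kstats() below uses it to index the
 * named-kstat array directly.
 */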

static void
dmfe_init_kstats(dmfe_t *dmfep, int instance)
{
	kstat_t *ksp;
	kstat_named_t *knp;
	const struct ks_index *ksip;

	/* no need to create MII stats; the mac module already does that */

	/* Create and initialise driver-defined kstats */
	ksp = kstat_create(DRIVER_NAME, instance, "dmfe_events", "net",
	    KSTAT_TYPE_NAMED, KS_DRV_COUNT, KSTAT_FLAG_PERSISTENT);
	if (ksp != NULL) {
		for (knp = ksp->ks_data, ksip = ks_drv_names;
		    ksip->name != NULL; ++ksip) {
			kstat_named_init(&knp[ksip->index], ksip->name,
			    KSTAT_DATA_UINT64);
		}
		dmfep->ksp_drv = ksp;
		dmfep->knp_drv = knp;
		kstat_install(ksp);
	} else {
		dmfe_error(dmfep, "kstat_create() for dmfe_events failed");
	}
}
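
/*
 * The event counters created above can be examined from userland
 * with e.g. "kstat -m dmfe -n dmfe_events" (see kstat(1M)).
 */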

static int
dmfe_resume(dev_info_t *devinfo)
{
	dmfe_t *dmfep;				/* Our private data	*/
	chip_id_t chipid;
	boolean_t restart = B_FALSE;

	dmfep = ddi_get_driver_private(devinfo);
	if (dmfep == NULL)
		return (DDI_FAILURE);

	/*
	 * Refuse to resume if the data structures aren't consistent
	 */
	if (dmfep->devinfo != devinfo)
		return (DDI_FAILURE);

	/*
	 * Refuse to resume if the chip's changed its identity (*boggle*)
	 */
	if (dmfe_config_init(dmfep, &chipid) != DDI_SUCCESS)
		return (DDI_FAILURE);
	if (chipid.vendor != dmfep->chipid.vendor)
		return (DDI_FAILURE);
	if (chipid.device != dmfep->chipid.device)
		return (DDI_FAILURE);
	if (chipid.revision != dmfep->chipid.revision)
		return (DDI_FAILURE);

	mutex_enter(dmfep->oplock);
	mutex_enter(dmfep->txlock);
	dmfep->suspended = B_FALSE;
	mutex_exit(dmfep->txlock);

	/*
	 * All OK, reinitialise h/w & kick off MAC scheduling
	 */
	if (dmfep->mac_state == DMFE_MAC_STARTED) {
		dmfe_restart(dmfep);
		restart = B_TRUE;
	}
	mutex_exit(dmfep->oplock);

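	/*
	 * Note that mii_resume() and mac_tx_update() are deliberately
	 * called only after oplock has been dropped, so that we don't
	 * call up into the MAC layer with driver locks held.
	 */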
	if (restart) {
		mii_resume(dmfep->mii);
		mac_tx_update(dmfep->mh);
	}
	return (DDI_SUCCESS);
}

/*
 * attach(9E) -- Attach a device to the system
 *
 * Called once for each board successfully probed.
 */
static int
dmfe_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
	mac_register_t *macp;
	dmfe_t *dmfep;				/* Our private data	*/
	uint32_t csr6;
	int instance;
	int err;

	instance = ddi_get_instance(devinfo);

	switch (cmd) {
	default:
		return (DDI_FAILURE);

	case DDI_RESUME:
		return (dmfe_resume(devinfo));

	case DDI_ATTACH:
		break;
	}

	dmfep = kmem_zalloc(sizeof (*dmfep), KM_SLEEP);
	ddi_set_driver_private(devinfo, dmfep);
	dmfep->devinfo = devinfo;
	dmfep->dmfe_guard = DMFE_GUARD;

	/*
	 * Initialize more fields in DMFE private data
	 * Determine the local MAC address
	 */
#if	DMFEDEBUG
	dmfep->debug = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo, 0,
	    debug_propname, dmfe_debug);
#endif	/* DMFEDEBUG */
	dmfep->cycid = NULL;
	(void) snprintf(dmfep->ifname, sizeof (dmfep->ifname), "dmfe%d",
	    instance);

	/*
	 * Check for custom "opmode-reg-value" property;
	 * if none, use the defaults below for CSR6 ...
	 */
	csr6 = TX_THRESHOLD_HI | STORE_AND_FORWARD | EXT_MII_IF | OPN_25_MB1;
	dmfep->opmode = ddi_prop_get_int(DDI_DEV_T_ANY, devinfo,
	    DDI_PROP_DONTPASS, opmode_propname, csr6);
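
	/*
	 * (A dmfe.conf(4) entry of the form "opmode-reg-value=0x...;"
	 * supplies the raw CSR6 value to use instead of the default.)
	 */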

	/*
	 * Read chip ID & set up config space command register(s)
	 */
	if (dmfe_config_init(dmfep, &dmfep->chipid) != DDI_SUCCESS) {
		dmfe_error(dmfep, "dmfe_config_init() failed");
		goto attach_fail;
	}

	/*
	 * Map operating registers
	 */
	err = ddi_regs_map_setup(devinfo, DMFE_PCI_RNUMBER,
	    &dmfep->io_reg, 0, 0, &dmfe_reg_accattr, &dmfep->io_handle);
	if (err != DDI_SUCCESS) {
		dmfe_error(dmfep, "ddi_regs_map_setup() failed");
		goto attach_fail;
	}

	/*
	 * Get our MAC address.
	 */
	dmfe_find_mac_address(dmfep);

	/*
	 * Allocate the TX and RX descriptors/buffers.
	 */
	dmfep->tx.n_desc = dmfe_tx_desc;
	dmfep->rx.n_desc = dmfe_rx_desc;
	err = dmfe_alloc_bufs(dmfep);
	if (err != DDI_SUCCESS) {
		goto attach_fail;
	}

	/*
	 * Add the softint handler
	 */
	if (ddi_add_softintr(devinfo, DDI_SOFTINT_LOW, &dmfep->factotum_id,
	    NULL, NULL, dmfe_factotum, (caddr_t)dmfep) != DDI_SUCCESS) {
		dmfe_error(dmfep, "ddi_add_softintr() failed");
		goto attach_fail;
	}
	dmfep->progress |= PROGRESS_SOFTINT;

	/*
	 * Add the h/w interrupt handler & initialise mutexen
	 */
	if (ddi_get_iblock_cookie(devinfo, 0, &dmfep->iblk) != DDI_SUCCESS) {
		dmfe_error(dmfep, "ddi_get_iblock_cookie() failed");
		goto attach_fail;
	}

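	/*
	 * The operating mutexen are initialised with the h/w interrupt
	 * block cookie, as they may be taken in interrupt context.
	 */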
	mutex_init(dmfep->oplock, NULL, MUTEX_DRIVER, dmfep->iblk);
	mutex_init(dmfep->rxlock, NULL, MUTEX_DRIVER, dmfep->iblk);
	mutex_init(dmfep->txlock, NULL, MUTEX_DRIVER, dmfep->iblk);
	dmfep->progress |= PROGRESS_MUTEX;

	if (ddi_add_intr(devinfo, 0, NULL, NULL,
	    dmfe_interrupt, (caddr_t)dmfep) != DDI_SUCCESS) {
		dmfe_error(dmfep, "ddi_add_intr() failed");
		goto attach_fail;
	}
	dmfep->progress |= PROGRESS_HWINT;

	/*
	 * Create & initialise named kstats
	 */
	dmfe_init_kstats(dmfep, instance);

	/*
	 * Reset & initialise the chip and the ring buffers
	 * Initialise the (internal) PHY
	 */
	mutex_enter(dmfep->oplock);
	mutex_enter(dmfep->rxlock);
	mutex_enter(dmfep->txlock);

	dmfe_reset(dmfep);

	/*
	 * Prepare the setup packet
	 */
	bzero(dmfep->tx_desc.setup_va, SETUPBUF_SIZE);
	bzero(dmfep->mcast_refs, MCASTBUF_SIZE);
	dmfep->addr_set = B_FALSE;
	dmfep->opmode &= ~(PROMISC_MODE | PASS_MULTICAST);
	dmfep->mac_state = DMFE_MAC_RESET;

	mutex_exit(dmfep->txlock);
	mutex_exit(dmfep->rxlock);
	mutex_exit(dmfep->oplock);

	if (dmfe_init_phy(dmfep) != B_TRUE)
		goto attach_fail;

	/*
	 * Send a reasonable setup frame.  This configures our starting
	 * address and the broadcast address.
	 */
	(void) dmfe_m_unicst(dmfep, dmfep->curr_addr);

	/*
	 * Initialize pointers to device specific functions which
	 * will be used by the generic layer.
	 */
	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		goto attach_fail;
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = dmfep;
	macp->m_dip = devinfo;
	macp->m_src_addr = dmfep->curr_addr;
	macp->m_callbacks = &dmfe_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	macp->m_margin = VLAN_TAGSZ;

	/*
	 * Finally, we're ready to register ourselves with the MAC layer
	 * interface; if this succeeds, we're all ready to start()
	 */
	err = mac_register(macp, &dmfep->mh);
	mac_free(macp);
	if (err != 0)
		goto attach_fail;
	ASSERT(dmfep->dmfe_guard == DMFE_GUARD);

	/*
	 * Install the cyclic callback that we use to check for link
	 * status, transmit stall, etc.  The callback (dmfe_cyclic())
	 * is invoked in kernel context.  Note that ddi_periodic_add(9F)
	 * takes its interval in nanoseconds, hence the conversion from
	 * dmfe_tick_us below.
	 */
	ASSERT(dmfep->cycid == NULL);
	dmfep->cycid = ddi_periodic_add(dmfe_cyclic, dmfep,
	    dmfe_tick_us * 1000, DDI_IPL_0);
	return (DDI_SUCCESS);

attach_fail:
	dmfe_unattach(dmfep);
	return (DDI_FAILURE);
}

/*
 *	dmfe_suspend() -- suspend transmit/receive for powerdown
 */
static int
dmfe_suspend(dmfe_t *dmfep)
{
	/*
	 * Just stop processing ...
	 */
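	/*
	 * mii_suspend() is called before oplock is taken; the mii
	 * layer may call back into the driver, and (judging by the
	 * matching ordering in dmfe_resume()) such upcalls must not
	 * find our locks already held.
	 */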
	mii_suspend(dmfep->mii);
	mutex_enter(dmfep->oplock);
	dmfe_stop(dmfep);

	mutex_enter(dmfep->txlock);
	dmfep->suspended = B_TRUE;
	mutex_exit(dmfep->txlock);
	mutex_exit(dmfep->oplock);

	return (DDI_SUCCESS);
}

/*
 * detach(9E) -- Detach a device from the system
 */
static int
dmfe_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
{
	dmfe_t *dmfep;

	dmfep = ddi_get_driver_private(devinfo);

	switch (cmd) {
	default:
		return (DDI_FAILURE);

	case DDI_SUSPEND:
		return (dmfe_suspend(dmfep));

	case DDI_DETACH:
		break;
	}

	/*
	 * Unregister from the MAC subsystem.  This can fail, in
	 * particular if there are DLPI style-2 streams still open -
	 * in which case we just return failure without shutting
	 * down chip operations.
	 */
	if (mac_unregister(dmfep->mh) != 0)
		return (DDI_FAILURE);

	/*
	 * All activity stopped, so we can clean up & exit
	 */
	dmfe_unattach(dmfep);
	return (DDI_SUCCESS);
}


/*
 * ========== Module Loading Data & Entry Points ==========
 */

DDI_DEFINE_STREAM_OPS(dmfe_dev_ops, nulldev, nulldev, dmfe_attach, dmfe_detach,
	nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);

static struct modldrv dmfe_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	dmfe_ident,		/* short description */
	&dmfe_dev_ops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&dmfe_modldrv, NULL
};

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_init(void)
{
	uint32_t tmp100;
	uint32_t tmp10;
	int i;
	int status;

	/*
	 * Calculate global timing parameters: convert the TX stall
	 * timeouts from microseconds into whole cyclic ticks, rounding up
	 */
	tmp100 = (dmfe_tx100_stall_us + dmfe_tick_us - 1) / dmfe_tick_us;
	tmp10 = (dmfe_tx10_stall_us + dmfe_tick_us - 1) / dmfe_tick_us;

	for (i = 0; i <= TX_PROCESS_MAX_STATE; ++i) {
		switch (i) {
		case TX_PROCESS_STATE(TX_PROCESS_FETCH_DATA):
		case TX_PROCESS_STATE(TX_PROCESS_WAIT_END):
			/*
			 * The chip doesn't spontaneously recover from
			 * a stall in these states, so we reset early
			 */
			stall_100_tix[i] = tmp100;
			stall_10_tix[i] = tmp10;
			break;

		case TX_PROCESS_STATE(TX_PROCESS_SUSPEND):
		default:
			/*
			 * The chip has been seen to spontaneously recover
			 * after an apparent stall in the SUSPEND state,
			 * so we'll allow it rather longer to do so.  As
			 * stalls in other states have not been observed,
			 * we'll use long timeouts for them too ...
			 */
			stall_100_tix[i] = tmp100 * 20;
			stall_10_tix[i] = tmp10 * 20;
			break;
		}
	}

	mac_init_ops(&dmfe_dev_ops, "dmfe");
	status = mod_install(&modlinkage);
	if (status == DDI_SUCCESS)
		dmfe_log_init();

	return (status);
}

int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	if (status == DDI_SUCCESS) {
		mac_fini_ops(&dmfe_dev_ops);
		dmfe_log_fini();
	}

	return (status);
}