/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "nge.h"

/*
 * Bit masks applied to the 32-bit control/status word of a Tx descriptor.
 *
 * TXD_OWN	- nge_tx_recycle() stops reclaiming at the first descriptor
 *		  whose status word still has this bit set.
 * TXD_BCNT_MSK	- bits stripped from the status word by the *_txd_check()
 *		  routines before it is returned to the caller.
 * TXD_ERR and TXD_END are not referenced in this file; presumably they are
 * the error and end-of-packet indications -- verify against the chip spec.
 */
#define	TXD_OWN		0x80000000
#define	TXD_ERR		0x40000000
#define	TXD_END		0x20000000
#define	TXD_BCNT_MSK	0x00003FFF


/*
 * Rebind NGE_DBG to the send category for the rest of this file.
 * NOTE(review): presumably this selects which NGE_DEBUG() messages are
 * emitted -- confirm against the definitions in nge.h.
 */
#undef	NGE_DBG
#define	NGE_DBG		NGE_DBG_SEND

/*
 * Return a software Tx descriptor to its idle state: drop the mblk
 * reference, clear the fragment count, empty its DMA handle list and
 * mark it as owned by the host.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. as the unbraced arm of an if/else).  Note that
 * `sd' is evaluated several times, so it must be free of side effects.
 */
#define	NGE_TXSWD_RECYCLE(sd)	do {\
					(sd)->mp = NULL; \
					(sd)->frags = 0; \
					(sd)->mp_hndl.head = NULL; \
					(sd)->mp_hndl.tail = NULL; \
					(sd)->flags = HOST_OWN; \
				} while (0)


static size_t nge_tx_dmah_pop(nge_dmah_list_t *, nge_dmah_list_t *, size_t);
static void nge_tx_dmah_push(nge_dmah_list_t *, nge_dmah_list_t *);


void nge_tx_recycle_all(nge_t *ngep);
#pragma	no_inline(nge_tx_recycle_all)

/*
 * Unconditionally reset every slot of the send ring.
 *
 * For each slot the hardware descriptor is zeroed; if the slot still
 * references a message, every DMA handle on the slot's handle list is
 * unbound and the message is freed.  The software descriptor is then
 * returned to its idle, host-owned state.
 *
 * No ring locks are taken here and the handle nodes are not put back on
 * the ring's free list by this routine.
 */
void
nge_tx_recycle_all(nge_t *ngep)
{
	send_ring_t *ring = ngep->send;
	const uint32_t total = ring->desc.nslots;
	uint32_t idx;

	for (idx = 0; idx < total; idx++) {
		sw_tx_sbd_t *swd = &ring->sw_sbds[idx];
		nge_dmah_node_t *node;

		DMA_ZERO(swd->desc);

		if (swd->mp != NULL) {
			/* release every mapping made for this message */
			node = swd->mp_hndl.head;
			while (node != NULL) {
				(void) ddi_dma_unbind_handle(node->hndl);
				node = node->next;
			}

			freemsg(swd->mp);
		}

		NGE_TXSWD_RECYCLE(swd);
	}
}

/*
 * Detach the first `num' nodes of list `src' and hand them over as list
 * `dst'.
 *
 * Returns 0 on success.  Returns non-zero, leaving both lists untouched,
 * when `src' holds fewer than `num' nodes or when `num' is 0.  `dst' is
 * only written on success.
 *
 * The caller is responsible for whatever locking protects `src'.
 */
static size_t
nge_tx_dmah_pop(nge_dmah_list_t *src, nge_dmah_list_t *dst, size_t num)
{
	nge_dmah_node_t	*node;

	/*
	 * A zero-length request cannot be served by the walk below: the
	 * pre-decrement would wrap, and with an empty source list the code
	 * used to fall through and dereference a NULL node.  Reject it.
	 */
	if (num == 0)
		return ((size_t)-1);

	/* advance to the num-th node, or run off the end of the list */
	for (node = src->head; node != NULL && --num != 0; node = node->next)
		;

	/* list too short: num is still non-zero here */
	if (node == NULL)
		return (num);

	dst->head = src->head;
	dst->tail = node;

	/* whatever follows the last popped node becomes the new source */
	if ((src->head = node->next) == NULL)
		src->tail = NULL;

	node->next = NULL;

	return (0);
}

/*
 * Append every node of list `src' to the tail of list `dst'.
 *
 * `src' itself is not modified; the caller must stop using it as an
 * independent list afterwards.  The caller is responsible for whatever
 * locking protects `dst'.
 */
static void
nge_tx_dmah_push(nge_dmah_list_t *src, nge_dmah_list_t *dst)
{
	/*
	 * Appending an empty list must be a no-op.  Without this check
	 * dst->tail would be overwritten with NULL while dst->head stays
	 * set, and the next push would then replace dst->head and lose
	 * every node that was already on the list.
	 */
	if (src->head == NULL)
		return;

	if (dst->tail != NULL)
		dst->tail->next = src->head;
	else
		dst->head = src->head;

	dst->tail = src->tail;
}

/*
 * Sync `num' Tx descriptors, beginning at ring index `start', in the
 * direction given by `type'.
 *
 * Indices are converted to byte offsets using the per-descriptor size.
 * When the range runs past the end of the ring it is synced in two
 * pieces: from the start offset to the end of the DMA object (a length
 * of 0 asks ddi_dma_sync() for exactly that), then the wrapped remainder
 * from offset 0.
 */
static void
nge_tx_desc_sync(nge_t *ngep, uint64_t start, uint64_t num, uint_t type)
{
	send_ring_t *ring = ngep->send;
	const size_t desc_bytes = ngep->desc_attr.txd_size;
	const uint64_t ring_bytes = ring->desc.nslots * desc_bytes;
	const uint64_t off = start * desc_bytes;
	const uint64_t span = num * desc_bytes;

	if (off + span > ring_bytes) {
		/* wrapped: tail of the ring first, then the head */
		(void) ddi_dma_sync(ring->desc.dma_hdl, off, 0, type);
		(void) ddi_dma_sync(ring->desc.dma_hdl, 0,
		    off + span - ring_bytes, type);
		return;
	}

	(void) ddi_dma_sync(ring->desc.dma_hdl, off, span, type);
}

/*
 * Reclaim the resources held by transmit descriptors once the hardware
 * has finished with them.
 *
 * ngep		- per-instance soft state
 * is_intr	- B_TRUE when called from interrupt context: the reclaim
 *		  lock is then only tried, and the routine returns at once
 *		  if somebody else is already reclaiming.
 *
 * The ring is walked from tc_next over the slots that are neither free
 * nor still being filled (capped at tx_hwmark).  The walk stops at the
 * first slot that is host-owned or whose status still has TXD_OWN set.
 * Messages and DMA handles collected on the way are unbound/freed only
 * after the ring locks have been dropped.  If a reschedule was requested
 * and enough slots are free again, the reschedule soft interrupt is
 * triggered.
 */
void
nge_tx_recycle(nge_t *ngep, boolean_t is_intr)
{
	int resched;
	uint32_t stflg;
	size_t len;
	uint64_t free;
	uint64_t slot;
	uint64_t used;
	uint64_t next;
	uint64_t nslots;
	mblk_t *mp;
	sw_tx_sbd_t *ssbdp;
	void *hw_sbd_p;
	send_ring_t *srp;
	nge_dmah_node_t *dme;
	nge_dmah_list_t dmah;

	srp = ngep->send;

	if (is_intr) {
		if (mutex_tryenter(srp->tc_lock) == 0)
			return;
	} else
		mutex_enter(srp->tc_lock);
	mutex_enter(srp->tx_lock);

	/* snapshot the producer-side counters under tx_lock */
	next = srp->tx_next;
	used = srp->tx_flow;
	free = srp->tx_free;

	mutex_exit(srp->tx_lock);

	slot = srp->tc_next;
	nslots = srp->desc.nslots;

	/* slots handed to the hardware: neither free nor still in flow */
	used = nslots - free - used;

	ASSERT(slot == NEXT_INDEX(next, free, nslots));

	if (used > srp->tx_hwmark)
		used = srp->tx_hwmark;

	nge_tx_desc_sync(ngep, slot, used, DDI_DMA_SYNC_FORKERNEL);

	/*
	 * Look through the send ring by each bd's status part to find
	 * all the bds which have been transmitted, then reclaim all
	 * resources associated with these bds.
	 */

	mp = NULL;
	dmah.head = NULL;
	dmah.tail = NULL;

	for (free = 0; used-- != 0; slot = NEXT(slot, nslots), ++free)	{

		ssbdp = &srp->sw_sbds[slot];
		hw_sbd_p = DMA_VPTR(ssbdp->desc);

		stflg = ngep->desc_attr.txd_check(hw_sbd_p, &len);

		if (ssbdp->flags == HOST_OWN || (TXD_OWN & stflg) != 0)
			break;

		DMA_ZERO(ssbdp->desc);

		if (ssbdp->mp != NULL)	{
			/* chain the message for a deferred freemsgchain() */
			ssbdp->mp->b_next = mp;
			mp = ssbdp->mp;

			if (ssbdp->mp_hndl.head != NULL)
				nge_tx_dmah_push(&ssbdp->mp_hndl, &dmah);
		}

		NGE_TXSWD_RECYCLE(ssbdp);
	}

	/*
	 * We're about to release zero or more places.
	 * The ASSERTions below check that our invariants still hold:
	 * we never end up with more free places than the ring has, and
	 * the reclaim index stays consistent with tx_next and tx_free.
	 */

	mutex_enter(srp->tx_lock);

	srp->tx_free += free;
	ngep->watchdog = (srp->desc.nslots - srp->tx_free != 0);

	srp->tc_next = slot;

	ASSERT(srp->tx_free <= nslots);
	ASSERT(srp->tc_next == NEXT_INDEX(srp->tx_next, srp->tx_free, nslots));

	resched = (ngep->resched_needed != 0 && srp->tx_hwmark <= srp->tx_free);

	mutex_exit(srp->tx_lock);
	mutex_exit(srp->tc_lock);

	/* unbind/free mblks */

	for (dme = dmah.head; dme != NULL; dme = dme->next)
		(void) ddi_dma_unbind_handle(dme->hndl);

	/*
	 * Only touch the free list when handles were actually collected.
	 * Pushing an empty list would overwrite dmah_free.tail with NULL
	 * while dmah_free.head stays set; the next push would then replace
	 * the head and every handle already on the free list would be lost.
	 */
	if (dmah.head != NULL) {
		mutex_enter(&srp->dmah_lock);
		nge_tx_dmah_push(&dmah, &srp->dmah_free);
		mutex_exit(&srp->dmah_lock);
	}

	freemsgchain(mp);

	/*
	 * By now we may have reclaimed some resources; if a reschedule
	 * was requested, report it to the MAC layer via the soft interrupt.
	 */

	if (resched)
		(void) ddi_intr_trigger_softint(ngep->resched_hdl, NULL);
}

/*
 * Reserve `num' consecutive send-ring slots.
 *
 * If the ring is running low (tx_free below tx_lwmark) a reclaim pass is
 * attempted first, with tx_lock dropped around the call.  On success the
 * index of the first reserved slot is returned, tx_next is advanced, and
 * the slots are moved from the free count to the in-flow count.  When
 * fewer than `num' slots are free, (uint64_t)-1 is returned and the ring
 * state is left unchanged.
 */
static uint64_t
nge_tx_alloc(nge_t *ngep, uint64_t num)
{
	send_ring_t *ring = ngep->send;
	uint64_t first = (uint64_t)-1;

	mutex_enter(ring->tx_lock);

	if (ring->tx_free < ring->tx_lwmark) {
		/* nge_tx_recycle() takes tx_lock itself */
		mutex_exit(ring->tx_lock);
		nge_tx_recycle(ngep, B_FALSE);
		mutex_enter(ring->tx_lock);
	}

	if (num <= ring->tx_free) {
		first = ring->tx_next;

		ring->tx_free -= num;
		ring->tx_flow += num;
		ring->tx_next = NEXT_INDEX(first, num, ring->desc.nslots);
	}

	mutex_exit(ring->tx_lock);

	return (first);
}

/*
 * Account for `slotnum' descriptors that have been completely filled in
 * and, if no other sender still has descriptors in flow, kick the
 * transmitter.
 *
 * Several threads may be filling descriptors concurrently; only the one
 * that brings tx_flow down to zero bumps the watchdog counter and sets
 * the txdm and tx_rcom_en bits in the mode control register.
 */
static void
nge_tx_start(nge_t *ngep, uint64_t slotnum)
{
	send_ring_t *ring = ngep->send;
	nge_mode_cntl mode;

	mutex_enter(ring->tx_lock);

	ring->tx_flow -= slotnum;
	if (ring->tx_flow != 0) {
		/* somebody else is still filling; let the last one notify */
		mutex_exit(ring->tx_lock);
		return;
	}

	/*
	 * Make sure the watchdog counter is nonzero (watchdog armed).
	 * The reclaim path also writes this counter; a lost reset is
	 * tolerated because the trigger threshold is large.
	 */
	ngep->watchdog ++;

	mode.mode_val = nge_reg_get32(ngep, NGE_MODE_CNTL);
	mode.mode_bits.txdm = NGE_SET;
	mode.mode_bits.tx_rcom_en = NGE_SET;
	nge_reg_put32(ngep, NGE_MODE_CNTL, mode.mode_val);

	mutex_exit(ring->tx_lock);
}

static enum send_status
nge_send_copy(nge_t *ngep, mblk_t *mp, send_ring_t *srp);
#pragma	inline(nge_send_copy)

/*
 * Send a message by copying it into the pre-allocated buffer of a single
 * send-ring slot.
 *
 * Returns SEND_COPY_FAIL, leaving `mp' untouched, when no slot can be
 * reserved.  Otherwise every non-empty fragment is copied into the slot's
 * buffer (copying stops once the running length would exceed max_sdu,
 * although the length keeps accumulating), the descriptor is filled and
 * synced, the transmitter is started, `mp' is freed and SEND_COPY_SUCESS
 * is returned.
 */
static enum send_status
nge_send_copy(nge_t *ngep, mblk_t *mp, send_ring_t *srp)
{
	const uint64_t ndesc = 0x1;
	uint32_t cksum_flags;
	uint64_t slot;
	size_t msglen;
	size_t seglen;
	char *dst;
	mblk_t *seg;
	void *hwd;
	sw_tx_sbd_t *swd;

	hcksum_retrieve(mp, NULL, NULL, NULL, NULL,
	    NULL, NULL, &cksum_flags);

	slot = nge_tx_alloc(ngep, ndesc);
	if (slot == (uint64_t)-1)
		return (SEND_COPY_FAIL);

	ASSERT(slot < srp->desc.nslots);

	/*
	 * The slot is reserved; nothing below can fail.
	 */

	swd = &srp->sw_sbds[slot];
	ASSERT(swd->flags == HOST_OWN);

	dst = DMA_VPTR(swd->pbuf);
	msglen = 0;
	for (seg = mp; seg != NULL; seg = seg->b_cont) {
		seglen = MBLKL(seg);
		if (seglen != 0) {
			msglen += seglen;
			if (msglen <= ngep->max_sdu) {
				bcopy(seg->b_rptr, dst, seglen);
				dst += seglen;
			}
		}
	}

	DMA_SYNC(swd->pbuf, DDI_DMA_SYNC_FORDEV);

	/* describe the buffer to the hardware and publish the descriptor */

	hwd = DMA_VPTR(swd->desc);

	ngep->desc_attr.txd_fill(hwd, &swd->pbuf.cookie, msglen,
	    cksum_flags, B_TRUE);
	nge_tx_desc_sync(ngep, slot, ndesc, DDI_DMA_SYNC_FORDEV);

	swd->flags = CONTROLER_OWN;

	nge_tx_start(ngep, ndesc);

	/* the payload has been copied, so the message can go right away */

	freemsg(mp);
	return (SEND_COPY_SUCESS);
}

/*
 * static enum send_status
 * nge_send_mapped(nge_t *ngep, mblk_t *mp, size_t fragno);
 * #pragma	inline(nge_send_mapped)
 */

/*
 * Send a message by DMA-binding each of its non-empty fragments and
 * pointing one Tx descriptor at every resulting cookie.
 *
 * ngep		- per-instance soft state
 * mp		- message to send
 * fragno	- number of non-empty fragments in `mp', as counted by the
 *		  caller
 *
 * Returns SEND_MAP_SUCCESS when the message has been queued; ownership
 * of `mp' and of the DMA handles then passes to the last descriptor used
 * and is released by the reclaim code.  Returns SEND_MAP_FAIL, with all
 * bindings undone, all handles back on the free list and `mp' untouched,
 * when the message cannot be mapped; the caller may then fall back to
 * the copy path.
 */
static enum send_status
nge_send_mapped(nge_t *ngep, mblk_t *mp, size_t fragno)
{
	int err;
	boolean_t end;
	uint32_t i;
	uint32_t j;
	uint32_t ncookies;
	uint32_t slot;
	uint32_t nslots;
	uint32_t mblen;
	uint32_t flags;
	uint64_t start_index;
	uint64_t end_index;
	size_t missing;
	mblk_t *bp;
	void *hw_sbd_p;
	send_ring_t *srp;
	nge_dmah_node_t *dmah;
	nge_dmah_node_t	*dmer;
	nge_dmah_list_t dmah_list;
	ddi_dma_cookie_t cookie[NGE_MAX_COOKIES * NGE_MAP_FRAGS];

	/*
	 * cookie[] has room for NGE_MAP_FRAGS fragments of at most
	 * NGE_MAX_COOKIES cookies each.  Refuse anything larger (and the
	 * degenerate empty request) before touching any resource, so the
	 * array can never be overrun.
	 */
	if (fragno == 0 || fragno > NGE_MAP_FRAGS)
		return (SEND_MAP_FAIL);

	srp = ngep->send;
	nslots = srp->desc.nslots;

	/* take one DMA handle per fragment off the free list */
	mutex_enter(&srp->dmah_lock);
	missing = nge_tx_dmah_pop(&srp->dmah_free, &dmah_list, fragno);
	mutex_exit(&srp->dmah_lock);

	if (missing != 0)	{

		return (SEND_MAP_FAIL);
	}

	/*
	 * Pre-scan the message chain, binding every non-empty fragment
	 * and collecting the resulting cookies.  This tells us exactly
	 * how many descriptors the message needs before any descriptor
	 * is claimed; if a binding fails we bail out at once and the
	 * driver sends the message by the copy path instead.
	 */

	slot = 0;
	dmah = dmah_list.head;

	hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &flags);

	for (bp = mp; bp != NULL; bp = bp->b_cont)	{

		mblen = MBLKL(bp);
		if (mblen == 0)
			continue;

		err = ddi_dma_addr_bind_handle(dmah->hndl,
		    NULL, (caddr_t)bp->b_rptr, mblen,
		    DDI_DMA_STREAMING | DDI_DMA_WRITE,
		    DDI_DMA_DONTWAIT, NULL, cookie + slot, &ncookies);

		/*
		 * If the fragment cannot be mapped, or maps to more
		 * cookies than we are prepared to use, give up on the
		 * mapped path.
		 */
		if (err != DDI_DMA_MAPPED || ncookies > NGE_MAX_COOKIES) {
			NGE_DEBUG(("err(%x) map tx bulk fails"
			    " cookie(%x), ncookies(%x)",
			    err, cookie[slot].dmac_laddress, ncookies));
			/*
			 * A successful bind with too many cookies leaves
			 * this handle bound; map_fail only unbinds the
			 * handles before it, so release it here.
			 */
			if (err == DDI_DMA_MAPPED)
				(void) ddi_dma_unbind_handle(dmah->hndl);
			goto map_fail;
		}

		/*
		 * The first cookie is already in place; fetch the
		 * remaining ncookies - 1 into the following entries.
		 */
		for (end_index = slot + ncookies;
		    ++slot != end_index;
		    ddi_dma_nextcookie(dmah->hndl, cookie + slot))
			;

		dmah = dmah->next;
	}

	/*
	 * Now allocate tx descriptors and fill them.  `slot' holds the
	 * total number of cookies, i.e. the number of descriptors needed.
	 * Once the descriptors have been claimed nothing below can fail.
	 */

	if ((uint64_t)-1 == (start_index = nge_tx_alloc(ngep, slot)))
		goto map_fail;

	ASSERT(start_index < nslots);

	/*
	 * Fill the hw descriptors in reverse order, so that the first
	 * descriptor of the packet is handed to the hardware last.
	 */

	end = B_TRUE;
	end_index = NEXT_INDEX(start_index, slot - 1, nslots);

	for (i = slot - 1, j = end_index; start_index - j != 0;
	    j = PREV(j, nslots), --i)	{

		hw_sbd_p = DMA_VPTR(srp->sw_sbds[j].desc);
		ngep->desc_attr.txd_fill(hw_sbd_p, cookie + i,
		    cookie[i].dmac_size, 0, end);

		end = B_FALSE;
	}

	/* first descriptor: this one carries the checksum flags */
	hw_sbd_p = DMA_VPTR(srp->sw_sbds[j].desc);
	ngep->desc_attr.txd_fill(hw_sbd_p, cookie + i, cookie[i].dmac_size,
	    flags, end);

	nge_tx_desc_sync(ngep, start_index, slot, DDI_DMA_SYNC_FORDEV);

	/* fill sw desc */

	for (j = start_index; end_index - j != 0; j = NEXT(j, nslots))	{

		srp->sw_sbds[j].flags = CONTROLER_OWN;
	}

	/* the last descriptor owns the message and its DMA handles */
	srp->sw_sbds[j].mp = mp;
	srp->sw_sbds[j].mp_hndl = dmah_list;
	srp->sw_sbds[j].frags = fragno;
	srp->sw_sbds[j].flags = CONTROLER_OWN;

	nge_tx_start(ngep, slot);

	/*
	 * The message must not be freed yet; that happens once the
	 * hardware has finished with it.
	 */
	return (SEND_MAP_SUCCESS);

map_fail:
	/* undo the bindings made so far: every handle before `dmah' */
	for (dmer = dmah_list.head; dmer != dmah; dmer = dmer->next)
		(void) ddi_dma_unbind_handle(dmer->hndl);

	mutex_enter(&srp->dmah_lock);
	nge_tx_dmah_push(&dmah_list, &srp->dmah_free);
	mutex_exit(&srp->dmah_lock);

	return (SEND_MAP_FAIL);
}

/*
 * Transmit one message.
 *
 * The non-empty fragments are counted and their lengths summed.  A
 * message that is empty or longer than max_sdu is silently dropped.
 * The mapped path is tried when the message is longer than
 * param_txbcopy_threshold and the ring has more than
 * frags * NGE_MAX_COOKIES free slots; in every other case, and whenever
 * mapping fails, the copy path is used.  If the copy path cannot get a
 * slot, one reclaim pass is made and the copy is retried.
 *
 * Returns B_TRUE when the message has been consumed (queued or dropped).
 * Returns B_FALSE, with resched_needed set and `mp' not freed, when no
 * descriptor could be obtained.
 *
 * NOTE(review): the fragment count is not bounded here, while
 * nge_send_mapped() sizes its cookie array for NGE_MAP_FRAGS fragments --
 * confirm that larger counts are rejected before the array is used.
 */
static boolean_t
nge_send(nge_t *ngep, mblk_t *mp)
{
	nge_sw_statistics_t *swstats = &ngep->statistics.sw_statistics;
	send_ring_t *ring;
	mblk_t *seg;
	enum send_status rc;
	uint32_t nbytes = 0;
	uint32_t nfrags = 0;

	ASSERT(mp != NULL);
	ASSERT(ngep->nge_mac_state == NGE_MAC_STARTED);

	ring = ngep->send;

	/* count the non-empty fragments and the total payload */
	for (seg = mp; seg != NULL; seg = seg->b_cont) {
		if (MBLKL(seg) != 0) {
			nfrags++;
			nbytes += MBLKL(seg);
		}
	}

	if (nbytes == 0 || nbytes > (ngep->max_sdu)) {
		freemsg(mp);
		return (B_TRUE);
	}

	/* large messages are mapped when the ring has room to spare */
	if ((nbytes > ngep->param_txbcopy_threshold) &&
	    (ring->tx_free > nfrags * NGE_MAX_COOKIES))
		rc = nge_send_mapped(ngep, mp, nfrags);
	else
		rc = SEND_MAP_FAIL;

	if (rc == SEND_MAP_FAIL)
		rc = nge_send_copy(ngep, mp, ring);

	if (rc == SEND_COPY_FAIL) {
		/* try once more after reclaiming completed descriptors */
		nge_tx_recycle(ngep, B_FALSE);
		rc = nge_send_copy(ngep, mp, ring);
		if (rc == SEND_COPY_FAIL) {
			ngep->resched_needed = 1;
			NGE_DEBUG(("nge_send: send fail!"));
			return (B_FALSE);
		}
	}

	/* Update the software statistics */
	swstats->obytes += nbytes + ETHERFCSL;
	swstats->xmit_count ++;

	return (B_TRUE);
}

/*
 * nge_m_tx : Send a chain of packets.
 *
 * Packets linked through b_next are unlinked and passed to nge_send()
 * one at a time while the instance rwlock is held as reader.  If the
 * chip is not running the whole chain is freed.  Returns NULL when every
 * packet has been consumed, otherwise the first packet that could not be
 * sent, re-linked to the rest of the chain.
 */
mblk_t *
nge_m_tx(void *arg, mblk_t *mp)
{
	nge_t *ngep = arg;
	mblk_t *rest;

	rw_enter(ngep->rwlock, RW_READER);
	ASSERT(mp != NULL);

	if (ngep->nge_chip_state == NGE_CHIP_RUNNING) {
		for (; mp != NULL; mp = rest) {
			rest = mp->b_next;
			mp->b_next = NULL;

			if (!nge_send(ngep, mp)) {
				/* put the chain back together for the caller */
				mp->b_next = rest;
				break;
			}
		}
	} else {
		freemsgchain(mp);
		mp = NULL;
	}

	rw_exit(ngep->rwlock);

	return (mp);
}

/*
 * Soft interrupt handler for transmit rescheduling.
 *
 * The interrupt is claimed only when the MAC is started and a reschedule
 * has been requested; in that case the request flag is cleared, the
 * tx_resched statistic is bumped and the MAC layer is told it may resume
 * transmitting.  `args1' is the per-instance soft state; `args2' is not
 * used.
 */
/* ARGSUSED */
uint_t
nge_reschedule(caddr_t args1, caddr_t args2)
{
	nge_t *ngep = (nge_t *)args1;

	/* not ours unless a reschedule is actually pending */
	if (ngep->nge_mac_state != NGE_MAC_STARTED ||
	    ngep->resched_needed != 1)
		return (DDI_INTR_UNCLAIMED);

	ngep->resched_needed = 0;
	++ngep->statistics.sw_statistics.tx_resched;
	mac_tx_update(ngep->mh);

	return (DDI_INTR_CLAIMED);
}

/*
 * Read back the status of a hot_tx_bd Tx descriptor.
 *
 * Stores the descriptor's byte-count field in *len and returns the
 * control/status word with the TXD_BCNT_MSK bits cleared.
 */
uint32_t
nge_hot_txd_check(const void *hwd, size_t *len)
{
	const hot_tx_bd *bd = hwd;
	const uint32_t status = bd->control_status.cntl_val & ~TXD_BCNT_MSK;

	*len = bd->control_status.status_bits.bcnt;

	return (status);
}

/*
 * Read back the status of a sum_tx_bd Tx descriptor.
 *
 * Stores the descriptor's byte-count field in *len and returns the
 * control/status word with the TXD_BCNT_MSK bits cleared.
 */
uint32_t
nge_sum_txd_check(const void *hwd, size_t *len)
{
	const sum_tx_bd *bd = hwd;
	const uint32_t status = bd->control_status.cntl_val & ~TXD_BCNT_MSK;

	*len = bd->control_status.status_bits.bcnt;

	return (status);
}


/*
 * Fill in a hot_tx_bd Tx descriptor before transmitting.
 *
 * hwdesc	- the descriptor to fill
 * cookie	- DMA cookie of the buffer; its 64-bit address is split
 *		  into the high and low address words
 * length	- number of bytes in the buffer
 * sum_flag	- HCK_* checksum offload flags for the packet
 * end		- B_TRUE if this is the last descriptor of the packet
 *
 * Only bits that need setting are written; nothing is cleared here.  The
 * own bit is set last, after a producer barrier, so that the rest of the
 * descriptor is stored before ownership is handed over.
 */

void
nge_hot_txd_fill(void *hwdesc, const ddi_dma_cookie_t *cookie,
	size_t length, uint32_t sum_flag, boolean_t end)
{
	hot_tx_bd *bd = hwdesc;

	bd->host_buf_addr_hi = cookie->dmac_laddress >> 32;
	bd->host_buf_addr_lo = cookie->dmac_laddress;

	/* the byte-count field holds the length minus one */
	bd->control_status.control_sum_bits.bcnt = length - 1;

	/* request IP header checksum generation */
	if ((sum_flag & HCK_IPV4_HDRCKSUM) != 0) {
		bd->control_status.control_sum_bits.ip_hsum = NGE_SET;
	}

	/* request full TCP checksum generation */
	if ((sum_flag & HCK_FULLCKSUM) != 0) {
		bd->control_status.control_sum_bits.tcp_hsum = NGE_SET;
	}

	/* mark the last descriptor of the packet */
	if (end) {
		bd->control_status.control_sum_bits.end = NGE_SET;
	}

	membar_producer();

	/* pass desc to HW */
	bd->control_status.control_sum_bits.own = NGE_SET;
}

/*
 * Fill in a sum_tx_bd Tx descriptor before transmitting.
 *
 * hwdesc	- the descriptor to fill
 * cookie	- DMA cookie of the buffer; its dmac_address is stored as
 *		  the buffer address
 * length	- number of bytes in the buffer
 * sum_flag	- HCK_* checksum offload flags for the packet
 * end		- B_TRUE if this is the last descriptor of the packet
 *
 * Only bits that need setting are written; nothing is cleared here.  The
 * own bit is set last, after a producer barrier, so that the rest of the
 * descriptor is stored before ownership is handed over.
 */
void
nge_sum_txd_fill(void *hwdesc, const ddi_dma_cookie_t *cookie,
	size_t length, uint32_t sum_flag, boolean_t end)
{
	sum_tx_bd *bd = hwdesc;

	bd->host_buf_addr = cookie->dmac_address;

	/* the byte-count field holds the length minus one */
	bd->control_status.control_sum_bits.bcnt = length - 1;

	/* request IP header checksum generation */
	if ((sum_flag & HCK_IPV4_HDRCKSUM) != 0) {
		bd->control_status.control_sum_bits.ip_hsum = NGE_SET;
	}

	/* request full TCP checksum generation */
	if ((sum_flag & HCK_FULLCKSUM) != 0) {
		bd->control_status.control_sum_bits.tcp_hsum = NGE_SET;
	}

	/* mark the last descriptor of the packet */
	if (end) {
		bd->control_status.control_sum_bits.end = NGE_SET;
	}

	membar_producer();

	/* pass desc to HW */
	bd->control_status.control_sum_bits.own = NGE_SET;
}