/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Softmac data-path switching:
 *
 * - Fast-path model
 *
 * When the softmac fast-path is used, a dedicated lower-stream
 * will be opened over the legacy device for each IP/ARP (upper-)stream
 * over the softMAC, and all DLPI messages (including control messages
 * and data messages) will be exchanged directly between the upper-stream
 * and the corresponding lower-stream. Therefore, the data
 * demultiplexing, filtering and classification processing will be done
 * by the lower-stream, and the GLDv3 DLS/MAC layer processing will
 * no longer be needed.
 *
 * - Slow-path model
 *
 * Some GLDv3 features require that the GLDv3 DLS/MAC layer processing
 * not be bypassed, in order to ensure functional correctness. For example,
 * the softmac fast-path must be disabled to support GLDv3 VNIC
 * functionality. In this case, a shared lower-stream will be opened over
 * the legacy device; it is responsible for implementing the GLDv3 callbacks
 * and passing RAW data messages between the legacy device and the GLDv3
 * framework.
 *
 * By default, the softmac fast-path mode is used to ensure performance;
 * MAC clients can request that the softmac fast-path mode be disabled to
 * support certain features, and if such a request succeeds, the system
 * falls back to the slow-path softmac data-path model.
 *
 *
 * The details of the softmac data fast-path model are as follows:
 *
 * 1. When a stream is opened on a softMAC, the softmac module takes
 *    over the DLPI processing on this stream;
 *
 * 2. For IP/ARP streams over a softMAC, the softmac data fast-path will be
 *    used by default, unless fast-path is explicitly disabled by a MAC
 *    client. The softmac module identifies an IP/ARP stream by checking
 *    whether a SIOCSLIFNAME ioctl is sent down from upstream; if there is
 *    one, this stream is either an IP or an ARP stream and will
 *    potentially use the fast-path;
 *
 * 3. When the softmac fast-path is used, a dedicated lower-stream will
 *    be set up for each IP/ARP stream (1-1 mapping). From that point on,
 *    all control and data messages will be exchanged between the IP/ARP
 *    upper-stream and the legacy device through this dedicated
 *    lower-stream. As a result, the DLS/MAC layer processing in GLDv3
 *    will be skipped, which greatly improves performance;
 *
 * 4. When the softmac data fast-path is disabled by a MAC client (e.g.,
 *    by a VNIC), all the IP/ARP upper-streams will try to switch from
 *    the fast-path to the slow-path. The dedicated lower-stream will be
 *    destroyed, and all the control and data messages will go through the
 *    existing GLDv3 code path and (in the end) the shared lower-stream;
 *
 * 5. On the other hand, when the last MAC client cancels its fast-path
 *    disable request, all the IP/ARP streams will try to switch back to
 *    the fast-path mode;
 *
 * Steps 4 and 5 both rely on the data-path mode switching process
 * described below:
 *
 * 1) To switch the softmac data-path mode (between fast-path and slow-path),
 *    softmac first sends a DL_NOTE_REPLUMB DL_NOTIFY_IND message
 *    upstream over each IP/ARP stream that needs a data-path mode switch;
 *
 * 2) When IP receives this DL_NOTE_REPLUMB message, it brings down
 *    all the IP interfaces on the corresponding ill (IP lower-level
 *    structure), and then brings those interfaces up again; this in
 *    turn causes ARP to "replumb" the interface.
 *
 *    During the replumb process, both IP and ARP send downstream the
 *    necessary DL_DISABMULTI_REQ and DL_UNBIND_REQ messages to clean up
 *    the old state of the underlying softMAC, followed by the necessary
 *    DL_BIND_REQ and DL_ENABMULTI_REQ messages to set up the new state.
 *    Between the cleanup and the re-setup, IP/ARP also sends down
 *    a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF message to the softMAC to
 *    indicate the *switching point*;
 *
 * 3) When softmac receives the DL_NOTE_REPLUMB_DONE message, it either
 *    creates or destroys the dedicated lower-stream (depending on which
 *    data-path mode the softMAC is switching to), and changes the softmac
 *    data-path mode. From then on, softmac processes all subsequent
 *    control messages (including the DL_BIND_REQ and DL_ENABMULTI_REQ
 *    messages) and data messages based on the new data-path mode.
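 *
 * A rough sketch of the message flow for one IP/ARP stream (a fast-path
 * to slow-path switch is shown; the opposite direction is symmetric):
 *
 *    IP/ARP upper-stream                        softmac
 *        | <---- DL_NOTE_REPLUMB (DL_NOTIFY_IND) --- |  step 1)
 *        | --- DL_DISABMULTI_REQ/DL_UNBIND_REQ ----> |  step 2), cleanup
 *        | --- DL_NOTE_REPLUMB_DONE (NOTIFY_CONF) -> |  step 3), destroy the
 *        |                                           |  dedicated lower-stream
 *        |                                           |  and switch the mode
 *        | --- DL_BIND_REQ/DL_ENABMULTI_REQ -------> |  step 2), re-setup,
 *        |                                           |  handled in the new mode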
 */

#include <sys/types.h>
#include <sys/disp.h>
#include <sys/callb.h>
#include <sys/sysmacros.h>
#include <sys/file.h>
#include <sys/vlan.h>
#include <sys/dld.h>
#include <sys/sockio.h>
#include <sys/softmac_impl.h>
#include <net/if.h>

static kmutex_t		softmac_taskq_lock;
static kcondvar_t	softmac_taskq_cv;
static list_t		softmac_taskq_list;	/* List of softmac_upper_t */
boolean_t		softmac_taskq_quit;
boolean_t		softmac_taskq_done;

static void		softmac_taskq_dispatch();
static int		softmac_fastpath_setup(softmac_upper_t *);
static mac_tx_cookie_t	softmac_fastpath_wput_data(softmac_upper_t *, mblk_t *,
			    uintptr_t, uint16_t);
static void		softmac_datapath_switch_done(softmac_upper_t *);

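/*
 * Initialize the data-path switching state and start the dispatch thread;
 * called when the softmac module is loaded.
 */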
void
softmac_fp_init()
{
	mutex_init(&softmac_taskq_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&softmac_taskq_cv, NULL, CV_DRIVER, NULL);

	softmac_taskq_quit = B_FALSE;
	softmac_taskq_done = B_FALSE;
	list_create(&softmac_taskq_list, sizeof (softmac_upper_t),
	    offsetof(softmac_upper_t, su_taskq_list_node));
	(void) thread_create(NULL, 0, softmac_taskq_dispatch, NULL, 0,
	    &p0, TS_RUN, minclsyspri);
}

void
softmac_fp_fini()
{
	/*
	 * Request the softmac_taskq thread to quit and wait for it to be done.
	 */
	mutex_enter(&softmac_taskq_lock);
	softmac_taskq_quit = B_TRUE;
	cv_signal(&softmac_taskq_cv);
	while (!softmac_taskq_done)
		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
	mutex_exit(&softmac_taskq_lock);
	list_destroy(&softmac_taskq_list);

	mutex_destroy(&softmac_taskq_lock);
	cv_destroy(&softmac_taskq_cv);
}

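/*
 * Return B_TRUE if the module directly above this stream is IP (checked
 * by comparing the q_next module name against "ip").
 */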
static boolean_t
check_ip_above(queue_t *q)
{
	queue_t		*next_q;
	boolean_t	ret = B_TRUE;

	claimstr(q);
	next_q = q->q_next;
	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
		ret = B_FALSE;
	releasestr(q);
	return (ret);
}

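/*
 * DLD_CAPAB_PERIM callback: DLD_ENABLE/DLD_DISABLE enter and exit the
 * per-stream capability perimeter (su_mutex); DLD_QUERY reports whether
 * the perimeter is currently held.
 */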
/* ARGSUSED */
static int
softmac_capab_perim(softmac_upper_t *sup, void *data, uint_t flags)
{
	switch (flags) {
	case DLD_ENABLE:
		mutex_enter(&sup->su_mutex);
		break;
	case DLD_DISABLE:
		mutex_exit(&sup->su_mutex);
		break;
	case DLD_QUERY:
		return (MUTEX_HELD(&sup->su_mutex));
	}
	return (0);
}

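/*
 * Register (func != NULL) or clear (func == NULL) the client's tx notify
 * callback, which is used to notify the client when tx flow-control is
 * relieved.
 */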
static mac_tx_notify_handle_t
softmac_client_tx_notify(softmac_upper_t *sup, mac_tx_notify_t func, void *arg)
{
	ASSERT(MUTEX_HELD(&sup->su_mutex));

	if (func != NULL) {
		sup->su_tx_notify_func = func;
		sup->su_tx_notify_arg = arg;
	} else {
		/*
		 * Wait for all outstanding tx_notify_func calls to be done.
		 */
		while (sup->su_tx_inprocess != 0)
			cv_wait(&sup->su_cv, &sup->su_mutex);

		sup->su_tx_notify_func = NULL;
		sup->su_tx_notify_arg = NULL;
	}
	return ((mac_tx_notify_handle_t)sup);
}

static boolean_t
softmac_tx_is_flow_blocked(softmac_upper_t *sup, mac_tx_cookie_t cookie)
{
	ASSERT(cookie == (mac_tx_cookie_t)sup);
	return (sup->su_tx_busy);
}

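/*
 * DLD_CAPAB_DIRECT callback: enable or disable the direct function-call
 * data-path between the IP client and the dedicated lower-stream (rx
 * callback into IP; tx, flow-control and notify entry points into softmac).
 */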
static int
softmac_capab_direct(softmac_upper_t *sup, void *data, uint_t flags)
{
	dld_capab_direct_t	*direct = data;
	softmac_lower_t		*slp = sup->su_slp;

	ASSERT(MUTEX_HELD(&sup->su_mutex));

	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);

	switch (flags) {
	case DLD_ENABLE:
		if (sup->su_direct)
			return (0);

		sup->su_direct_rxinfo.slr_rx = (softmac_rx_t)direct->di_rx_cf;
		sup->su_direct_rxinfo.slr_arg = direct->di_rx_ch;
		slp->sl_rxinfo = &sup->su_direct_rxinfo;
		direct->di_tx_df = (uintptr_t)softmac_fastpath_wput_data;
		direct->di_tx_dh = sup;
		direct->di_tx_fctl_df = (uintptr_t)softmac_tx_is_flow_blocked;
		direct->di_tx_fctl_dh = sup;
		direct->di_tx_cb_df = (uintptr_t)softmac_client_tx_notify;
		direct->di_tx_cb_dh = sup;
		sup->su_direct = B_TRUE;
		return (0);

	case DLD_DISABLE:
		if (!sup->su_direct)
			return (0);

		slp->sl_rxinfo = &sup->su_rxinfo;
		sup->su_direct = B_FALSE;
		return (0);
	}
	return (ENOTSUP);
}

static int
softmac_dld_capab(softmac_upper_t *sup, uint_t type, void *data, uint_t flags)
{
	int	err;

	/*
	 * Don't enable direct callback capabilities unless the caller is
	 * the IP client. When a module is inserted in a stream (_I_INSERT)
	 * the stack initiates capability disable, but due to races, the
	 * module insertion may complete before the capability disable
	 * completes. So we limit the check to the DLD_ENABLE case.
	 */
	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
	    !check_ip_above(sup->su_rq)) {
		return (ENOTSUP);
	}

	switch (type) {
	case DLD_CAPAB_DIRECT:
		err = softmac_capab_direct(sup, data, flags);
		break;

	case DLD_CAPAB_PERIM:
		err = softmac_capab_perim(sup, data, flags);
		break;

	default:
		err = ENOTSUP;
		break;
	}
	return (err);
}

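/*
 * Build and send a DL_CAPABILITY_ACK advertising the capabilities supported
 * over this fast-path stream: DLD direct call, hardware checksum, zerocopy
 * and MDT, as reported by the underlying legacy device.
 */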
static void
softmac_capability_advertise(softmac_upper_t *sup, mblk_t *mp)
{
	dl_capability_ack_t	*dlap;
	dl_capability_sub_t	*dlsp;
	t_uscalar_t		subsize;
	uint8_t			*ptr;
	queue_t			*q = sup->su_wq;
	mblk_t			*mp1;
	softmac_t		*softmac = sup->su_softmac;
	boolean_t		dld_capable = B_FALSE;
	boolean_t		hcksum_capable = B_FALSE;
	boolean_t		zcopy_capable = B_FALSE;
	boolean_t		mdt_capable = B_FALSE;

	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);

	/*
	 * Initially assume no capabilities.
	 */
	subsize = 0;

	/*
	 * Direct capability negotiation interface between IP and softmac
	 */
	if (check_ip_above(sup->su_rq)) {
		dld_capable = B_TRUE;
		subsize += sizeof (dl_capability_sub_t) +
		    sizeof (dl_capab_dld_t);
	}

	/*
	 * Check if checksum offload is supported on this MAC.
	 */
	if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) {
		hcksum_capable = B_TRUE;
		subsize += sizeof (dl_capability_sub_t) +
		    sizeof (dl_capab_hcksum_t);
	}

	/*
	 * Check if zerocopy is supported on this interface.
	 */
	if (!(softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY)) {
		zcopy_capable = B_TRUE;
		subsize += sizeof (dl_capability_sub_t) +
		    sizeof (dl_capab_zerocopy_t);
	}

	if (softmac->smac_mdt) {
		mdt_capable = B_TRUE;
		subsize += sizeof (dl_capability_sub_t) +
		    sizeof (dl_capab_mdt_t);
	}

	/*
	 * If there are no capabilities to advertise or if we
	 * can't allocate a response, send a DL_ERROR_ACK.
	 */
	if ((subsize == 0) || (mp1 = reallocb(mp,
	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
		return;
	}

	mp = mp1;
	DB_TYPE(mp) = M_PROTO;
	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
	bzero(mp->b_rptr, MBLKL(mp));
	dlap = (dl_capability_ack_t *)mp->b_rptr;
	dlap->dl_primitive = DL_CAPABILITY_ACK;
	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
	dlap->dl_sub_length = subsize;
	ptr = (uint8_t *)&dlap[1];

	/*
	 * DLD capability (direct function-call interface between IP and
	 * softmac).
	 */
	if (dld_capable) {
		dl_capab_dld_t		dld;

		dlsp = (dl_capability_sub_t *)ptr;
		dlsp->dl_cap = DL_CAPAB_DLD;
		dlsp->dl_length = sizeof (dl_capab_dld_t);
		ptr += sizeof (dl_capability_sub_t);

		bzero(&dld, sizeof (dl_capab_dld_t));
		dld.dld_version = DLD_CURRENT_VERSION;
		dld.dld_capab = (uintptr_t)softmac_dld_capab;
		dld.dld_capab_handle = (uintptr_t)sup;

		dlcapabsetqid(&(dld.dld_mid), sup->su_rq);
		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
		ptr += sizeof (dl_capab_dld_t);
	}

	/*
	 * TCP/IP checksum offload.
	 */
	if (hcksum_capable) {
		dl_capab_hcksum_t	hcksum;

		dlsp = (dl_capability_sub_t *)ptr;

		dlsp->dl_cap = DL_CAPAB_HCKSUM;
		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
		ptr += sizeof (dl_capability_sub_t);

		bzero(&hcksum, sizeof (dl_capab_hcksum_t));
		hcksum.hcksum_version = HCKSUM_VERSION_1;
		hcksum.hcksum_txflags = softmac->smac_hcksum_txflags;
		dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
		ptr += sizeof (dl_capab_hcksum_t);
	}

	/*
	 * Zero copy
	 */
	if (zcopy_capable) {
		dl_capab_zerocopy_t	zcopy;

		dlsp = (dl_capability_sub_t *)ptr;

		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
		ptr += sizeof (dl_capability_sub_t);

		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
		dlcapabsetqid(&(zcopy.zerocopy_mid), sup->su_rq);
		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
		ptr += sizeof (dl_capab_zerocopy_t);
	}

	/*
	 * MDT
	 */
	if (mdt_capable) {
		dl_capab_mdt_t mdt;

		dlsp = (dl_capability_sub_t *)ptr;

		dlsp->dl_cap = DL_CAPAB_MDT;
		dlsp->dl_length = sizeof (dl_capab_mdt_t);
		ptr += sizeof (dl_capability_sub_t);

		bzero(&mdt, sizeof (dl_capab_mdt_t));
		mdt.mdt_version = MDT_VERSION_2;
		mdt.mdt_flags = DL_CAPAB_MDT_ENABLE;
		mdt.mdt_hdr_head = softmac->smac_mdt_capab.mdt_hdr_head;
		mdt.mdt_hdr_tail = softmac->smac_mdt_capab.mdt_hdr_tail;
		mdt.mdt_max_pld = softmac->smac_mdt_capab.mdt_max_pld;
		mdt.mdt_span_limit = softmac->smac_mdt_capab.mdt_span_limit;
		dlcapabsetqid(&(mdt.mdt_mid), sup->su_rq);
		bcopy(&mdt, ptr, sizeof (dl_capab_mdt_t));
		ptr += sizeof (dl_capab_mdt_t);
	}

	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
	qreply(q, mp);
}

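/*
 * Fast-path DL_CAPABILITY_REQ processing: an empty request is answered by
 * advertising all supported capabilities; otherwise walk and acknowledge
 * the listed sub-capabilities.
 */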
static void
softmac_capability_req(softmac_upper_t *sup, mblk_t *mp)
{
	dl_capability_req_t	*dlp = (dl_capability_req_t *)mp->b_rptr;
	dl_capability_sub_t	*sp;
	size_t			size, len;
	offset_t		off, end;
	t_uscalar_t		dl_err;
	queue_t			*q = sup->su_wq;

	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
		dl_err = DL_BADPRIM;
		goto failed;
	}

	if (!sup->su_bound) {
		dl_err = DL_OUTSTATE;
		goto failed;
	}

	/*
	 * This request is overloaded. If there are no requested capabilities
	 * then we just want to acknowledge with all the capabilities we
	 * support. Otherwise we enable the set of capabilities requested.
	 */
	if (dlp->dl_sub_length == 0) {
		softmac_capability_advertise(sup, mp);
		return;
	}

	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
		dl_err = DL_BADPRIM;
		goto failed;
	}

	dlp->dl_primitive = DL_CAPABILITY_ACK;

	off = dlp->dl_sub_offset;
	len = dlp->dl_sub_length;

	/*
	 * Walk the list of capabilities to be enabled.
	 */
	for (end = off + len; off < end; ) {
		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
		size = sizeof (dl_capability_sub_t) + sp->dl_length;

		if (off + size > end ||
		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
			dl_err = DL_BADPRIM;
			goto failed;
		}

		switch (sp->dl_cap) {
		/*
		 * TCP/IP checksum offload to hardware.
		 */
		case DL_CAPAB_HCKSUM: {
			dl_capab_hcksum_t *hcksump;
			dl_capab_hcksum_t hcksum;

			hcksump = (dl_capab_hcksum_t *)&sp[1];
			/*
			 * Copy for alignment.
			 */
			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
			dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
			break;
		}

		default:
			break;
		}

		off += size;
	}
	qreply(q, mp);
	return;
failed:
	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
}

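/*
 * Fast-path DL_BIND_REQ processing: pass the request down the dedicated
 * lower-stream and wait for the ack, enable the driver capabilities, and
 * mark this stream (and the softmac) as active.
 */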
static void
softmac_bind_req(softmac_upper_t *sup, mblk_t *mp)
{
	softmac_lower_t	*slp = sup->su_slp;
	softmac_t	*softmac = sup->su_softmac;
	mblk_t		*ackmp, *mp1;
	int		err;

	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
		freemsg(mp);
		return;
	}

	/*
	 * Allocate ackmp in case the underlying driver does not ack in time.
	 */
	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
		dlerrorack(sup->su_wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	err = softmac_output(slp, mp, DL_BIND_REQ, DL_BIND_ACK, &ackmp);
	if (ackmp != NULL) {
		freemsg(mp1);
	} else {
		/*
		 * The driver did not ack in time.
		 */
		ASSERT(err == ENOMSG);
		ackmp = mp1;
	}
	if (err != 0)
		goto failed;

	/*
	 * Enable capabilities the underlying driver claims to support.
	 */
	if ((err = softmac_capab_enable(slp)) != 0)
		goto failed;

	/*
	 * Check whether this softmac is already marked as exclusively used,
	 * e.g., an aggregation is created over it. Fail the BIND_REQ if so.
	 */
	mutex_enter(&softmac->smac_active_mutex);
	if (softmac->smac_active) {
		mutex_exit(&softmac->smac_active_mutex);
		err = EBUSY;
		goto failed;
	}
	softmac->smac_nactive++;
	sup->su_active = B_TRUE;
	mutex_exit(&softmac->smac_active_mutex);
	sup->su_bound = B_TRUE;

	qreply(sup->su_wq, ackmp);
	return;
failed:
	if (err != 0) {
		dlerrorack(sup->su_wq, ackmp, DL_BIND_REQ, DL_SYSERR, err);
		return;
	}
}

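/*
 * Fast-path DL_UNBIND_REQ processing: pass the request down the dedicated
 * lower-stream and wait for the ack, then clear the bound and active state.
 */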
static void
softmac_unbind_req(softmac_upper_t *sup, mblk_t *mp)
{
	softmac_lower_t	*slp = sup->su_slp;
	softmac_t	*softmac = sup->su_softmac;
	mblk_t		*ackmp, *mp1;
	int		err;

	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
		freemsg(mp);
		return;
	}

	if (!sup->su_bound) {
		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
		return;
	}

	/*
	 * Allocate ackmp in case the underlying driver does not ack in time.
	 */
	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	err = softmac_output(slp, mp, DL_UNBIND_REQ, DL_OK_ACK, &ackmp);
	if (ackmp != NULL) {
		freemsg(mp1);
	} else {
		/*
		 * The driver did not ack in time.
		 */
		ASSERT(err == ENOMSG);
		ackmp = mp1;
	}
	if (err != 0) {
		dlerrorack(sup->su_wq, ackmp, DL_UNBIND_REQ, DL_SYSERR, err);
		return;
	}

	sup->su_bound = B_FALSE;

	mutex_enter(&softmac->smac_active_mutex);
	if (sup->su_active) {
		ASSERT(!softmac->smac_active);
		softmac->smac_nactive--;
		sup->su_active = B_FALSE;
	}
	mutex_exit(&softmac->smac_active_mutex);

	qreply(sup->su_wq, ackmp);
}

/*
 * Process the non-data mblk.
 */
static void
softmac_wput_single_nondata(softmac_upper_t *sup, mblk_t *mp)
{
	softmac_t *softmac = sup->su_softmac;
	softmac_lower_t	*slp = sup->su_slp;
	unsigned char	dbtype;
	t_uscalar_t	prim;

	dbtype = DB_TYPE(mp);
	sup->su_is_arp = 0;
	switch (dbtype) {
	case M_CTL:
		sup->su_is_arp = 1;
		/* FALLTHROUGH */
	case M_IOCTL: {
		uint32_t	expected_mode;

		if (((struct iocblk *)(mp->b_rptr))->ioc_cmd != SIOCSLIFNAME)
			break;

		/*
		 * Nak the M_IOCTL based on the STREAMS specification.
		 */
		if (dbtype == M_IOCTL)
			miocnak(sup->su_wq, mp, 0, EINVAL);
		else
			freemsg(mp);

		/*
		 * This stream is either IP or ARP. See whether
		 * we need to set up a dedicated-lower-stream for it.
		 */
		mutex_enter(&softmac->smac_fp_mutex);

		expected_mode = DATAPATH_MODE(softmac);
		if (expected_mode == SOFTMAC_SLOWPATH)
			sup->su_mode = SOFTMAC_SLOWPATH;
		list_insert_head(&softmac->smac_sup_list, sup);
		mutex_exit(&softmac->smac_fp_mutex);

		/*
		 * Set up the fast-path dedicated lower-stream if fast-path
		 * is expected. Note that no lock is held here, and if
		 * smac_expected_mode is changed from SOFTMAC_FASTPATH to
		 * SOFTMAC_SLOWPATH, the DL_NOTE_REPLUMB message used for
		 * data-path switching will already have been queued and will
		 * be processed by softmac_wput_single_nondata() later.
		 */
		if (expected_mode == SOFTMAC_FASTPATH)
			(void) softmac_fastpath_setup(sup);
		return;
	}
	case M_PROTO:
	case M_PCPROTO:
		if (MBLKL(mp) < sizeof (t_uscalar_t)) {
			freemsg(mp);
			return;
		}
		prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
		switch (prim) {
		case DL_NOTIFY_IND:
			if (MBLKL(mp) < sizeof (dl_notify_ind_t) ||
			    ((dl_notify_ind_t *)mp->b_rptr)->dl_notification !=
			    DL_NOTE_REPLUMB) {
				freemsg(mp);
				return;
			}
			/*
			 * This DL_NOTE_REPLUMB message was initiated
			 * and queued by the softmac itself, when the
			 * sup is switching its data-path mode between
			 * SOFTMAC_SLOWPATH and SOFTMAC_FASTPATH.
			 * Send this message upstream.
			 */
			qreply(sup->su_wq, mp);
			return;
		case DL_NOTIFY_CONF:
			if (MBLKL(mp) < sizeof (dl_notify_conf_t) ||
			    ((dl_notify_conf_t *)mp->b_rptr)->dl_notification !=
			    DL_NOTE_REPLUMB_DONE) {
				freemsg(mp);
				return;
			}
			/*
			 * This is an indication from IP/ARP that the
			 * fastpath->slowpath switch is done.
			 */
			freemsg(mp);
			softmac_datapath_switch_done(sup);
			return;
		}
		break;
	}

	/*
	 * No need to hold a lock to check su_mode, since updates to su_mode
	 * are serialized by softmac_wput_nondata_task().
	 */
	if (sup->su_mode != SOFTMAC_FASTPATH) {
		dld_wput(sup->su_wq, mp);
		return;
	}

	/*
	 * Fast-path non-data message processing. Most non-data messages
	 * can be passed directly down to the dedicated-lower-stream, except
	 * for the following M_PROTO/M_PCPROTO messages.
	 */
	switch (dbtype) {
	case M_PROTO:
	case M_PCPROTO:
		switch (prim) {
		case DL_BIND_REQ:
			softmac_bind_req(sup, mp);
			break;
		case DL_UNBIND_REQ:
			softmac_unbind_req(sup, mp);
			break;
		case DL_CAPABILITY_REQ:
			softmac_capability_req(sup, mp);
			break;
		default:
			putnext(slp->sl_wq, mp);
			break;
		}
		break;
	default:
		putnext(slp->sl_wq, mp);
		break;
	}
}

/*
 * The worker routine that processes non-data messages. Note that we only
 * process one message at a time, so that we are able to "flush" the queued
 * messages and serialize the processing.
 */
static void
softmac_wput_nondata_task(void *arg)
{
	softmac_upper_t	*sup = arg;
	mblk_t		*mp;

	mutex_enter(&sup->su_disp_mutex);

	while (sup->su_pending_head != NULL) {
		if (sup->su_closing)
			break;

		SOFTMAC_DQ_PENDING(sup, &mp);
		mutex_exit(&sup->su_disp_mutex);
		softmac_wput_single_nondata(sup, mp);
		mutex_enter(&sup->su_disp_mutex);
	}

	/*
	 * If the stream is closing, flush all queued messages and let
	 * the close proceed.
	 */
	freemsgchain(sup->su_pending_head);
	sup->su_pending_head = sup->su_pending_tail = NULL;
	sup->su_dlpi_pending = B_FALSE;
	cv_signal(&sup->su_disp_cv);
	mutex_exit(&sup->su_disp_mutex);
}

/*
 * Kernel thread to handle taskq dispatch failures in softmac_wput_nondata().
 * This thread is started when the softmac module is first loaded.
 */
static void
softmac_taskq_dispatch(void)
{
	callb_cpr_t	cprinfo;
	softmac_upper_t	*sup;

	CALLB_CPR_INIT(&cprinfo, &softmac_taskq_lock, callb_generic_cpr,
	    "softmac_taskq_dispatch");
	mutex_enter(&softmac_taskq_lock);

	while (!softmac_taskq_quit) {
		sup = list_head(&softmac_taskq_list);
		while (sup != NULL) {
			list_remove(&softmac_taskq_list, sup);
			sup->su_taskq_scheduled = B_FALSE;
			mutex_exit(&softmac_taskq_lock);
			VERIFY(taskq_dispatch(system_taskq,
			    softmac_wput_nondata_task, sup, TQ_SLEEP) != NULL);
			mutex_enter(&softmac_taskq_lock);
			sup = list_head(&softmac_taskq_list);
		}

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &softmac_taskq_lock);
	}

	softmac_taskq_done = B_TRUE;
	cv_signal(&softmac_taskq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

void
softmac_wput_nondata(softmac_upper_t *sup, mblk_t *mp)
{
	/*
	 * The processing of the message might block. Enqueue the
	 * message for later processing.
	 */
	mutex_enter(&sup->su_disp_mutex);

	if (sup->su_closing) {
		mutex_exit(&sup->su_disp_mutex);
		freemsg(mp);
		return;
	}

	SOFTMAC_EQ_PENDING(sup, mp);

	if (sup->su_dlpi_pending) {
		mutex_exit(&sup->su_disp_mutex);
		return;
	}
	sup->su_dlpi_pending = B_TRUE;
	mutex_exit(&sup->su_disp_mutex);

	if (taskq_dispatch(system_taskq, softmac_wput_nondata_task,
	    sup, TQ_NOSLEEP) != NULL) {
		return;
	}

	mutex_enter(&softmac_taskq_lock);
	if (!sup->su_taskq_scheduled) {
		list_insert_tail(&softmac_taskq_list, sup);
		cv_signal(&softmac_taskq_cv);
	}
	sup->su_taskq_scheduled = B_TRUE;
	mutex_exit(&softmac_taskq_lock);
}

/*
 * Set up the dedicated-lower-stream (fast-path) for the IP/ARP upper-stream.
 */
static int
softmac_fastpath_setup(softmac_upper_t *sup)
{
	softmac_t	*softmac = sup->su_softmac;
	softmac_lower_t	*slp;
	int		err;

	err = softmac_lower_setup(softmac, sup, &slp);

	mutex_enter(&sup->su_mutex);
	/*
	 * Wait for all data messages to be processed so that we can change
	 * the su_mode.
	 */
	while (sup->su_tx_inprocess != 0)
		cv_wait(&sup->su_cv, &sup->su_mutex);

	ASSERT(sup->su_mode != SOFTMAC_FASTPATH);
	ASSERT(sup->su_slp == NULL);
	if (err != 0) {
		sup->su_mode = SOFTMAC_SLOWPATH;
	} else {
		sup->su_slp = slp;
		sup->su_mode = SOFTMAC_FASTPATH;
	}
	mutex_exit(&sup->su_mutex);
	return (err);
}

/*
 * Tear down the dedicated-lower-stream (fast-path) of the IP/ARP upper-stream.
 */
static void
softmac_fastpath_tear(softmac_upper_t *sup)
{
	mutex_enter(&sup->su_mutex);
	/*
	 * Wait for all data messages in the dedicated-lower-stream
	 * to be processed.
	 */
	while (sup->su_tx_inprocess != 0)
		cv_wait(&sup->su_cv, &sup->su_mutex);

	/*
	 * Note that this function is called either when the stream is closed,
	 * or when the stream is unbound (fast-path to slow-path switch).
	 * Therefore, there is no need to call the tx_notify callback.
	 */
	sup->su_tx_notify_func = NULL;
	sup->su_tx_notify_arg = NULL;
	if (sup->su_tx_busy) {
		ASSERT(sup->su_tx_flow_mp == NULL);
		VERIFY((sup->su_tx_flow_mp = getq(sup->su_wq)) != NULL);
		sup->su_tx_busy = B_FALSE;
	}

	sup->su_mode = SOFTMAC_SLOWPATH;

	/*
	 * Destroy the dedicated-lower-stream. Note that slp is destroyed
	 * when lh is closed.
	 */
	(void) ldi_close(sup->su_slp->sl_lh, FREAD|FWRITE, kcred);
	sup->su_slp = NULL;
	mutex_exit(&sup->su_mutex);
}

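/*
 * Process a data message sent down the upper-stream: go through the
 * dedicated lower-stream in fast-path mode, or hand the message to
 * dld_wput() in slow-path mode.
 */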
void
softmac_wput_data(softmac_upper_t *sup, mblk_t *mp)
{
	/*
	 * No lock is required to access the su_mode field since the data
	 * traffic is quiesced by IP while the data-path mode is being
	 * switched.
	 */
	if (sup->su_mode != SOFTMAC_FASTPATH)
		dld_wput(sup->su_wq, mp);
	else
		(void) softmac_fastpath_wput_data(sup, mp, NULL, 0);
}

/*ARGSUSED*/
static mac_tx_cookie_t
softmac_fastpath_wput_data(softmac_upper_t *sup, mblk_t *mp, uintptr_t f_hint,
    uint16_t flag)
{
	queue_t		*wq = sup->su_slp->sl_wq;

	/*
	 * This function is called from IP; only the MAC_DROP_ON_NO_DESC
	 * flag can be specified.
	 */
	ASSERT((flag & ~MAC_DROP_ON_NO_DESC) == 0);
	ASSERT(mp->b_next == NULL);

	/*
	 * Check whether the dedicated-lower-stream is able to handle more
	 * messages, and enable flow-control if it is not.
	 *
	 * Note that in order not to introduce any packet reordering, we
	 * always send the message down to the dedicated-lower-stream:
	 *
	 * If flow-control is already enabled but we still receive
	 * messages from the upper-stream, it means that the upper-stream
	 * does not respect STREAMS flow-control (e.g., TCP). Simply
	 * pass the message down to the lower-stream in that case.
	 */
	if (SOFTMAC_CANPUTNEXT(wq)) {
		putnext(wq, mp);
		return (NULL);
	}

	if (sup->su_tx_busy) {
		if ((flag & MAC_DROP_ON_NO_DESC) != 0)
			freemsg(mp);
		else
			putnext(wq, mp);
		return ((mac_tx_cookie_t)sup);
	}

	mutex_enter(&sup->su_mutex);
	if (!sup->su_tx_busy) {
		/*
		 * If DLD_CAPAB_DIRECT is enabled, the notify callback will be
		 * called when flow-control can be disabled. Otherwise,
		 * put the tx_flow_mp onto the wq to make use of the old
		 * STREAMS flow-control.
		 */
		ASSERT(sup->su_tx_flow_mp != NULL);
		(void) putq(sup->su_wq, sup->su_tx_flow_mp);
		sup->su_tx_flow_mp = NULL;
		sup->su_tx_busy = B_TRUE;
		qenable(wq);
	}
	mutex_exit(&sup->su_mutex);

	if ((flag & MAC_DROP_ON_NO_DESC) != 0)
		freemsg(mp);
	else
		putnext(wq, mp);
	return ((mac_tx_cookie_t)sup);
}

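/*
 * Mark the softmac as exclusively active (e.g., when an aggregation is
 * created over the legacy device); fail if any fast-path stream is
 * currently bound (smac_nactive != 0).
 */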
boolean_t
softmac_active_set(void *arg)
{
	softmac_t	*softmac = arg;

	mutex_enter(&softmac->smac_active_mutex);
	if (softmac->smac_nactive != 0) {
		mutex_exit(&softmac->smac_active_mutex);
		return (B_FALSE);
	}
	softmac->smac_active = B_TRUE;
	mutex_exit(&softmac->smac_active_mutex);
	return (B_TRUE);
}

void
softmac_active_clear(void *arg)
{
	softmac_t	*softmac = arg;

	mutex_enter(&softmac->smac_active_mutex);
	ASSERT(softmac->smac_active && (softmac->smac_nactive == 0));
	softmac->smac_active = B_FALSE;
	mutex_exit(&softmac->smac_active_mutex);
}

/*
 * Disable or re-enable the fast-path on the given softmac. This request can
 * come from a MAC client or directly from an administrator.
 */
int
softmac_datapath_switch(softmac_t *softmac, boolean_t disable, boolean_t admin)
{
	softmac_upper_t		*sup;
	mblk_t			*head = NULL, *tail = NULL, *mp;
	list_t			reqlist;
	softmac_switch_req_t	*req;
	uint32_t		current_mode, expected_mode;
	int			err = 0;

	mutex_enter(&softmac->smac_fp_mutex);

	current_mode = DATAPATH_MODE(softmac);
	if (admin) {
		if (softmac->smac_fastpath_admin_disabled == disable) {
			mutex_exit(&softmac->smac_fp_mutex);
			return (0);
		}
		softmac->smac_fastpath_admin_disabled = disable;
	} else if (disable) {
		softmac->smac_fp_disable_clients++;
	} else {
		ASSERT(softmac->smac_fp_disable_clients != 0);
		softmac->smac_fp_disable_clients--;
	}

	expected_mode = DATAPATH_MODE(softmac);
	if (current_mode == expected_mode) {
		mutex_exit(&softmac->smac_fp_mutex);
		return (0);
	}

	/*
	 * The expected data-path mode differs from the current mode;
	 * enqueue a data-path switch request for each IP/ARP stream.
	 */
	list_create(&reqlist, sizeof (softmac_switch_req_t),
	    offsetof(softmac_switch_req_t, ssq_req_list_node));

	/*
	 * Allocate all DL_NOTIFY_IND messages and request structures that
	 * are required to switch each IP/ARP stream to the expected mode.
	 */
	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
	    sup = list_next(&softmac->smac_sup_list, sup)) {
		dl_notify_ind_t	*dlip;

		req = kmem_alloc(sizeof (softmac_switch_req_t), KM_NOSLEEP);
		if (req == NULL)
			break;

		req->ssq_expected_mode = expected_mode;
		if (sup->su_is_arp) {
			list_insert_tail(&reqlist, req);
			continue;
		}
		/*
		 * Allocate the DL_NOTE_REPLUMB message.
		 */
		if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL) {
			kmem_free(req, sizeof (softmac_switch_req_t));
			break;
		}

		list_insert_tail(&reqlist, req);

		mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t);
		mp->b_datap->db_type = M_PROTO;
		bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
		dlip = (dl_notify_ind_t *)mp->b_rptr;
		dlip->dl_primitive = DL_NOTIFY_IND;
		dlip->dl_notification = DL_NOTE_REPLUMB;
		if (head == NULL) {
			head = tail = mp;
		} else {
			tail->b_next = mp;
			tail = mp;
		}
	}

	/*
	 * Note that it is fine if the expected data-path mode is fast-path
	 * and some of the streams fail to switch; only return failure if we
	 * are expected to switch to the slow-path.
	 */
	if (sup != NULL && expected_mode == SOFTMAC_SLOWPATH) {
		err = ENOMEM;
		goto fail;
	}

	/*
	 * Start the data-path switch for each IP/ARP stream. The switch will
	 * eventually complete, so there is no need to wait for it to finish.
	 */
	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
	    sup = list_next(&softmac->smac_sup_list, sup)) {
		if (!sup->su_is_arp) {
			mp = head->b_next;
			head->b_next = NULL;
			softmac_wput_nondata(sup, head);
			head = mp;
		}
		/*
		 * Add the switch request to the request list of this stream.
		 */
		req = list_head(&reqlist);
		ASSERT(req != NULL);
		list_remove(&reqlist, req);
		list_insert_tail(&sup->su_req_list, req);
	}

	mutex_exit(&softmac->smac_fp_mutex);
	ASSERT(list_is_empty(&reqlist));
	list_destroy(&reqlist);
	return (0);
fail:
	if (admin) {
		softmac->smac_fastpath_admin_disabled = !disable;
	} else if (disable) {
		softmac->smac_fp_disable_clients--;
	} else {
		softmac->smac_fp_disable_clients++;
	}

	mutex_exit(&softmac->smac_fp_mutex);
	while ((req = list_head(&reqlist)) != NULL) {
		list_remove(&reqlist, req);
		kmem_free(req, sizeof (softmac_switch_req_t));
	}
	freemsgchain(head);
	list_destroy(&reqlist);
	return (err);
}

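/*
 * Entry points for MAC clients to disable and re-enable the softmac
 * fast-path (e.g., when a VNIC is created over, or removed from, this
 * softmac).
 */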
int
softmac_fastpath_disable(void *arg)
{
	return (softmac_datapath_switch((softmac_t *)arg, B_TRUE, B_FALSE));
}

void
softmac_fastpath_enable(void *arg)
{
	VERIFY(softmac_datapath_switch((softmac_t *)arg, B_FALSE,
	    B_FALSE) == 0);
}

void
softmac_upperstream_close(softmac_upper_t *sup)
{
	softmac_t		*softmac = sup->su_softmac;
	softmac_switch_req_t	*req;

	mutex_enter(&softmac->smac_fp_mutex);

	if (sup->su_mode == SOFTMAC_FASTPATH)
		softmac_fastpath_tear(sup);

	if (sup->su_mode != SOFTMAC_UNKNOWN) {
		list_remove(&softmac->smac_sup_list, sup);
		sup->su_mode = SOFTMAC_UNKNOWN;
	}

	/*
	 * Clean up all the switch requests queued on this stream.
	 */
	while ((req = list_head(&sup->su_req_list)) != NULL) {
		list_remove(&sup->su_req_list, req);
		kmem_free(req, sizeof (softmac_switch_req_t));
	}
	mutex_exit(&softmac->smac_fp_mutex);
}

/*
 * Handle the DL_NOTE_REPLUMB_DONE indication from IP/ARP. Switch the upper
 * stream to the data-path mode recorded in the queued switch request.
 */
static void
softmac_datapath_switch_done(softmac_upper_t *sup)
{
	softmac_t		*softmac = sup->su_softmac;
	softmac_switch_req_t	*req;
	uint32_t		expected_mode;

	mutex_enter(&softmac->smac_fp_mutex);
	req = list_head(&sup->su_req_list);
	list_remove(&sup->su_req_list, req);
	expected_mode = req->ssq_expected_mode;
	kmem_free(req, sizeof (softmac_switch_req_t));

	if (expected_mode == sup->su_mode) {
		mutex_exit(&softmac->smac_fp_mutex);
		return;
	}

	ASSERT(!sup->su_bound);
	mutex_exit(&softmac->smac_fp_mutex);

	/*
	 * It is fine if the expected mode is fast-path and we fail
	 * to enable fastpath on this stream.
	 */
	if (expected_mode == SOFTMAC_SLOWPATH)
		softmac_fastpath_tear(sup);
	else
		(void) softmac_fastpath_setup(sup);
}