/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * Starcat Management Network Driver
 *
 * ****** NOTICE **** This file also resides in the SSC gate as
 * ****** NOTICE **** usr/src/uts/sun4u/scman/scman.c. Any changes
 * ****** NOTICE **** made here must be propagated there as well.
 *
 */

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/ksynch.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/debug.h>
#include <sys/conf.h>
#include <sys/kstr.h>
#include <sys/errno.h>
#include <sys/ethernet.h>
#include <sys/byteorder.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/modctl.h>
#include <sys/strsun.h>
#include <sys/callb.h>
#include <sys/pci.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <sys/socket.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/file.h>
#include <sys/dman.h>
#include <sys/autoconf.h>
#include <sys/zone.h>

extern int ddi_create_internal_pathname(dev_info_t *, char *, int, minor_t);

#define	MAN_IDNAME	"dman"
#define	DMAN_INT_PATH	"/devices/pseudo/dman@0:dman"
#define	DMAN_PATH	"/devices/pseudo/clone@0:dman"
#define	ERI_IDNAME	"eri"
#define	ERI_PATH	"/devices/pseudo/clone@0:eri"

#if defined(DEBUG)

static void man_print_msp(manstr_t *);
static void man_print_man(man_t *);
static void man_print_mdp(man_dest_t *);
static void man_print_dev(man_dev_t *);
static void man_print_mip(mi_path_t *);
static void man_print_mtp(mi_time_t *);
static void man_print_mpg(man_pg_t *);
static void man_print_path(man_path_t *);
static void man_print_work(man_work_t *);

/*
 * Set manstr_t dlpistate (upper half of multiplexor)
 */
#define	SETSTATE(msp, state)						\
	do {								\
		MAN_DBG(MAN_DLPI, ("msp=0x%p @ %d state %s=>%s\n",	\
		    (void *)msp, __LINE__, dss[msp->ms_dlpistate],	\
		    dss[(state)]));					\
		msp->ms_dlpistate = (state);				\
	} while (0)
/*
 * Set man_dest_t dlpistate (lower half of multiplexor)
 */
#define	D_SETSTATE(mdp, state)						\
	do {								\
		MAN_DBG(MAN_DLPI, ("dst=0x%p @ %d state %s=>%s\n",	\
		    (void *)mdp, __LINE__, dss[mdp->md_dlpistate],	\
		    dss[(state)]));					\
		mdp->md_dlpistate = (state);				\
	} while (0)

static char *promisc[] = {	/* DLPI promisc Strings */
	"not used",		/* 0x00 */
	"DL_PROMISC_PHYS",	/* 0x01 */
	"DL_PROMISC_SAP",	/* 0x02 */
	"DL_PROMISC_MULTI"	/* 0x03 */
};

static char *dps[] = {			/* DLPI Primitive Strings */
	"DL_INFO_REQ",			/* 0x00 */
	"DL_BIND_REQ",			/* 0x01 */
	"DL_UNBIND_REQ",		/* 0x02 */
	"DL_INFO_ACK",			/* 0x03 */
	"DL_BIND_ACK",			/* 0x04 */
	"DL_ERROR_ACK",			/* 0x05 */
	"DL_OK_ACK",			/* 0x06 */
	"DL_UNITDATA_REQ",		/* 0x07 */
	"DL_UNITDATA_IND",		/* 0x08 */
	"DL_UDERROR_IND",		/* 0x09 */
	"DL_UDQOS_REQ",			/* 0x0a */
	"DL_ATTACH_REQ",		/* 0x0b */
	"DL_DETACH_REQ",		/* 0x0c */
	"DL_CONNECT_REQ",		/* 0x0d */
	"DL_CONNECT_IND",		/* 0x0e */
	"DL_CONNECT_RES",		/* 0x0f */
	"DL_CONNECT_CON",		/* 0x10 */
	"DL_TOKEN_REQ",			/* 0x11 */
	"DL_TOKEN_ACK",			/* 0x12 */
	"DL_DISCONNECT_REQ",		/* 0x13 */
	"DL_DISCONNECT_IND",		/* 0x14 */
	"DL_SUBS_UNBIND_REQ",		/* 0x15 */
	"DL_LIARLIARPANTSONFIRE",	/* 0x16 */
	"DL_RESET_REQ",			/* 0x17 */
	"DL_RESET_IND",			/* 0x18 */
	"DL_RESET_RES",			/* 0x19 */
	"DL_RESET_CON",			/* 0x1a */
	"DL_SUBS_BIND_REQ",		/* 0x1b */
	"DL_SUBS_BIND_ACK",		/* 0x1c */
	"DL_ENABMULTI_REQ",		/* 0x1d */
	"DL_DISABMULTI_REQ",		/* 0x1e */
	"DL_PROMISCON_REQ",		/* 0x1f */
	"DL_PROMISCOFF_REQ",		/* 0x20 */
	"DL_DATA_ACK_REQ",		/* 0x21 */
	"DL_DATA_ACK_IND",		/* 0x22 */
	"DL_DATA_ACK_STATUS_IND",	/* 0x23 */
	"DL_REPLY_REQ",			/* 0x24 */
	"DL_REPLY_IND",			/* 0x25 */
	"DL_REPLY_STATUS_IND",		/* 0x26 */
	"DL_REPLY_UPDATE_REQ",		/* 0x27 */
	"DL_REPLY_UPDATE_STATUS_IND",	/* 0x28 */
	"DL_XID_REQ",			/* 0x29 */
	"DL_XID_IND",			/* 0x2a */
	"DL_XID_RES",			/* 0x2b */
	"DL_XID_CON",			/* 0x2c */
	"DL_TEST_REQ",			/* 0x2d */
	"DL_TEST_IND",			/* 0x2e */
	"DL_TEST_RES",			/* 0x2f */
	"DL_TEST_CON",			/* 0x30 */
	"DL_PHYS_ADDR_REQ",		/* 0x31 */
	"DL_PHYS_ADDR_ACK",		/* 0x32 */
	"DL_SET_PHYS_ADDR_REQ",		/* 0x33 */
	"DL_GET_STATISTICS_REQ",	/* 0x34 */
	"DL_GET_STATISTICS_ACK",	/* 0x35 */
};

#define	MAN_DLPI_MAX_PRIM	0x35
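
/*
 * A sketch (assumed usage; the actual debug call sites are elsewhere
 * in this file) of mapping a DLPI primitive to its name for debug
 * output, using the dps[] table above:
 *
 *	if (DL_PRIM(mp) <= MAN_DLPI_MAX_PRIM)
 *		MAN_DBG(MAN_DLPI, ("prim %s\n", dps[DL_PRIM(mp)]));
 */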

static char *dss[] = {			/* DLPI State Strings */
	"DL_UNBOUND",			/* 0x00	*/
	"DL_BIND_PENDING",		/* 0x01	*/
	"DL_UNBIND_PENDING",		/* 0x02	*/
	"DL_IDLE",			/* 0x03	*/
	"DL_UNATTACHED",		/* 0x04	*/
	"DL_ATTACH_PENDING",		/* 0x05	*/
	"DL_DETACH_PENDING",		/* 0x06	*/
	"DL_UDQOS_PENDING",		/* 0x07	*/
	"DL_OUTCON_PENDING",		/* 0x08	*/
	"DL_INCON_PENDING",		/* 0x09	*/
	"DL_CONN_RES_PENDING",		/* 0x0a	*/
	"DL_DATAXFER",			/* 0x0b	*/
	"DL_USER_RESET_PENDING",	/* 0x0c	*/
	"DL_PROV_RESET_PENDING",	/* 0x0d	*/
	"DL_RESET_RES_PENDING",		/* 0x0e	*/
	"DL_DISCON8_PENDING",		/* 0x0f	*/
	"DL_DISCON9_PENDING",		/* 0x10	*/
	"DL_DISCON11_PENDING",		/* 0x11	*/
	"DL_DISCON12_PENDING",		/* 0x12	*/
	"DL_DISCON13_PENDING",		/* 0x13	*/
	"DL_SUBS_BIND_PND",		/* 0x14	*/
	"DL_SUBS_UNBIND_PND",		/* 0x15	*/
};

static const char *lss[] = {
	"UNKNOWN",	/* 0x0 */
	"INIT",		/* 0x1 */
	"GOOD",		/* 0x2 */
	"STALE",	/* 0x3 */
	"FAIL",		/* 0x4 */
};

static char *_mw_type[] = {
	"OPEN_CTL",		/* 0x0 */
	"CLOSE_CTL",		/* 0x1 */
	"SWITCH",		/* 0x2 */
	"PATH_UPDATE",		/* 0x3 */
	"CLOSE",		/* 0x4 */
	"CLOSE_STREAM",	/* 0x5 */
	"DRATTACH",		/* 0x6 */
	"DRDETACH",		/* 0x7 */
	"STOP",			/* 0x8 */
	"DRSWITCH",		/* 0x9 */
	"KSTAT_UPDATE"		/* 0xA */
};

uint32_t		man_debug = MAN_WARN;

#define	man_kzalloc(a, b)	man_dbg_kzalloc(__LINE__, a, b)
#define	man_kfree(a, b)		man_dbg_kfree(__LINE__, a, b)
void	*man_dbg_kzalloc(int line, size_t size, int kmflags);
void	man_dbg_kfree(int line, void *buf, size_t size);

#else	/* DEBUG */

uint32_t		man_debug = 0;
/*
 * Set manstr_t dlpistate (upper half of multiplexor)
 */
#define	SETSTATE(msp, state)	((msp)->ms_dlpistate = (state))
/*
 * Set man_dest_t dlpistate (lower half of multiplexor)
 */
#define	D_SETSTATE(mdp, state)	((mdp)->md_dlpistate = (state))

#define	man_kzalloc(a, b)	kmem_zalloc(a, b)
#define	man_kfree(a, b)		kmem_free(a, b)

#endif	/* DEBUG */

#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
#define	DL_PROMISCON_TYPE(mp)	\
		(((union DL_primitives *)(mp)->b_rptr)->promiscon_req.dl_level)
#define	IOC_CMD(mp)	(((struct iocblk *)(mp)->b_rptr)->ioc_cmd)
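
/*
 * For example (a sketch; the real dispatch is in man_uwput()), a put
 * routine can switch on the DLPI primitive of an M_PROTO mblk:
 *
 *	if (DB_TYPE(mp) == M_PROTO && DL_PRIM(mp) == DL_UNITDATA_REQ)
 *		(void) man_udreq(wq, mp);
 */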

/*
 * Start of kstat-related declarations
 */
#define	MK_NOT_COUNTER		(1<<0)	/* not a counter statistic */
#define	MK_ERROR		(1<<2)	/* for error statistics */
#define	MK_NOT_PHYSICAL		(1<<3)	/* no matching physical stat */

typedef struct man_kstat_info_s {
	char		*mk_name;	/* e.g. align_errors */
	char		*mk_physname;	/* e.g. framing (NULL for same) */
	char		*mk_physalias;	/* e.g. framing (NULL for same) */
	uchar_t		mk_type;	/* e.g. KSTAT_DATA_UINT32 */
	int		mk_flags;
} man_kstat_info_t;

/*
 * Master declaration macro, note that it uses token pasting
 */
#define	MK_DECLARE(name, pname, palias, bits, flags) \
	{ name,		pname,	palias,	KSTAT_DATA_UINT ## bits, flags }
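
/*
 * For example, MK_DECLARE("ipackets", NULL, NULL, 32, 0) expands to
 * the initializer:
 *
 *	{ "ipackets", NULL, NULL, KSTAT_DATA_UINT32, 0 }
 */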

/*
 * Obsolete forms don't have the _sinceswitch forms; they are all errors
 */
#define	MK_OBSOLETE32(name, alias) MK_DECLARE(alias, name, alias, 32, MK_ERROR)
#define	MK_OBSOLETE64(name, alias) MK_DECLARE(alias, name, alias, 64, MK_ERROR)

/*
 * The only non-counters don't have any other aliases
 */
#define	MK_NOTCOUNTER32(name) MK_DECLARE(name, name, NULL, 32, MK_NOT_COUNTER)
#define	MK_NOTCOUNTER64(name) MK_DECLARE(name, name, NULL, 64, MK_NOT_COUNTER)

/*
 * Normal counter forms
 */
#define	MK_DECLARE32(name, alias) \
	MK_DECLARE(name, name, alias, 32, 0)
#define	MK_DECLARE64(name, alias) \
	MK_DECLARE(name, name, alias, 64, 0)

/*
 * Error counters need special MK_ERROR flag only for the non-AP form
 */
#define	MK_ERROR32(name, alias) \
	MK_DECLARE(name, name, alias, 32, MK_ERROR)
#define	MK_ERROR64(name, alias) \
	MK_DECLARE(name, name, alias, 64, MK_ERROR)

/*
 * These AP-specific stats are not backed by physical statistics
 */
#define	MK_NOTPHYS32(name) MK_DECLARE(name, NULL, NULL, 32, MK_NOT_PHYSICAL)
#define	MK_NOTPHYS64(name) MK_DECLARE(name, NULL, NULL, 64, MK_NOT_PHYSICAL)

/*
 * START of the actual man_kstat_info declaration using above macros
 */
static man_kstat_info_t man_kstat_info[] = {
	/*
	 * Link Input/Output stats
	 */
	MK_DECLARE32("ipackets", NULL),
	MK_ERROR32("ierrors", NULL),
	MK_DECLARE32("opackets", NULL),
	MK_ERROR32("oerrors", NULL),
	MK_ERROR32("collisions", NULL),
	MK_NOTCOUNTER64("ifspeed"),
	/*
	 * These are new MIB-II stats, per PSARC 1997/198
	 */
	MK_DECLARE32("rbytes", NULL),
	MK_DECLARE32("obytes", NULL),
	MK_DECLARE32("multircv", NULL),
	MK_DECLARE32("multixmt", NULL),
	MK_DECLARE32("brdcstrcv", NULL),
	MK_DECLARE32("brdcstxmt", NULL),
	/*
	 * Error values
	 */
	MK_ERROR32("norcvbuf", NULL),
	MK_ERROR32("noxmtbuf", NULL),
	MK_ERROR32("unknowns", NULL),
	/*
	 * These are the 64-bit values; they fall back to the 32-bit values
	 */
	MK_DECLARE64("ipackets64", "ipackets"),
	MK_DECLARE64("opackets64", "opackets"),
	MK_DECLARE64("rbytes64", "rbytes"),
	MK_DECLARE64("obytes64", "obytes"),

	/* New AP switching statistics */
	MK_NOTPHYS64("man_switches"),
	MK_NOTPHYS64("man_link_fails"),
	MK_NOTPHYS64("man_link_stales"),
	MK_NOTPHYS64("man_icmpv4_probes"),
	MK_NOTPHYS64("man_icmpv6_probes"),

	MK_ERROR32("align_errors", "framing"),
	MK_ERROR32("fcs_errors", "crc"),
	MK_ERROR32("first_collisions", NULL),
	MK_ERROR32("multi_collisions", NULL),
	MK_ERROR32("sqe_errors", "sqe"),

	MK_ERROR32("tx_late_collisions", NULL),
	MK_ERROR32("ex_collisions", "excollisions"),
	MK_ERROR32("macxmt_errors", NULL),
	MK_ERROR32("carrier_errors", "nocarrier"),
	MK_ERROR32("toolong_errors", "buff"),
	MK_ERROR32("macrcv_errors", NULL),

	MK_OBSOLETE32("framing", "align_errors"),
	MK_OBSOLETE32("crc", "fcs_errors"),
	MK_OBSOLETE32("sqe", "sqe_errors"),
	MK_OBSOLETE32("excollisions", "ex_collisions"),
	MK_OBSOLETE32("nocarrier", "carrier_errors"),
	MK_OBSOLETE32("buff", "toolong_errors"),
};

#define	MAN_NUMSTATS (sizeof (man_kstat_info) / sizeof (man_kstat_info_t))

/*
 * Miscellaneous ethernet stuff.
 *
 * MAN's DL_INFO_ACK template.
 */
static	dl_info_ack_t man_infoack = {
	DL_INFO_ACK,				/* dl_primitive */
	ETHERMTU,				/* dl_max_sdu */
	0,					/* dl_min_sdu */
	MAN_ADDRL,				/* dl_addr_length */
	DL_ETHER,				/* dl_mac_type */
	0,					/* dl_reserved */
	0,					/* dl_current_state */
	-2,					/* dl_sap_length */
	DL_CLDLS,				/* dl_service_mode */
	0,					/* dl_qos_length */
	0,					/* dl_qos_offset */
	0,					/* dl_range_length */
	0,					/* dl_range_offset */
	DL_STYLE2,				/* dl_provider_style */
	sizeof (dl_info_ack_t),			/* dl_addr_offset */
	DL_VERSION_2,				/* dl_version */
	ETHERADDRL,				/* dl_brdcst_addr_length */
	sizeof (dl_info_ack_t) + MAN_ADDRL,	/* dl_brdcst_addr_offset */
	0					/* dl_growth */
};
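
/*
 * Note that the negative dl_sap_length (-2) above indicates, per DLPI
 * convention, that within a DLSAP address the 2-byte SAP follows the
 * 6-byte physical (ethernet) address.
 */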

/*
 * Ethernet broadcast address definition.
 */
static	struct ether_addr	etherbroadcast = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

static struct ether_addr zero_ether_addr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Set via MAN_SET_SC_IPADDRS ioctl.
 */
man_sc_ipaddrs_t	man_sc_ipaddrs = { 0xffffffffU, 0xffffffffU };

/*
 * Set via MAN_SET_SC_IP6ADDRS ioctl.
 */
man_sc_ip6addrs_t	man_sc_ip6addrs = { 0, 0, 0, 0, 0, 0, 0, 0 };

/*
 * IP & ICMP constants
 */
#ifndef	ETHERTYPE_IPV6
#define	ETHERTYPE_IPV6 0x86DD
#endif

/*
 * Function prototypes.
 *
 * Upper multiplexor functions.
 */
static int	man_attach(dev_info_t *, ddi_attach_cmd_t);
static int	man_detach(dev_info_t *, ddi_detach_cmd_t);
static int	man_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int	man_open(register queue_t *, dev_t *, int, int, cred_t *);
static int	man_configure(queue_t *);
static int	man_deconfigure(void);
static int	man_init_dests(man_t *, manstr_t *);
static void	man_start_dest(man_dest_t *, manstr_t *, man_pg_t *);
static void	man_set_optimized_dest(manstr_t *);
static int	man_close(queue_t *);
static void	man_cancel_timers(man_adest_t *);
static int	man_uwput(queue_t *, mblk_t *);
static int	man_start(queue_t *, mblk_t *, eaddr_t *);
static void	man_ioctl(queue_t *, mblk_t *);
static void	man_set_linkcheck_time(queue_t *, mblk_t *);
static void	man_setpath(queue_t *, mblk_t *);
static void	man_geteaddr(queue_t *, mblk_t *);
static void	man_set_sc_ipaddrs(queue_t *, mblk_t *);
static void	man_set_sc_ip6addrs(queue_t *, mblk_t *);
static int	man_get_our_etheraddr(eaddr_t *eap);
static void	man_nd_getset(queue_t *, mblk_t *);
static void	man_dl_ioc_hdr_info(queue_t *, mblk_t *);
static int	man_uwsrv(queue_t *);
static int	man_proto(queue_t *, mblk_t *);
static int	man_udreq(queue_t *, mblk_t *);
static void	man_areq(queue_t *, mblk_t *);
static mblk_t	*man_alloc_physreq_mp(eaddr_t *);
static void	man_dreq(queue_t *, mblk_t *);
static void	man_dodetach(manstr_t *, man_work_t *);
static void	man_dl_clean(mblk_t **);
static void	man_breq(queue_t *, mblk_t *);
static void	man_ubreq(queue_t *, mblk_t *);
static void	man_ireq(queue_t *, mblk_t *);
static void	man_ponreq(queue_t *, mblk_t *);
static void	man_poffreq(queue_t *, mblk_t *);
static void	man_emreq(queue_t *, mblk_t *);
static void	man_dmreq(queue_t *, mblk_t *);
static void	man_pareq(queue_t *, mblk_t *);
static void	man_spareq(queue_t *, mblk_t *);
static int	man_dlpi(manstr_t *, mblk_t *);
static int	man_dlioc(manstr_t *, mblk_t *);
static int	man_dl_catch(mblk_t **, mblk_t *);
static void	man_dl_release(mblk_t **, mblk_t *);
static int	man_match_proto(mblk_t *, mblk_t *);
static int	man_open_ctl();
static void	man_close_ctl();
/*
 * Upper/lower multiplexor functions.
 */
static int	man_dlpi_senddown(manstr_t *, mblk_t *);
static int	man_start_lower(man_dest_t *, mblk_t *, queue_t *, int caller);
static int	man_lrput(queue_t *, mblk_t *);
/*
 * Lower multiplexor functions.
 */
static int	man_lwsrv(queue_t *);
static int	man_lrsrv(queue_t *);
static void	man_dlpi_replay(man_dest_t *, mblk_t *);
static int	man_dlioc_replay(man_dest_t *);
/*
 * Link failover routines.
 */
static int	man_gettimer(int, man_dest_t *);
static void	man_linkcheck_timer(void *);
static int	man_needs_linkcheck(man_dest_t *);
static int	man_do_autoswitch(man_dest_t *);
static int	man_autoswitch(man_pg_t *, man_dev_t *, man_work_t *);
static int	man_prep_dests_for_switch(man_pg_t *, man_dest_t **, int *);
static int	man_str_uses_pg(manstr_t *, man_pg_t *);
static void	man_do_icmp_bcast(man_dest_t *, t_uscalar_t);
static mblk_t	*man_alloc_udreq(int, man_dladdr_t *);
static mblk_t	*man_pinger(t_uscalar_t);
/*
 * Functions normally executing outside of the STREAMS perimeter.
 */
/*
 * Functions supporting/processing work requests.
 */
static void	man_bwork(void);
static void	man_iwork(void);		/* inside perimeter */
void		man_work_add(man_workq_t *, man_work_t *);
man_work_t	*man_work_alloc(int, int);
void		man_work_free(man_work_t *);
/*
 * Functions implementing/supporting failover.
 *
 * Executed inside perimeter.
 */
static int	man_do_dr_attach(man_work_t *);
static int	man_do_dr_switch(man_work_t *);
static void	man_do_dr_detach(man_work_t *);
static int	man_iswitch(man_work_t *);
static void	man_ifail_dest(man_dest_t *);
static man_dest_t *man_switch_match(man_dest_t *, int, void *);
static void	man_add_dests(man_pg_t *);
static void	man_reset_dlpi(void *);
static mblk_t	*man_dup_mplist(mblk_t *);
static mblk_t	*man_alloc_ubreq_dreq();
/*
 * Executed outside the perimeter (use man_lock for synchronization).
 */
static void	man_bclose(man_adest_t *);
static void	man_bswitch(man_adest_t *, man_work_t *);
static int	man_plumb(man_dest_t *);
static void	man_unplumb(man_dest_t *);
static void	man_plink(queue_t *, mblk_t *);
static void	man_unplink(queue_t *, mblk_t *);
static void	man_linkrec_insert(man_linkrec_t *);
static queue_t	*man_linkrec_find(int);
/*
 * Functions supporting pathgroups
 */
int	man_pg_cmd(mi_path_t *, man_work_t *);
static int	man_pg_assign(man_pg_t **, mi_path_t *, int);
static int	man_pg_create(man_pg_t **, man_pg_t **, mi_path_t *);
static int	man_pg_unassign(man_pg_t **, mi_path_t *);
static int	man_pg_activate(man_t *, mi_path_t *, man_work_t *);
static int	man_pg_read(man_pg_t *, mi_path_t *);
static man_pg_t	*man_find_path_by_dev(man_pg_t *, man_dev_t *, man_path_t **);
static man_pg_t	*man_find_pg_by_id(man_pg_t *, int);
static man_path_t	*man_find_path_by_ppa(man_path_t *, int);
static man_path_t	*man_find_active_path(man_path_t *);
static man_path_t	*man_find_alternate_path(man_path_t *);
static void	man_path_remove(man_path_t **, man_path_t *);
static void	man_path_insert(man_path_t **, man_path_t *);
static void	man_path_merge(man_path_t **, man_path_t *);
static int	man_path_kstat_init(man_path_t *);
static void	man_path_kstat_uninit(man_path_t *);
/*
 * Functions supporting kstat reporting.
 */
static int	man_kstat_update(kstat_t *, int);
static void	man_do_kstats(man_work_t *);
static void	man_update_path_kstats(man_t *);
static void 	man_update_dev_kstats(kstat_named_t *, man_path_t *);
static void	man_sum_dests_kstats(kstat_named_t *, man_pg_t *);
static void	man_kstat_named_init(kstat_named_t *, int);
static int	man_kstat_byname(kstat_t *, char *, kstat_named_t *);
static void	man_sum_kstats(kstat_named_t *, kstat_t *, kstat_named_t *);
/*
 * Functions supporting ndd.
 */
static int	man_param_register(param_t *, int);
static int	man_pathgroups_report(queue_t *, mblk_t *, caddr_t, cred_t *);
static void	man_preport(man_path_t *, mblk_t *);
static int	man_set_active_path(queue_t *, mblk_t *, char *, caddr_t,
			cred_t *);
static int	man_get_hostinfo(queue_t *, mblk_t *, caddr_t, cred_t *);
static char	*man_inet_ntoa(in_addr_t);
static int	man_param_get(queue_t *, mblk_t *, caddr_t, cred_t *);
static int	man_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
static  void    man_param_cleanup(void);
static  void    man_nd_free(caddr_t *nd_pparam);
/*
 * MAN SSC/Domain specific externs.
 */
extern int	man_get_iosram(manc_t *);
extern int	man_domain_configure(void);
extern int	man_domain_deconfigure(void);
extern int	man_dossc_switch(uint32_t);
extern int	man_is_on_domain;

/*
 * Driver Globals protected by inner perimeter.
 */
static manstr_t	*man_strup = NULL;	/* list of MAN STREAMS */
static caddr_t	man_ndlist = NULL;	/* head of ndd var list */
void		*man_softstate = NULL;

/*
 * Driver globals protected by man_lock.
 */
kmutex_t		man_lock;		/* lock protecting vars below */
static kthread_id_t	man_bwork_id = NULL;	/* background thread ID */
man_workq_t		*man_bwork_q;		/* bgthread work q */
man_workq_t		*man_iwork_q;		/* inner perim (uwsrv) work q */
static man_linkrec_t	*man_linkrec_head = NULL;	/* list of linkblks */
ldi_handle_t		man_ctl_lh = NULL;	/* MAN control handle */
queue_t			*man_ctl_wq = NULL;	/* MAN control rq */
static int		man_config_state = MAN_UNCONFIGURED;
static int		man_config_error = ENODEV;

/*
 * These parameters are accessed via ndd to report the link configuration
 * for the MAN driver. They can also be used to force configuration changes.
 */
#define	MAN_NOTUSR	0x0f000000

/* ------------------------------------------------------------------------- */

static  param_t	man_param_arr[] = {
	/* min		max		value		name */
	{  0,		0xFFFF,		0,		"man_debug_level"},
};

#define	MAN_NDD_GETABLE	1
#define	MAN_NDD_SETABLE	2

static  uint32_t	man_param_display[] = {
/* DISPLAY */
MAN_NDD_SETABLE,	/* man_debug_level */
};

/*
 * STREAMs information.
 */
static struct module_info man_m_info = {
	MAN_IDNUM,			/* mi_idnum */
	MAN_IDNAME,			/* mi_idname */
	MAN_MINPSZ,			/* mi_minpsz */
	MAN_MAXPSZ,			/* mi_maxpsz */
	MAN_HIWAT,			/* mi_hiwat */
	MAN_LOWAT			/* mi_lowat */
};

/*
 * Upper read queue does not do anything.
 */
static struct qinit man_urinit = {
	NULL,				/* qi_putp */
	NULL,				/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct qinit man_lrinit = {
	man_lrput,			/* qi_putp */
	man_lrsrv,			/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct qinit man_uwinit = {
	man_uwput,			/* qi_putp */
	man_uwsrv,			/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct qinit man_lwinit = {
	NULL,				/* qi_putp */
	man_lwsrv,			/* qi_srvp */
	man_open,			/* qi_qopen */
	man_close,			/* qi_qclose */
	NULL,				/* qi_qadmin */
	&man_m_info,			/* qi_minfo */
	NULL				/* qi_mstat */
};

static struct streamtab man_maninfo = {
	&man_urinit,			/* st_rdinit */
	&man_uwinit,			/* st_wrinit */
	&man_lrinit,			/* st_muxrinit */
	&man_lwinit			/* st_muxwrinit */
};


/*
 * Module linkage information for the kernel.
 *
 * Locking Theory:
 *	D_MTPERMOD -	Only an inner perimeter: all routines single
 *			threaded (except put, see below).
 *	D_MTPUTSHARED -	Put routines enter the inner perimeter shared (not
 *			exclusive) for concurrency/performance reasons.
 *
 *	Anyone who needs exclusive access to the perimeter (to change
 *	global data structures) gets it via qwriter() calls. The
 *	background thread does all of its work outside of the perimeter
 *	and submits work via qtimeout() when data structures need to be
 *	modified.
 */
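
/*
 * Illustrative sketch: a put routine that entered the perimeter shared
 * can gain exclusive access for a particular message by requeueing it
 * via qwriter(9F), as man_uwput() does for M_IOCTL messages:
 *
 *	qwriter(wq, mp, man_ioctl, PERIM_INNER);
 *
 * man_ioctl() then runs exclusive within the perimeter.
 */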

#define	MAN_MDEV_FLAGS	(D_MP|D_MTPERMOD|D_MTPUTSHARED)

DDI_DEFINE_STREAM_OPS(man_ops, nulldev, nulldev, man_attach,
    man_detach, nodev, man_info, MAN_MDEV_FLAGS, &man_maninfo,
    ddi_quiesce_not_supported);

extern int nodev(), nulldev();

static struct modldrv modldrv = {
	&mod_driverops, 	/* Module type.  This one is a pseudo driver */
	"MAN MetaDriver",
	&man_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *) &modldrv,
	NULL
};


/* Virtual Driver loader entry points */

int
_init(void)
{
	int		status = DDI_FAILURE;

	MAN_DBG(MAN_INIT, ("_init:"));

	status = mod_install(&modlinkage);
	if (status != 0) {
		cmn_err(CE_WARN, "man_init: mod_install failed"
		    " error = %d", status);
		return (status);
	}

	status = ddi_soft_state_init(&man_softstate, sizeof (man_t), 4);
	if (status != 0) {
		cmn_err(CE_WARN, "man_init: ddi_soft_state_init failed"
		    " error = %d", status);
		(void) mod_remove(&modlinkage);
		return (status);
	}

	man_bwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
	man_iwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);

	mutex_init(&man_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&man_bwork_q->q_cv, NULL, CV_DRIVER, NULL);
	cv_init(&man_iwork_q->q_cv, NULL, CV_DRIVER, NULL);

	return (0);
}

/*
 * _info is called by modinfo().
 */
int
_info(struct modinfo *modinfop)
{
	int	status;

	MAN_DBG(MAN_INIT, ("_info:"));

	status = mod_info(&modlinkage, modinfop);

	MAN_DBG(MAN_INIT, ("_info: returns %d", status));

	return (status);
}

/*
 * _fini is called by modunload() just before the driver is unloaded
 * from memory.
 */
int
_fini(void)
{
	int status = 0;

	MAN_DBG(MAN_INIT, ("_fini:"));


	/*
	 * The only upper stream left should be man_ctl_lh. Note that
	 * man_close (upper stream) is synchronous (i.e. it waits for
	 * all STREAMS framework associated with the upper stream to be
	 * torn down). This guarantees that man_ctl_lh will never become
 * NULL until no one is around to notice. This assumption is made
	 * in a few places like man_plumb, man_unplumb, etc.
	 */
	if (man_strup && (man_strup->ms_next != NULL))
		return (EBUSY);

	/*
	 * Deconfigure the driver.
	 */
	status = man_deconfigure();
	if (status)
		goto exit;

	/*
	 * need to detach every instance of the driver
	 */
	status = mod_remove(&modlinkage);
	if (status != 0)
		goto exit;

	ddi_soft_state_fini(&man_softstate);

	/*
	 * Free up locks.
	 */
	mutex_destroy(&man_lock);
	cv_destroy(&man_bwork_q->q_cv);
	cv_destroy(&man_iwork_q->q_cv);

	man_kfree(man_bwork_q, sizeof (man_workq_t));
	man_kfree(man_iwork_q, sizeof (man_workq_t));

exit:

	MAN_DBG(MAN_INIT, ("_fini: returns %d", status));

	return (status);
}

/*
 * Deconfigure the MAN driver.
 */
static int
man_deconfigure()
{
	man_work_t	*wp;
	int		status = 0;

	MAN_DBG(MAN_CONFIG, ("man_deconfigure:\n"));

	mutex_enter(&man_lock);

	if (man_is_on_domain) {
		status = man_domain_deconfigure();
		if (status != 0)
			goto exit;
	}

	man_param_cleanup();	/* Free up NDD resources */

	/*
	 * TBD - we may have to handle straggling work requests here
	 * (just qwait? or cv_wait?), since we are called from _fini.
	 */
	ASSERT(man_bwork_q->q_work == NULL);
	ASSERT(man_iwork_q->q_work == NULL);

	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting CLOSE_CTL\n"));

	if (man_ctl_lh != NULL) {
		wp = man_work_alloc(MAN_WORK_CLOSE_CTL, KM_SLEEP);
		wp->mw_flags = MAN_WFLAGS_CVWAITER;
		man_work_add(man_bwork_q, wp);

		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
			cv_wait(&wp->mw_cv, &man_lock);
		}
		man_work_free(wp);
	}

	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting STOP\n"));
	if (man_bwork_id != NULL) {

		wp = man_work_alloc(MAN_WORK_STOP, KM_SLEEP);
		wp->mw_flags = MAN_WFLAGS_CVWAITER;
		man_work_add(man_bwork_q, wp);

		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
			cv_wait(&wp->mw_cv, &man_lock);
		}
		man_work_free(wp);
	}
	man_config_state = MAN_UNCONFIGURED;

exit:
	mutex_exit(&man_lock);

	MAN_DBG(MAN_CONFIG, ("man_deconfigure: returns %d\n", status));

	return (status);
}

/*
 * man_attach - allocate resources and attach an instance of the MAN driver
 * The <man>.conf file controls how many instances of the MAN driver are
 * available.
 *
 *	dip - devinfo of node
 * 	cmd - one of DDI_ATTACH | DDI_RESUME
 *
 *	returns	- success - DDI_SUCCESS
 *		- failure - DDI_FAILURE
 */
static int
man_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	man_t		*manp;		/* per instance data */
	uchar_t		flag = KSTAT_FLAG_WRITABLE; /* support netstat -kc */
	kstat_t		*ksp;
	int		minor_node_created = 0;
	int		instance;
	eaddr_t		man_eaddr;

	MAN_DBG(MAN_INIT, ("man_attach: \n"));

	if (cmd != DDI_ATTACH) {
		MAN_DBG(MAN_INIT, ("man_attach: bad command %d\n", cmd));
		return (DDI_FAILURE);
	}

	if (man_get_our_etheraddr(&man_eaddr))
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);

	/*
	 * We assume that the instance is always zero and that there will
	 * only ever be one instance.  This is done because when dman
	 * opens itself via DMAN_INT_PATH, the path assumes that the
	 * instance number is zero.  If we ever need to support multiple
	 * instances of the dman driver or non-zero instances, this will
	 * have to change.
	 */
	ASSERT(instance == 0);

	/*
	 * Allocate per device info pointer and link in to global list of
	 * MAN devices.
	 */
	if ((ddi_soft_state_zalloc(man_softstate, instance) != DDI_SUCCESS) ||
	    ((manp = ddi_get_soft_state(man_softstate, instance)) == NULL)) {
		cmn_err(CE_WARN, "man_attach: cannot zalloc soft state!");
		return (DDI_FAILURE);
	}

	ddi_set_driver_private(dip, manp);
	manp->man_dip = dip;
	manp->man_meta_major = ddi_driver_major(dip);
	manp->man_meta_ppa = instance;

	/*
	 * Set ethernet address. Note that this address is duplicated
	 * at md_src_eaddr.
	 */
	ether_copy(&man_eaddr, &manp->man_eaddr);
	manp->man_eaddr_v = 1;

	MAN_DBG(MAN_INIT, ("man_attach: set ether to %s",
	    ether_sprintf(&manp->man_eaddr)));

	/*
	 * Initialize failover-related fields (timers and such),
	 * taking values from properties if present.
	 */
	manp->man_init_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "init_time", MAN_INIT_TIME);

	manp->man_linkcheck_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "linkcheck_time", MAN_LINKCHECK_TIME);

	manp->man_linkstale_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_linkstale_time", MAN_LINKSTALE_TIME);

	manp->man_linkstale_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_linkstale_retries", MAN_LINKSTALE_RETRIES);

	manp->man_dr_delay = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_dr_delay", MAN_DR_DELAY);

	manp->man_dr_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_dr_retries", MAN_DR_RETRIES);

	manp->man_kstat_waittime = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_kstat_waittime", MAN_KSTAT_WAITTIME);

	manp->man_dlpireset_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
	    "man_dlpireset_time", MAN_DLPIRESET_TIME);

	if (ddi_create_internal_pathname(dip, MAN_IDNAME, S_IFCHR,
	    ddi_get_instance(dip)) == DDI_SUCCESS) {
		minor_node_created = 1;
	} else {
		cmn_err(CE_WARN, "man_attach: failed for instance %d",
		    ddi_get_instance(dip));
		goto exit;
	}

	if (ddi_create_minor_node(dip, MAN_IDNAME, S_IFCHR,
	    ddi_get_instance(dip), DDI_NT_NET, CLONE_DEV) == DDI_SUCCESS) {
		minor_node_created = 1;
	} else {
		cmn_err(CE_WARN, "man_attach: failed for instance %d",
		    ddi_get_instance(dip));
		goto exit;
	}

	/*
	 * Allocate meta kstat_t for this instance of the driver.
	 * Note that each of man_path_t keeps track of the kstats
	 * for the real devices via mp_last_knp.
	 */
#ifdef	kstat
	flag |= KSTAT_FLAG_PERSISTENT;
#endif
	ksp = kstat_create(MAN_IDNAME, ddi_get_instance(dip), NULL, "net",
	    KSTAT_TYPE_NAMED, MAN_NUMSTATS, flag);

	if (ksp == NULL) {
		cmn_err(CE_WARN, "man_attach(%d): kstat_create failed"
		    " - manp(0x%p)", manp->man_meta_ppa,
		    (void *)manp);
		goto exit;
	}

	man_kstat_named_init(ksp->ks_data, MAN_NUMSTATS);
	ksp->ks_update = man_kstat_update;
	ksp->ks_private = (void *) manp;
	manp->man_ksp = ksp;
	kstat_install(manp->man_ksp);

	ddi_report_dev(dip);

	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_SUCCESS",
	    ddi_get_instance(dip)));

	return (DDI_SUCCESS);

exit:
	if (minor_node_created)
		ddi_remove_minor_node(dip, NULL);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(man_softstate, instance);

	MAN_DBG(MAN_INIT, ("man_attach(%d) eaddr returns DDI_FAILIRE",
	    ddi_get_instance(dip)));

	return (DDI_FAILURE);

}

static int
man_get_our_etheraddr(eaddr_t *eap)
{
	manc_t	manc;
	int	status = 0;

	if (man_is_on_domain) {
		if (status = man_get_iosram(&manc))
			return (status);
		ether_copy(&manc.manc_dom_eaddr, eap);
	} else {
		(void) localetheraddr((struct ether_addr *)NULL, eap);
	}

	return (status);
}

/*
 * man_detach - detach an instance of a driver
 *
 *	dip - devinfo of node
 * 	cmd - one of DDI_DETACH | DDI_SUSPEND
 *
 *	returns	- success - DDI_SUCCESS
 *		- failure - DDI_FAILURE
 */
static int
man_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	register man_t	*manp;		/* per instance data */
	int		instance;

	MAN_DBG(MAN_INIT, ("man_detach(%d):\n", ddi_get_instance(dip)));

	if (cmd != DDI_DETACH) {
		MAN_DBG(MAN_INIT, ("man_detach: bad command %d\n", cmd));
		return (DDI_FAILURE);
	}

	if (dip == NULL) {
		MAN_DBG(MAN_INIT, ("man_detach: dip == NULL\n"));
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);

	mutex_enter(&man_lock);

	manp = (man_t *)ddi_get_soft_state(man_softstate, instance);
	if (manp == NULL) {
		mutex_exit(&man_lock);

		cmn_err(CE_WARN, "man_detach: unable to get softstate"
		    " for instance = %d, dip = 0x%p!\n", instance,
		    (void *)dip);
		return (DDI_FAILURE);
	}

	if (manp->man_refcnt != 0) {
		mutex_exit(&man_lock);

		cmn_err(CE_WARN, "man_detach: %s%d refcnt %d", MAN_IDNAME,
		    instance, manp->man_refcnt);
		MAN_DBGCALL(MAN_INIT, man_print_man(manp));

		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(dip, NULL);

	mutex_exit(&man_lock);

	kstat_delete(manp->man_ksp);
	ddi_soft_state_free(man_softstate, instance);
	ddi_set_driver_private(dip, NULL);

	MAN_DBG(MAN_INIT, ("man_detach returns DDI_SUCCESS"));

	return (DDI_SUCCESS);
}

/*
 * man_info:
 *	As a standard DLPI style-2 driver, man_info() should always
 *	return DDI_FAILURE.
 *
 *	However, man_open() has special treatment for a direct open
 *	via kstr_open() without going through the CLONE driver.
 *	To make this special kstr_open() work, we need to map
 *	minor of 0 to instance 0.
 */
/*ARGSUSED*/
static int
man_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	minor_t minor;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		break;

	case DDI_INFO_DEVT2INSTANCE:
		minor = getminor((dev_t)arg);
		if (minor == 0) {
			*result = (void *)(uintptr_t)minor;
			return (DDI_SUCCESS);
		}
		break;
	default:
		break;
	}
	return (DDI_FAILURE);
}

/* Standard Device Driver entry points */

/*
 * man_open - open the device
 *
 *	rq - upper read queue of the stream
 *	devp - pointer to a device number
 *	flag - information passed from the user program open(2) system call
 *	sflag - stream flags
 *	credp - pointer to the cred(9S) user credential structure
 *
 *	returns	- success - 0
 *		- failure - errno value for failure
 */
/*ARGSUSED*/
static int
man_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int			minordev = -1;
	manstr_t		*msp;
	manstr_t		*tsp;
	manstr_t		**prevmsp;
	int			status = 0;

	MAN_DBG(MAN_OCLOSE, ("man_open: rq(0x%p) sflag(0x%x)\n",
	    (void *)rq, sflag));

	ASSERT(rq);
	ASSERT(sflag != MODOPEN);

	/*
	 * reopen; q_ptr set to msp at open completion.
	 */
	if (rq->q_ptr) {
		return (0);
	}

	/*
	 * Allocate and initialize manstr_t for this device.
	 */
	msp = man_kzalloc(sizeof (manstr_t), KM_SLEEP);
	SETSTATE(msp, DL_UNATTACHED);
	msp->ms_meta_ppa = -1;
	msp->ms_rq = rq;
	rq->q_ptr = WR(rq)->q_ptr = msp;

	/*
	 * Get the MAN driver configured on 1st open.  Note that the only way
	 * we get sflag != CLONEOPEN is via the call in man_plumbctl().  All
	 * CLONEOPEN calls to man_open will be via the file system
	 * device node /dev/man, a pseudo clone device.
	 */

	qprocson(rq);

	if (sflag == CLONEOPEN && man_config_state != MAN_CONFIGURED) {
		/*
		 * First open calls man_configure. Everyone qwaits until
		 * we get it open. See man_open_ctl() comments for mutex
		 * lock/synchronization info.
		 */

		mutex_enter(&man_lock);

		if (man_config_state == MAN_UNCONFIGURED) {
			man_config_state = MAN_CONFIGURING;
			mutex_exit(&man_lock);
			status = man_configure(rq);
			if (status != 0)
				goto exit;
		} else {
			while (man_config_state == MAN_CONFIGURING) {

				mutex_exit(&man_lock);
				status = qwait_sig(rq);

				if (status == 0) {
					status = EINTR;
					goto exit;
				}

				mutex_enter(&man_lock);
			}
			mutex_exit(&man_lock);

			if (man_config_error) {
				status = man_config_error;
				goto exit;
			}
		}
	}

	/*
	 * Determine minor device number. man_open serialized by
	 * D_MTPERMOD.
	 */
	prevmsp = &man_strup;
	if (sflag == CLONEOPEN) {

		minordev = 0;
		for (; (tsp = *prevmsp) != NULL; prevmsp = &tsp->ms_next) {
			if (minordev < tsp->ms_minor)
				break;
			minordev++;
		}
		*devp = makedevice(getmajor(*devp), minordev);

	} else {
		/*
		 * Should only get here from man_plumbctl().
		 */
		/*LINTED E_ASSIGN_UINT_TO_SIGNED_INT*/
		minordev = getminor(*devp);

		/*
		 * No need to protect this here as all opens are
		 * qwaiting, and the bgthread (who is doing this open)
		 * is the only one who mucks with this variable.
		 */
		man_ctl_wq = WR(rq);

		ASSERT(minordev == 0);	/* TBD delete this */
	}

	msp->ms_meta_maj = getmajor(*devp);
	msp->ms_minor = minordev;
	if (minordev == 0)
		msp->ms_flags = MAN_SFLAG_CONTROL;

	/*
	 * Link new entry into global list of active entries.
	 */
	msp->ms_next = *prevmsp;
	*prevmsp = msp;


	/*
	 * Disable automatic enabling of our write service procedure.
	 * We control this explicitly.
	 */
	noenable(WR(rq));

exit:
	MAN_DBG(MAN_OCLOSE, ("man_open: exit rq(0x%p) minor %d errno %d\n",
	    (void *)rq, minordev, status));

	/*
	 * Clean up on error.
	 */
	if (status) {
		qprocsoff(rq);
		rq->q_ptr = WR(rq)->q_ptr = NULL;
		man_kfree((char *)msp, sizeof (manstr_t));
	} else
		(void) qassociate(rq, -1);

	return (status);
}

/*
 * Get the driver configured.  Called from first man_open with exclusive
 * inner perimeter.
 */
static int
man_configure(queue_t *rq)
{
	man_work_t	*wp;
	int		status = 0;

	MAN_DBG(MAN_CONFIG, ("man_configure:"));

	/*
	 * Initialize NDD parameters.
	 */
	if (!man_ndlist &&
	    !man_param_register(man_param_arr, A_CNT(man_param_arr))) {
		cmn_err(CE_WARN, "man_configure: man_param_register failed!");
		man_config_error = ENOMEM;
		goto exit;
	}

	mutex_enter(&man_lock);

	/*
	 * Start up background thread.
	 */
	if (man_bwork_id == NULL)
		man_bwork_id = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    man_bwork, NULL, 0, &p0, TS_RUN, minclsyspri);

	/*
	 * Submit work to get control stream opened. Qwait until its
	 * done. See man_open_ctl for mutex lock/synchronization info.
	 */

	if (man_ctl_lh == NULL) {
		wp = man_work_alloc(MAN_WORK_OPEN_CTL, KM_SLEEP);
		wp->mw_flags |= MAN_WFLAGS_QWAITER;
		wp->mw_q = WR(rq);

		/*
		 * Submit work and wait. When man_open_ctl exits
		 * man_open, it will cause qwait below to return.
		 */
		man_work_add(man_bwork_q, wp);
		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
			mutex_exit(&man_lock);
			qwait(rq);
			mutex_enter(&man_lock);
		}
		status = wp->mw_status;
		man_work_free(wp);

	}
	mutex_exit(&man_lock);

	/*
	 * If on domain, setup IOSRAM and build the pathgroups
	 * automatically.
	 */
	if ((status == 0) && man_is_on_domain)
		status = man_domain_configure();

exit:
	mutex_enter(&man_lock);

	man_config_error = status;
	if (status != 0)
		man_config_state = MAN_UNCONFIGURED;
	else
		man_config_state = MAN_CONFIGURED;

	mutex_exit(&man_lock);

	MAN_DBG(MAN_CONFIG, ("man_configure: returns %d\n", status));

	return (status);
}

/*
 * man_close - close the device
 *
 *	rq - upper read queue of the stream
 *
 *	returns	- success - 0
 *		- failure - errno value for failure
 */
static int
man_close(queue_t *rq)
{
	manstr_t		*close_msp;
	manstr_t		*msp;

	MAN_DBG(MAN_OCLOSE, ("man_close: rq(0x%p)\n", (void *)rq));

	qprocsoff(rq);
	close_msp = (manstr_t *)rq->q_ptr;

	/*
	 * Unlink the per-Stream entry from the active list and free it.
	 */
	if (close_msp == man_strup)
		man_strup = close_msp->ms_next;
	else {
		for (msp = man_strup; msp && msp->ms_next != close_msp; )
			msp = msp->ms_next;

		if (msp == NULL) {
			cmn_err(CE_WARN, "man_close: no stream!");
			return (ENODEV);
		}

		msp->ms_next = close_msp->ms_next;
	}

	if (close_msp->ms_dests != NULL) {
		/*
		 * Still DL_ATTACHED
		 */
		man_work_t *wp;

		wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_SLEEP);
		man_dodetach(close_msp, wp);
	}

	if (close_msp->ms_flags & MAN_SFLAG_CONTROL) {
		/*
		 * Driver about to unload.
		 */
		man_ctl_wq = NULL;
	}

	rq->q_ptr = WR(rq)->q_ptr = NULL;
	man_kfree((char *)close_msp, sizeof (manstr_t));
	(void) qassociate(rq, -1);

	MAN_DBG(MAN_OCLOSE, ("man_close: exit\n"));

	return (0);
}

/*
 * Ask bgthread to tear down lower stream and qwait
 * until it's done.
 */
static void
man_dodetach(manstr_t *msp, man_work_t *wp)
{
	man_dest_t	*mdp;
	int		i;
	mblk_t		*mp;

	mdp = msp->ms_dests;
	msp->ms_dests = NULL;
	msp->ms_destp = NULL;

	/*
	 * Excise lower dests array, set it closing and hand it to
	 * background thread to dispose of.
	 */
	for (i = 0; i < MAN_MAX_DESTS; i++) {

		mdp[i].md_state |= MAN_DSTATE_CLOSING;
		mdp[i].md_msp = NULL;
		mdp[i].md_rq = NULL;

		if (mdp[i].md_lc_timer_id != 0) {
			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
			mdp[i].md_lc_timer_id = 0;
		}
		if (mdp[i].md_bc_id != 0) {
			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
			mdp[i].md_bc_id = 0;
		}

		mutex_enter(&mdp[i].md_lock);
		while ((mp = mdp[i].md_dmp_head) != NULL) {
			mdp[i].md_dmp_head = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
		}
		mdp[i].md_dmp_count = 0;
		mdp[i].md_dmp_tail = NULL;
		mutex_exit(&mdp[i].md_lock);
	}

	/*
	 * Dump any DL type messages previously caught.
	 */
	man_dl_clean(&msp->ms_dl_mp);
	man_dl_clean(&msp->ms_dlioc_mp);

	/*
	 * We need to clear fast path flag when dlioc messages are cleaned.
	 */
	msp->ms_flags &= ~MAN_SFLAG_FAST;

	/*
	 * MAN_WORK_CLOSE_STREAM work request preallocated by caller.
	 */
	ASSERT(wp->mw_type == MAN_WORK_CLOSE_STREAM);
	ASSERT(mdp != NULL);
	wp->mw_arg.a_mdp = mdp;
	wp->mw_arg.a_ndests = MAN_MAX_DESTS;
	wp->mw_arg.a_pg_id = -1;	/* Don't care */

	mutex_enter(&man_lock);
	man_work_add(man_bwork_q, wp);
	msp->ms_manp->man_refcnt--;
	mutex_exit(&man_lock);

	msp->ms_manp = NULL;

}


/*
 * man_uwput - handle DLPI messages issued from upstream, the write
 * side of the upper half of multiplexor. Called with shared access to
 * the inner perimeter.
 *
 *	wq - upper write queue of mxx
 *	mp - mblk ptr to DLPI request
 */
static int
man_uwput(register queue_t *wq, register mblk_t *mp)
{
	register manstr_t	*msp;		/* per stream data */
	register man_t		*manp;		/* per instance data */

	msp = (manstr_t *)wq->q_ptr;

	MAN_DBG(MAN_UWPUT, ("man_uwput: wq(0x%p) mp(0x%p) db_type(0x%x)"
	    " msp(0x%p)\n",
	    (void *)wq, (void *)mp, DB_TYPE(mp), (void *)msp));
#if DEBUG
	if (man_debug & MAN_UWPUT) {
		if (DB_TYPE(mp) == M_IOCTL) {
			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
			MAN_DBG(MAN_UWPUT,
			    ("man_uwput: M_IOCTL ioc_cmd(0x%x)\n",
			    iocp->ioc_cmd));
		} else if (DB_TYPE(mp) == M_CTL) {
			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
			MAN_DBG(MAN_UWPUT,
			    ("man_uwput: M_CTL ioc_cmd(0x%x)\n",
			    iocp->ioc_cmd));
		}
	}
#endif	/* DEBUG */


	switch (DB_TYPE(mp)) {
	case M_DATA:
		manp = msp->ms_manp;

		if (((msp->ms_flags & (MAN_SFLAG_FAST | MAN_SFLAG_RAW)) == 0) ||
		    (msp->ms_dlpistate != DL_IDLE) ||
		    (manp == NULL)) {

			merror(wq, mp, EPROTO);
			break;
		}

		if (wq->q_first) {
			(void) putq(wq, mp);
			qenable(wq);
		} else {
			ehdr_t	*ep = (ehdr_t *)mp->b_rptr;

			(void) man_start(wq, mp, &ep->ether_dhost);
		}
		break;

	case M_PROTO:
	case M_PCPROTO:
		if ((DL_PRIM(mp) == DL_UNITDATA_REQ) && !wq->q_first) {
			(void) man_udreq(wq, mp);
		} else {
			(void) putq(wq, mp);
			qenable(wq);
		}
		break;

	case M_IOCTL:
	case M_IOCDATA:
		qwriter(wq, mp, man_ioctl, PERIM_INNER);
		break;

	case M_CTL:
		freemsg(mp);
		break;

	case M_FLUSH:
		MAN_DBG(MAN_UWPUT, ("man_wput: M_FLUSH\n"));
		if (*mp->b_rptr & FLUSHW)
			flushq(wq, FLUSHDATA);
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(wq), FLUSHDATA);
			*mp->b_rptr &= ~FLUSHW;
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		break;

	default:
		MAN_DBG(MAN_WARN,
		    ("man_uwput: illegal mblk(0x%p) type(0x%x)\n",
		    (void *)mp, DB_TYPE(mp)));
		freemsg(mp);
		break;
	} /* End switch */

	MAN_DBG(MAN_UWPUT, ("man_uwput: exit wq(0x%p) mp(0x%p)\n",
	    (void *)wq, (void *)mp));

	return (0);
}

/*
 * man_start - handle data messages issued from upstream.  Send down
 * to particular man_dest based on ether_addr, otherwise send out to all
 * valid man_dests.
 *
 *	wq - upper write queue of mxx
 *	mp - mblk ptr to DLPI request
 * 	caller - Caller ID for decision making on canput failure
 *
 * Returns:
 *	0	- Data xmitted or No flow control situation detected.
 *	1	- Flow control situation detected.
 *
 * STREAMS Flow Control: can be used if there is only one destination
 * for a stream (1 to 1 multiplexor). In this case, we will use the upper
 * write queue to store mblks when in flow control. If there are multiple
 * destinations, we cannot use the STREAMS-based flow control (1 to many
 * multiplexor). In this case, we will use the lower write queue to store
 * mblks when in flow control. Since destinations come and go, we may
 * transition between 1-to-1 and 1-to-m. So it may be the case that we have
 * some mblks stored on the upper queue, and some on the lower queue. However,
 * we will never send mblks out of order. See man_uwput and man_start_lower().
 *
 * A simple flow control mechanism is implemented for the deferred mblk list,
 * as this list is expected to be used temporarily for a very short
 * period required for switching paths. This flow control mechanism is
 * used only as a defensive approach to avoid infinite growth of this list.
 */
static int
man_start(register queue_t *wq, register mblk_t *mp, eaddr_t *eap)
{
	register manstr_t	*msp;		/* per stream data */
	register man_dest_t	*mdp = NULL;	/* destination */
	mblk_t			*tmp;
	int			i;
	int			status = 0;

	msp = (manstr_t *)wq->q_ptr;

	MAN_DBG(MAN_DATA, ("man_start: msp(0x%p) ether_addr(%s)\n",
	    (void *)msp, ether_sprintf(eap)));

	if (msp->ms_dests == NULL) {
		cmn_err(CE_WARN, "man_start: no destinations");
		freemsg(mp);
		return (0);
	}

	/*
	 * Optimization if only one valid destination.
	 */
	mdp = msp->ms_destp;

	if (IS_UNICAST(eap)) {
		queue_t			*flow_wq = NULL;

		if (mdp == NULL) {
			/*
			 * TBD - This needs to be optimized (some bits in
			 * ehp->dhost will act as an index).
			 */
			for (i = 0; i < MAN_MAX_DESTS; i++) {

				mdp = &msp->ms_dests[i];

				if ((mdp->md_state == MAN_DSTATE_READY) &&
				    (ether_cmp(eap, &mdp->md_dst_eaddr) == 0))
					break;
				mdp = NULL;
			}
		} else {
			/*
			 * 1 to 1 multiplexing, use upper wq for flow control.
			 */
			flow_wq = wq;
		}

		if (mdp != NULL) {
			/*
			 * Its going somewhere specific
			 */
			status =  man_start_lower(mdp, mp, flow_wq, MAN_UPPER);

		} else {
			MAN_DBG(MAN_DATA, ("man_start: no destination"
			    " for eaddr %s\n", ether_sprintf(eap)));
			freemsg(mp);
		}
	} else {
		/*
		 * Broadcast or multicast - send everyone a copy.
		 */
		if (mdp == NULL) {
			for (i = 0; i < MAN_MAX_DESTS; i++) {
				mdp = &msp->ms_dests[i];

				if (mdp->md_state != MAN_DSTATE_READY)
					continue;

				if ((tmp = copymsg(mp)) != NULL) {
					(void) man_start_lower(mdp, tmp,
					    NULL, MAN_UPPER);
				} else {
					MAN_DBG(MAN_DATA, ("man_start: copymsg"
					    " failed!"));
				}
			}
			freemsg(mp);
		} else {
			if (mdp->md_state == MAN_DSTATE_READY)
				status =  man_start_lower(mdp, mp, wq,
				    MAN_UPPER);
			else
				freemsg(mp);
		}
	}
	return (status);
}
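
/*
 * Summary of the flow control cases above and in man_start_lower():
 *
 *	Destinations	Where mblks wait when flow controlled
 *	------------	-------------------------------------
 *	exactly one	upper write queue (putbq()), drained by man_uwsrv()
 *	more than one	lower write queue (putq()), drained by man_lwsrv()
 *	in transition	per-dest deferred list (md_dmp_head/md_dmp_tail),
 *			bounded by MAN_HIWAT bytes
 */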

/*
 * Send a DL_UNITDATA or M_DATA fastpath data mblk to a particular
 * destination. Other mblk types are sent down via man_dlpi_senddown().
 *
 * Returns:
 *	0	- Data xmitted
 *	1	- Data not xmitted due to flow control.
 */
static int
man_start_lower(man_dest_t *mdp, mblk_t *mp, queue_t *flow_wq, int caller)
{
	queue_t		*wq = mdp->md_wq;
	int		status = 0;

	/*
	 * Lower stream ready for data transmit.
	 */
	if (mdp->md_state == MAN_DSTATE_READY &&
	    mdp->md_dlpistate == DL_IDLE) {

		ASSERT(mdp->md_wq != NULL);

		if (caller == MAN_UPPER) {
			/*
			 * Check for flow control conditions for lower
			 * stream.
			 */
			if (mdp->md_dmp_head == NULL &&
			    wq->q_first == NULL && canputnext(wq)) {

				(void) putnext(wq, mp);

			} else {
				mutex_enter(&mdp->md_lock);
				if (mdp->md_dmp_head != NULL) {
					/*
					 * A simple flow control mechanism.
					 */
					if (mdp->md_dmp_count >= MAN_HIWAT) {
						freemsg(mp);
					} else {
						/*
						 * Add 'mp' to the deferred
						 * msg list.
						 */
						mdp->md_dmp_tail->b_next = mp;
						mdp->md_dmp_tail = mp;
						mdp->md_dmp_count +=
						    msgsize(mp);
					}
					mutex_exit(&mdp->md_lock);
					/*
					 * Inform flow control situation
					 * to the caller.
					 */
					status = 1;
					qenable(wq);
					goto exit;
				}
				mutex_exit(&mdp->md_lock);
				/*
				 * If 1 to 1 mux, use upper write queue for
				 * flow control.
				 */
				if (flow_wq != NULL) {
					/*
					 * putbq() message and indicate
					 * flow control situation to the
					 * caller.
					 */
					putbq(flow_wq, mp);
					qenable(flow_wq);
					status = 1;
					goto exit;
				}
				/*
				 * 1 to many mux, use lower write queue for
				 * flow control. Be mindful not to overflow
				 * the lower MAN STREAM q.
				 */
				if (canput(wq)) {
					(void) putq(wq, mp);
					qenable(wq);
				} else {
					MAN_DBG(MAN_DATA, ("man_start_lower:"
					    " lower q flow controlled -"
					    " discarding packet"));
					freemsg(mp);
					goto exit;
				}
			}

		} else {
			/*
			 * man_lwsrv is draining flow controlled mblks.
			 */
			if (canputnext(wq))
				(void) putnext(wq, mp);
			else
				status = 1;
		}
		goto exit;
	}

	/*
	 * Lower stream in transition, do flow control.
	 */
	status = 1;

	if (mdp->md_state == MAN_DSTATE_NOTPRESENT) {
nodest:
		cmn_err(CE_WARN,
		    "man_start_lower: no dest for mdp(0x%p), caller(%d)!",
		    (void *)mdp, caller);
		if (caller == MAN_UPPER)
			freemsg(mp);
		goto exit;
	}

	if (mdp->md_state & MAN_DSTATE_CLOSING) {
		MAN_DBG(MAN_DATA, ("man_start_lower: mdp(0x%p) closing",
		    (void *)mdp));
		if (caller == MAN_UPPER)
			freemsg(mp);
		goto exit;
	}

	if ((mdp->md_state & MAN_DSTATE_PLUMBING) ||
	    (mdp->md_state == MAN_DSTATE_INITIALIZING) ||
	    (mdp->md_dlpistate != DL_IDLE)) {
		/*
		 * Defer until PLUMBED and DL_IDLE. See man_lwsrv().
		 */
		if (caller == MAN_UPPER) {
			/*
			 * Upper stream sending data down, add to deferred mblk
			 * list for stream.
			 */
			mutex_enter(&mdp->md_lock);
			if (mdp->md_dmp_count >= MAN_HIWAT) {
				freemsg(mp);
			} else {
				if (mdp->md_dmp_head == NULL) {
					ASSERT(mdp->md_dmp_tail == NULL);
					mdp->md_dmp_head = mp;
					mdp->md_dmp_tail = mp;
				} else {
					mdp->md_dmp_tail->b_next = mp;
					mdp->md_dmp_tail = mp;
				}
				mdp->md_dmp_count += msgsize(mp);
			}
			mutex_exit(&mdp->md_lock);
		}

		goto exit;
	}

exit:
	return (status);
}

/*
 * man_ioctl - handle ioctl requests for this driver (I_PLINK/I_PUNLINK)
 * or pass thru to the physical driver below.  Note that most M_IOCTLs we
 * care about come down the control msp, but the IOC ones come down the
 * IP stream.  Called with exclusive inner perimeter.
 *
 *	wq - upper write queue of mxx
 *	mp - mblk ptr to DLPI ioctl request
 */
static void
man_ioctl(register queue_t *wq, register mblk_t *mp)
{
	manstr_t		*msp;
	struct iocblk		*iocp;

	iocp = (struct iocblk *)mp->b_rptr;
	msp = (manstr_t *)wq->q_ptr;

#ifdef DEBUG
	{
		char			ioc_cmd[30];

		(void) sprintf(ioc_cmd, "not handled IOCTL 0x%x",
		    iocp->ioc_cmd);
		MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI),
		    ("man_ioctl: wq(0x%p) mp(0x%p) cmd(%s)\n",
		    (void *)wq, (void *)mp,
		    (iocp->ioc_cmd == I_PLINK) ? "I_PLINK" :
		    (iocp->ioc_cmd == I_PUNLINK) ? "I_PUNLINK" :
		    (iocp->ioc_cmd == MAN_SETPATH) ? "MAN_SETPATH" :
		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
	}
#endif /* DEBUG */


	/*
	 *  Handle the requests...
	 */
	switch ((unsigned int)iocp->ioc_cmd) {

	case I_PLINK:
		man_plink(wq, mp);
		break;

	case I_PUNLINK:
		man_unplink(wq, mp);
		break;

	case MAN_SETPATH:
		man_setpath(wq, mp);
		break;

	case MAN_GETEADDR:
		man_geteaddr(wq, mp);
		break;

	case MAN_SET_LINKCHECK_TIME:
		man_set_linkcheck_time(wq, mp);
		break;

	case MAN_SET_SC_IPADDRS:
		man_set_sc_ipaddrs(wq, mp);
		break;

	case MAN_SET_SC_IP6ADDRS:
		man_set_sc_ip6addrs(wq, mp);
		break;

	case DLIOCRAW:
		if (man_dlioc(msp, mp))
			miocnak(wq, mp, 0, ENOMEM);
		else {
			msp->ms_flags |= MAN_SFLAG_RAW;
			miocack(wq, mp, 0, 0);
		}
		break;

	case DL_IOC_HDR_INFO:
		man_dl_ioc_hdr_info(wq, mp);
		break;

	case MAN_ND_GET:
	case MAN_ND_SET:
		man_nd_getset(wq, mp);
		break;

	default:
		MAN_DBG(MAN_DDI, ("man_ioctl: unknown ioc_cmd %d\n",
		    (unsigned int)iocp->ioc_cmd));
		miocnak(wq, mp, 0, EINVAL);
		break;
	}
exit:
	MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI), ("man_ioctl: exit\n"));

}

/*
 * man_plink: handle I_PLINK requests on the control stream
 */
void
man_plink(queue_t *wq, mblk_t *mp)
{
	struct linkblk	*linkp;
	man_linkrec_t	*lrp;
	int		status = 0;

	linkp = (struct linkblk *)mp->b_cont->b_rptr;

	/*
	 * Create a record to hold lower stream info. man_plumb will
	 * retrieve it after calling ldi_ioctl(I_PLINK)
	 */
	lrp = man_kzalloc(sizeof (man_linkrec_t), KM_NOSLEEP);
	if (lrp == NULL) {
		status = ENOMEM;
		goto exit;
	}

	lrp->l_muxid = linkp->l_index;
	lrp->l_wq = linkp->l_qbot;
	lrp->l_rq = RD(linkp->l_qbot);

	man_linkrec_insert(lrp);

exit:
	if (status)
		miocnak(wq, mp, 0, status);
	else
		miocack(wq, mp, 0, 0);

}

/*
 * man_unplink - handle I_PUNLINK requests on the control stream
 */
void
man_unplink(queue_t *wq, mblk_t *mp)
{
	struct linkblk	*linkp;

	linkp = (struct linkblk *)mp->b_cont->b_rptr;
	RD(linkp->l_qbot)->q_ptr = NULL;
	WR(linkp->l_qbot)->q_ptr = NULL;
	miocack(wq, mp, 0, 0);
}

void
man_linkrec_insert(man_linkrec_t *lrp)
{
	mutex_enter(&man_lock);

	lrp->l_next = man_linkrec_head;
	man_linkrec_head = lrp;

	mutex_exit(&man_lock);

}

static queue_t *
man_linkrec_find(int muxid)
{
	man_linkrec_t	*lpp;
	man_linkrec_t	*lp;
	queue_t		*wq = NULL;

	mutex_enter(&man_lock);

	if (man_linkrec_head == NULL)
		goto exit;

	lp = lpp = man_linkrec_head;
	if (lpp->l_muxid == muxid) {
		man_linkrec_head = lpp->l_next;
	} else {
		for (lp = lpp->l_next; lp; lp = lp->l_next) {
			if (lp->l_muxid == muxid)
				break;
			lpp = lp;
		}
	}

	if (lp == NULL)
		goto exit;

	wq = lp->l_wq;
	ASSERT(wq != NULL);

	lpp->l_next = lp->l_next;
	man_kfree(lp, sizeof (man_linkrec_t));

exit:
	mutex_exit(&man_lock);

	return (wq);
}
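
/*
 * Sketch of the consumer side (man_plumb() is not shown in this
 * excerpt, so the exact sequence below is an assumption): after
 * plumbing a lower stream with ldi_ioctl(I_PLINK) on the control
 * stream, the returned mux id is used to recover the lower write
 * queue saved by man_plink(). "lower_lh" below is a hypothetical
 * LDI handle for the lower device:
 *
 *	int	muxid = 0;
 *	queue_t	*lower_wq;
 *
 *	(void) ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lower_lh,
 *	    FKIOCTL, kcred, &muxid);
 *	lower_wq = man_linkrec_find(muxid);
 */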

/*
 * Set instance linkcheck timer value.
 */
static void
man_set_linkcheck_time(queue_t *wq, mblk_t *mp)
{
	mi_time_t	*mtp;
	int		error;
	man_t		*manp;

	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: enter"));

	error = miocpullup(mp, sizeof (mi_time_t));
	if (error != 0)
		goto exit;

	mtp = (mi_time_t *)mp->b_cont->b_rptr;

	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: mtp"));
	MAN_DBGCALL(MAN_LINK, man_print_mtp(mtp));

	manp = ddi_get_soft_state(man_softstate, mtp->mtp_man_ppa);
	if (manp == NULL) {
		error = ENODEV;
		goto exit;
	}

	manp->man_linkcheck_time = mtp->mtp_time;
exit:
	if (error)
		miocnak(wq, mp, 0, error);
	else
		miocack(wq, mp, sizeof (mi_time_t), 0);
}

/*
 * Man path ioctl processing. Should only happen on the SSC. Called
 * with exclusive inner perimeter.
 */
static void
man_setpath(queue_t *wq, mblk_t *mp)
{
	mi_path_t		*mip;
	int			error;

	error = miocpullup(mp, sizeof (mi_path_t));
	if (error != 0)
		goto exit;

	mip = (mi_path_t *)mp->b_cont->b_rptr;
	mutex_enter(&man_lock);
	error = man_pg_cmd(mip, NULL);
	mutex_exit(&man_lock);

exit:
	if (error)
		miocnak(wq, mp, 0, error);
	else
		miocack(wq, mp, sizeof (mi_path_t), 0);
}

/*
 * Get the local ethernet address of this machine.
 */
static void
man_geteaddr(queue_t *wq, mblk_t *mp)
{
	eaddr_t			*eap;
	int			error;

	error = miocpullup(mp, sizeof (eaddr_t));
	if (error != 0) {
		miocnak(wq, mp, 0, error);
		return;
	}

	eap = (eaddr_t *)mp->b_cont->b_rptr;
	(void) localetheraddr(NULL, eap);
	miocack(wq, mp, sizeof (eaddr_t), 0);
}

/*
 * Set my SC and other SC IPv4 addresses for use in man_pinger routine.
 */
static void
man_set_sc_ipaddrs(queue_t *wq, mblk_t *mp)
{
	int			error;

	error = miocpullup(mp, sizeof (man_sc_ipaddrs_t));
	if (error != 0)
		goto exit;

	man_sc_ipaddrs = *(man_sc_ipaddrs_t *)mp->b_cont->b_rptr;

#ifdef DEBUG
	{
		char	buf[INET_ADDRSTRLEN];

		(void) inet_ntop(AF_INET,
		    (void *) &man_sc_ipaddrs.ip_other_sc_ipaddr,
		    buf, INET_ADDRSTRLEN);
		MAN_DBG(MAN_CONFIG, ("ip_other_sc_ipaddr = %s", buf));
		(void) inet_ntop(AF_INET,
		    (void *) &man_sc_ipaddrs.ip_my_sc_ipaddr,
		    buf, INET_ADDRSTRLEN);
		MAN_DBG(MAN_CONFIG, ("ip_my_sc_ipaddr = %s", buf));
	}
#endif /* DEBUG */
exit:
	if (error)
		miocnak(wq, mp, 0, error);
	else
		miocack(wq, mp, sizeof (man_sc_ipaddrs_t), 0);
}

/*
 * Set my SC and other SC IPv6 addresses for use in man_pinger routine.
 */
static void
man_set_sc_ip6addrs(queue_t *wq, mblk_t *mp)
{
	int			error;

	error = miocpullup(mp, sizeof (man_sc_ip6addrs_t));
	if (error != 0)
		goto exit;

	man_sc_ip6addrs = *(man_sc_ip6addrs_t *)mp->b_cont->b_rptr;

#ifdef DEBUG
	{
		char	buf[INET6_ADDRSTRLEN];

		(void) inet_ntop(AF_INET6,
		    (void *) &man_sc_ip6addrs.ip6_other_sc_ipaddr,
		    buf, INET6_ADDRSTRLEN);
		MAN_DBG(MAN_CONFIG, ("ip6_other_sc_ipaddr = %s", buf));
		(void) inet_ntop(AF_INET6,
		    (void *) &man_sc_ip6addrs.ip6_my_sc_ipaddr,
		    buf, INET6_ADDRSTRLEN);
		MAN_DBG(MAN_CONFIG, ("ip6_my_sc_ipaddr = %s", buf));
	}
#endif /* DEBUG */
exit:
	if (error)
		miocnak(wq, mp, 0, error);
	else
		miocack(wq, mp, sizeof (man_sc_ip6addrs_t), 0);
}

/*
 * M_DATA fastpath info request.
 */
static void
man_dl_ioc_hdr_info(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	man_t			*manp;
	mblk_t			*nmp;
	man_dladdr_t		*dlap;
	dl_unitdata_req_t	*dludp;
	struct	ether_header	*headerp;
	t_uscalar_t		off, len;
	int			status = 0;

	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: enter"));

	msp = (manstr_t *)wq->q_ptr;
	manp = msp->ms_manp;
	if (manp == NULL) {
		status = EINVAL;
		goto exit;
	}

	status = miocpullup(mp, sizeof (dl_unitdata_req_t) + MAN_ADDRL);
	if (status != 0)
		goto exit;

	/*
	 * Sanity check the DL_UNITDATA_REQ destination address
	 * offset and length values.
	 */
	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
	off = dludp->dl_dest_addr_offset;
	len = dludp->dl_dest_addr_length;
	if (dludp->dl_primitive != DL_UNITDATA_REQ ||
	    !MBLKIN(mp->b_cont, off, len) || len != MAN_ADDRL) {
		status = EINVAL;
		goto exit;
	}

	dlap = (man_dladdr_t  *)(mp->b_cont->b_rptr + off);

	/*
	 * Allocate a new mblk to hold the ether header.
	 */
	if ((nmp = allocb(ETHERHEADER_SIZE, BPRI_MED)) == NULL) {
		status = ENOMEM;
		goto exit;
	}

	/* We only need one dl_ioc_hdr mblk for replay */
	if (!(msp->ms_flags & MAN_SFLAG_FAST))
		status = man_dl_catch(&msp->ms_dlioc_mp, mp);

	/* Forward the packet to all lower destinations. */
	if ((status != 0) || ((status = man_dlpi_senddown(msp, mp)) != 0)) {
		freemsg(nmp);
		goto exit;
	}

	nmp->b_wptr += ETHERHEADER_SIZE;

	/*
	 * Fill in the ether header.
	 */
	headerp = (struct ether_header *)nmp->b_rptr;
	ether_copy(&dlap->dl_phys, &headerp->ether_dhost);
	ether_copy(&manp->man_eaddr, &headerp->ether_shost);
	put_ether_type(headerp, dlap->dl_sap);

	/*
	 * Link new mblk in after the "request" mblks.
	 */
	linkb(mp, nmp);

exit:
	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: returns, status = %d",
	    status));

	if (status) {
		miocnak(wq, mp, 0, status);
	} else {
		msp = (manstr_t *)wq->q_ptr;
		msp->ms_flags |= MAN_SFLAG_FAST;
		miocack(wq, mp, msgsize(mp->b_cont), 0);
	}

}
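
/*
 * On success, the DL_IOC_HDR_INFO reply built above carries the
 * prebuilt ether header linked behind the original request mblks,
 * roughly:
 *
 *	iocblk -> dl_unitdata_req_t + dest addr -> ether_header
 *	(mp)      (mp->b_cont)                     (nmp, via linkb)
 *
 * The upper module saves the returned header and thereafter prepends
 * it to M_DATA messages itself, bypassing DL_UNITDATA_REQ processing
 * (the M_DATA fastpath noted above).
 */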

/*
 * man_uwsrv - Upper write queue service routine to handle deferred
 * DLPI messages issued from upstream, the write side of the upper half
 * of multiplexor. It is also used by man_bwork to switch the lower
 * multiplexor.
 *
 *	wq - upper write queue of mxx
 */
static int
man_uwsrv(queue_t *wq)
{
	register mblk_t		*mp;
	manstr_t		*msp;		/* per stream data */
	man_t			*manp;		/* per instance data */
	ehdr_t			*ep;
	int			status;

	msp = (manstr_t *)wq->q_ptr;

	MAN_DBG(MAN_UWSRV, ("man_uwsrv: wq(0x%p) msp", (void *)wq));
	MAN_DBGCALL(MAN_UWSRV, man_print_msp(msp));

	if (msp == NULL)
		goto done;

	manp = msp->ms_manp;

	while (mp = getq(wq)) {

		switch (DB_TYPE(mp)) {
		/*
		 * Can probably remove this as I never put data messages
		 * here.
		 */
		case M_DATA:
			if (manp) {
				ep = (ehdr_t *)mp->b_rptr;
				status = man_start(wq, mp, &ep->ether_dhost);
				if (status) {
					/*
					 * man_start() indicated flow control
					 * situation, stop processing now.
					 */
					goto break_loop;
				}
			} else
				freemsg(mp);
			break;

		case M_PROTO:
		case M_PCPROTO:
			status = man_proto(wq, mp);
			if (status) {
				/*
				 * man_proto() indicated flow control
				 * situation detected by man_start(),
				 * stop processing now.
				 */
				goto break_loop;
			}
			break;

		default:
			MAN_DBG(MAN_UWSRV, ("man_uwsrv: discarding mp(0x%p)",
			    (void *)mp));
			freemsg(mp);
			break;
		}
	}

break_loop:
	/*
	 * Check to see if bgthread wants us to do something inside the
	 * perimeter.
	 */
	if ((msp->ms_flags & MAN_SFLAG_CONTROL) &&
	    man_iwork_q->q_work != NULL) {

		man_iwork();
	}

done:

	MAN_DBG(MAN_UWSRV, ("man_uwsrv: returns"));

	return (0);
}


/*
 * man_proto - handle DLPI protocol requests issued from upstream.
 * Called by man_uwsrv().  We disassociate upper and lower multiplexor
 * DLPI state transitions. The upper stream here (manstr_t) transitions
 * appropriately, saves the DLPI requests via man_dlpi(), and then
 * arranges for the DLPI request to be sent down via man_dlpi_senddown() if
 * appropriate.
 *
 *	wq - upper write queue of mxx
 *	mp - mblk ptr to protocol request
 */
static int
man_proto(queue_t *wq, mblk_t *mp)
{
	union DL_primitives	*dlp;
	int			flow_status = 0;

	dlp = (union DL_primitives *)mp->b_rptr;

	MAN_DBG((MAN_UWSRV | MAN_DLPI),
	    ("man_proto: mp(0x%p) prim(%s)\n", (void *)mp,
	    dps[dlp->dl_primitive]));

	switch (dlp->dl_primitive) {
	case DL_UNITDATA_REQ:
		flow_status = man_udreq(wq, mp);
		break;

	case DL_ATTACH_REQ:
		man_areq(wq, mp);
		break;

	case DL_DETACH_REQ:
		man_dreq(wq, mp);
		break;

	case DL_BIND_REQ:
		man_breq(wq, mp);
		break;

	case DL_UNBIND_REQ:
		man_ubreq(wq, mp);
		break;

	case DL_INFO_REQ:
		man_ireq(wq, mp);
		break;

	case DL_PROMISCON_REQ:
		man_ponreq(wq, mp);
		break;

	case DL_PROMISCOFF_REQ:
		man_poffreq(wq, mp);
		break;

	case DL_ENABMULTI_REQ:
		man_emreq(wq, mp);
		break;

	case DL_DISABMULTI_REQ:
		man_dmreq(wq, mp);
		break;

	case DL_PHYS_ADDR_REQ:
		man_pareq(wq, mp);
		break;

	case DL_SET_PHYS_ADDR_REQ:
		man_spareq(wq, mp);
		break;

	default:
		MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: prim(%d)\n",
		    dlp->dl_primitive));
		dlerrorack(wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
		break;

	} /* End switch */

	MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: exit\n"));
	return (flow_status);

}

static int
man_udreq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	dl_unitdata_req_t	*dludp;
	mblk_t	*nmp;
	man_dladdr_t		*dlap;
	t_uscalar_t 		off, len;
	int 			flow_status = 0;

	msp = (manstr_t *)wq->q_ptr;


	if (msp->ms_dlpistate != DL_IDLE) {
		dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
		return (flow_status);
	}
	dludp = (dl_unitdata_req_t *)mp->b_rptr;
	off = dludp->dl_dest_addr_offset;
	len = dludp->dl_dest_addr_length;

	/*
	 * Validate destination address format.
	 */
	if (!MBLKIN(mp, off, len) || (len != MAN_ADDRL)) {
		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADADDR, 0);
		return (flow_status);
	}

	/*
	 * Error if no M_DATA follows.
	 */
	nmp = mp->b_cont;
	if (nmp == NULL) {
		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADDATA, 0);
		return (flow_status);
	}

	dlap = (man_dladdr_t *)(mp->b_rptr + off);

	flow_status = man_start(wq, mp, &dlap->dl_phys);
	return (flow_status);
}

/*
 * Handle DL_ATTACH_REQ.
 */
static void
man_areq(queue_t *wq, mblk_t *mp)
{
	man_t			*manp;	/* per instance data */
	manstr_t		*msp;	/* per stream data */
	short			ppa;
	union DL_primitives	*dlp;
	mblk_t			*preq = NULL;
	int			did_refcnt = FALSE;
	int			dlerror = 0;
	int			status = 0;

	msp = (manstr_t *)wq->q_ptr;
	dlp = (union DL_primitives *)mp->b_rptr;

	/*
	 * Attach us to MAN PPA (device instance).
	 */
	if (MBLKL(mp) < DL_ATTACH_REQ_SIZE) {
		dlerror = DL_BADPRIM;
		goto exit;
	}

	if (msp->ms_dlpistate != DL_UNATTACHED) {
		dlerror = DL_OUTSTATE;
		goto exit;
	}

	ppa = dlp->attach_req.dl_ppa;
	if (ppa == -1 || qassociate(wq, ppa) != 0) {
		dlerror = DL_BADPPA;
		MAN_DBG(MAN_WARN, ("man_areq: bad PPA %d", ppa));
		goto exit;
	}

	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, ppa);
	ASSERT(manp != NULL);	/* qassociate() succeeded */

	manp->man_refcnt++;
	did_refcnt = TRUE;
	mutex_exit(&man_lock);

	/*
	 * Create a DL replay list for the lower stream. These won't
	 * actually be sent down until the lower streams are made active
	 * (sometime after the call to man_init_dests below).
	 */
	preq = man_alloc_physreq_mp(&manp->man_eaddr);
	if (preq == NULL) {
		dlerror = DL_SYSERR;
		status = ENOMEM;
		goto exit;
	}

	/*
	 * Make copy for dlpi resync of upper and lower streams.
	 */
	if (man_dlpi(msp, mp)) {
		dlerror = DL_SYSERR;
		status = ENOMEM;
		goto exit;
	}

	/* TBD - need to clean off ATTACH req on failure here. */
	if (man_dlpi(msp, preq)) {
		dlerror = DL_SYSERR;
		status = ENOMEM;
		goto exit;
	}

	/*
	 * man_init_dests/man_start_dest needs these set before call.
	 */
	msp->ms_manp = manp;
	msp->ms_meta_ppa = ppa;

	/*
	 *  Allocate and init lower destination structures.
	 */
	ASSERT(msp->ms_dests == NULL);
	if (man_init_dests(manp, msp)) {
		mblk_t	 *tmp;

		/*
		 * If we can't get the lower streams ready, then
		 * remove the messages from the DL replay list and
		 * fail attach.
		 */
		while ((tmp = msp->ms_dl_mp) != NULL) {
			msp->ms_dl_mp = msp->ms_dl_mp->b_next;
			tmp->b_next = tmp->b_prev = NULL;
			freemsg(tmp);
		}

		msp->ms_manp = NULL;
		msp->ms_meta_ppa = -1;

		dlerror = DL_SYSERR;
		status = ENOMEM;
		goto exit;
	}

	MAN_DBG(MAN_DLPI, ("man_areq: ppa 0x%x man_refcnt: %d\n",
	    ppa, manp->man_refcnt));

	SETSTATE(msp, DL_UNBOUND);

exit:
	if (dlerror == 0) {
		dlokack(wq, mp, DL_ATTACH_REQ);
	} else {
		if (did_refcnt) {
			mutex_enter(&man_lock);
			manp->man_refcnt--;
			mutex_exit(&man_lock);
		}
		dlerrorack(wq, mp, DL_ATTACH_REQ, dlerror, status);
		(void) qassociate(wq, -1);
	}
	if (preq != NULL)
		freemsg(preq);

}

/*
 * Called at DL_ATTACH time.
 * Man_lock is held to protect the pathgroup list (man_pg).
 */
static int
man_init_dests(man_t *manp, manstr_t *msp)
{
	man_dest_t	*mdp;
	man_pg_t	*mpg;
	int		i;

	mdp = man_kzalloc(MAN_DEST_ARRAY_SIZE, KM_NOSLEEP);
	if (mdp == NULL)
		return (ENOMEM);

	msp->ms_dests = mdp;

	mutex_enter(&man_lock);
	for (i = 0; i < MAN_MAX_DESTS; i++) {

		mdp[i].md_muxid = -1;	/* muxid 0 is valid */
		mutex_init(&mdp[i].md_lock, NULL, MUTEX_DRIVER, NULL);

		mpg = man_find_pg_by_id(manp->man_pg, i);

		if (mpg && man_find_active_path(mpg->mpg_pathp))
			man_start_dest(&mdp[i], msp, mpg);
	}
	mutex_exit(&man_lock);

	return (0);
}

/*
 * Get a destination ready for use.
 */
static void
man_start_dest(man_dest_t *mdp, manstr_t *msp, man_pg_t *mpg)
{
	man_path_t	*ap;

	mdp->md_muxid = -1;
	mdp->md_dlpistate = DL_UNATTACHED;
	mdp->md_msp = msp;
	mdp->md_rq = msp->ms_rq;
	mdp->md_pg_id = mpg->mpg_pg_id;

	ASSERT(msp->ms_manp);

	ether_copy(&msp->ms_manp->man_eaddr, &mdp->md_src_eaddr);
	ether_copy(&mpg->mpg_dst_eaddr, &mdp->md_dst_eaddr);

	ap = man_find_active_path(mpg->mpg_pathp);
	ASSERT(ap);
	mdp->md_device = ap->mp_device;

	/*
	 * Set up linktimers so that first time through, we will do
	 * a failover.
	 */
	mdp->md_linkstate = MAN_LINKFAIL;
	mdp->md_state = MAN_DSTATE_INITIALIZING;
	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
	    (void *)mdp, man_gettimer(MAN_TIMER_INIT, mdp));

	/*
	 * As an optimization, if there is only one destination,
	 * remember the destination pointer. Used by man_start().
	 */
	man_set_optimized_dest(msp);

	MAN_DBG(MAN_DEST, ("man_start_dest: mdp"));
	MAN_DBGCALL(MAN_DEST, man_print_mdp(mdp));
}

static void
man_set_optimized_dest(manstr_t *msp)
{
	int		count = 0;
	int		i;
	man_dest_t	*mdp = NULL;

	for (i = 0; i < MAN_MAX_DESTS; i++) {
		if (msp->ms_dests[i].md_msp != NULL) {
			count++;
			mdp = &msp->ms_dests[i];
		}
	}

	if (count == 1)
		msp->ms_destp = mdp;
	else
		msp->ms_destp = NULL;

}

/*
 * Catch dlpi message for replaying, and arrange to send it down
 * to any destinations not PLUMBING. See man_dlpi_replay().
 */
static int
man_dlpi(manstr_t *msp, mblk_t *mp)
{
	int	status;

	status = man_dl_catch(&msp->ms_dl_mp, mp);
	if (status == 0)
		status = man_dlpi_senddown(msp, mp);

	return (status);
}

/*
 * Catch IOCTL type DL_ messages.
 */
static int
man_dlioc(manstr_t *msp, mblk_t *mp)
{
	int status;

	status = man_dl_catch(&msp->ms_dlioc_mp, mp);
	if (status == 0)
		status = man_dlpi_senddown(msp, mp);

	return (status);
}

/*
 * We catch all DLPI messages that we have to resend to a newly AP'ed
 * device to put it in the right state.  We link these messages together
 * w/ their b_next fields and hang them off of the caller's list head
 * (e.g. msp->ms_dl_mp).  We must be careful to restore b_next fields
 * before doing dupmsg/freemsg!
 *
 *	mplist - address of the list head to catch the message on
 *	mp     - pointer to the DLPI request to catch
 */
static int
man_dl_catch(mblk_t **mplist, mblk_t *mp)
{
	mblk_t			*dupmp;
	mblk_t			*tmp;
	unsigned		prim;
	int			status = 0;

	dupmp = copymsg(mp);
	if (dupmp == NULL) {
		status = ENOMEM;
		goto exit;
	}


	if (*mplist == NULL)
		*mplist = dupmp;
	else {
		for (tmp = *mplist; tmp->b_next; )
			tmp = tmp->b_next;

		tmp->b_next = dupmp;
	}

	prim = DL_PRIM(mp);
	MAN_DBG(MAN_DLPI,
	    ("man_dl_catch: adding %s\n",
	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
	    (prim == DLIOCRAW) ? "DLIOCRAW" :
	    (prim == DL_PROMISCON_REQ) ? promisc[DL_PROMISCON_TYPE(mp)] :
	    dps[prim]));

exit:

	return (status);
}
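
/*
 * The caught messages form a singly linked replay list chained through
 * their b_next fields, e.g. after a typical attach/bind sequence
 * (illustrative):
 *
 *	msp->ms_dl_mp -> DL_ATTACH_REQ -> DL_SET_PHYS_ADDR_REQ ->
 *	    DL_BIND_REQ -> NULL
 *
 * man_dlpi_replay() walks such a list to resync a new lower stream,
 * and man_dl_release() unlinks the complement of a request (e.g. a
 * DL_UNBIND_REQ removes the caught DL_BIND_REQ).
 */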

/*
 * Send down a single DLPI M_[PC]PROTO to all currently valid dests.
 *
 *	msp - ptr to NDM stream structure the DL_ message was received on.
 *	mp - ptr to mblk containing DL_ request.
 */
static int
man_dlpi_senddown(manstr_t *msp, mblk_t *mp)
{
	man_dest_t	*mdp;
	int		i;
	mblk_t		*rmp[MAN_MAX_DESTS];	/* Copy to replay */
	int		dstate[MAN_MAX_DESTS];
	int		no_dests = TRUE;
	int		status = 0;

	if (msp->ms_dests == NULL)
		goto exit;

	for (i = 0; i < MAN_MAX_DESTS; i++) {
		mdp = &msp->ms_dests[i];
		if (mdp->md_state == MAN_DSTATE_READY) {
			dstate[i] = TRUE;
			no_dests = FALSE;
		} else {
			dstate[i] = FALSE;
		}
		rmp[i] = NULL;
	}

	if (no_dests)
		goto exit;

	/*
	 * Build replay and duplicate list for all possible destinations.
	 */
	for (i = 0; i < MAN_MAX_DESTS; i++) {
		if (dstate[i]) {
			rmp[i] = copymsg(mp);
			if (rmp[i] == NULL) {
				status = ENOMEM;
				break;
			}
		}
	}

	if (status == 0) {
		for (i = 0; i < MAN_MAX_DESTS; i++)
			if (dstate[i]) {
				mdp = &msp->ms_dests[i];

				ASSERT(mdp->md_wq != NULL);
				ASSERT(mp->b_next == NULL);
				ASSERT(mp->b_prev == NULL);

				man_dlpi_replay(mdp, rmp[i]);
			}
	} else {
		for (; i >= 0; i--)
			if (dstate[i] && rmp[i])
				freemsg(rmp[i]);
	}

exit:
	return (status);
}

/*
 * man_dlpi_replay - traverse the list of DLPI requests and reapply them to
 * get the upper and lower streams into the same state. Called holding inner
 * perimeter lock exclusive. Note that we defer M_IOCTL type dlpi messages
 * until we get an OK_ACK to our ATTACH (see man_lrsrv and
 * man_dlioc_replay).
 *
 * 	mdp - pointer to lower queue (destination)
 *	rmp - list of mblks to send down stream.
 */
static void
man_dlpi_replay(man_dest_t *mdp, mblk_t *rmp)
{
	mblk_t			*mp;
	union DL_primitives	*dlp = NULL;

	MAN_DBG(MAN_DLPI, ("man_dlpi_replay: mdp(0x%p)", (void *)mdp));

	while (rmp) {
		mp = rmp;
		rmp = rmp->b_next;
		mp->b_prev = mp->b_next = NULL;

		dlp = (union DL_primitives *)mp->b_rptr;
		MAN_DBG(MAN_DLPI,
		    ("man_dlpi_replay: mdp(0x%p) sending %s\n",
		    (void *)mdp,
		    (dlp->dl_primitive == DL_IOC_HDR_INFO) ?
		    "DL_IOC_HDR_INFO" : (dlp->dl_primitive == DLIOCRAW) ?
		    "DLIOCRAW" : dps[(unsigned)(dlp->dl_primitive)]));

		if (dlp->dl_primitive == DL_ATTACH_REQ) {
			/*
			 * Insert the lower device's ppa.
			 */
			dlp->attach_req.dl_ppa = mdp->md_device.mdev_ppa;
		}

		(void) putnext(mdp->md_wq, mp);
	}

}

static void
man_dreq(queue_t *wq, mblk_t *mp)
{
	manstr_t	*msp;	/* per stream data */
	man_work_t	*wp;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_DETACH_REQ_SIZE) {
		dlerrorack(wq, mp, DL_DETACH_REQ, DL_BADPRIM, 0);
		return;
	}

	if (msp->ms_dlpistate != DL_UNBOUND) {
		dlerrorack(wq, mp, DL_DETACH_REQ, DL_OUTSTATE, 0);
		return;
	}

	ASSERT(msp->ms_dests != NULL);

	wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_NOSLEEP);
	if (wp == NULL) {
		dlerrorack(wq, mp, DL_DETACH_REQ, DL_SYSERR, ENOMEM);
		return;
	}
	man_dodetach(msp, wp);
	(void) qassociate(wq, -1);

	SETSTATE(msp, DL_UNATTACHED);

	dlokack(wq, mp, DL_DETACH_REQ);
}

static void
man_dl_clean(mblk_t **mplist)
{
	mblk_t	*tmp;

	/*
	 * Toss everything.
	 */
	while ((tmp = *mplist) != NULL) {
		*mplist = (*mplist)->b_next;
		tmp->b_next = tmp->b_prev = NULL;
		freemsg(tmp);
	}

}

/*
 * man_dl_release - Remove the corresponding DLPI request from the
 * catch list. Walk through the catch list looking for the other half of
 * the pair and delete it.  If we are detaching, delete the entire list.
 *
 *	mplist - address of the catch list to search
 *	mp     - pointer to mblk for the first half of the pair.  We will
 *		 delete the other half of the pair based on this.
 */
static void
man_dl_release(mblk_t **mplist, mblk_t *mp)
{
	uchar_t			match_dbtype;
	mblk_t			*tmp;
	mblk_t			*tmpp;
	int			matched = FALSE;

	if (*mplist == NULL)
		goto exit;

	match_dbtype = DB_TYPE(mp);

	/*
	 * Currently we only clean DL_ PROTO type messages. There is
	 * no way to turn off M_CTL or DL_IOC stuff other than sending
	 * down a DL_DETACH, which resets everything.
	 */
	if (match_dbtype != M_PROTO && match_dbtype != M_PCPROTO) {
		goto exit;
	}

	/*
	 * Selectively find a caught mblk that matches this one and
	 * remove it from the list
	 */
	tmp = tmpp = *mplist;
	matched = man_match_proto(mp, tmp);
	if (matched) {
		*mplist = tmp->b_next;
		tmp->b_next = tmp->b_prev = NULL;
	} else {
		for (tmp = tmp->b_next; tmp != NULL; tmp = tmp->b_next) {
			if (matched = man_match_proto(mp, tmp))
				break;
			tmpp = tmp;
		}

		if (matched) {
			tmpp->b_next = tmp->b_next;
			tmp->b_next = tmp->b_prev = NULL;
		}
	}

exit:
	if (matched) {

		MAN_DBG(MAN_DLPI, ("man_dl_release: release %s",
		    (DL_PRIM(mp) == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
		    (DL_PRIM(mp) == DLIOCRAW) ? "DLIOCRAW" :
		    dps[(int)DL_PRIM(mp)]));

		freemsg(tmp);
	}
	MAN_DBG(MAN_DLPI, ("man_dl_release: returns"));

}

/*
 * Compare two DL_ messages. If they are complementary (e.g. DL_UNBIND
 * complements DL_BIND), return true.
 */
static int
man_match_proto(mblk_t *mp1, mblk_t *mp2)
{
	t_uscalar_t	prim1;
	t_uscalar_t	prim2;
	int		matched = FALSE;

	/*
	 * Primitive to clean off list.
	 */
	prim1 = DL_PRIM(mp1);
	prim2 = DL_PRIM(mp2);

	switch (prim1) {
	case DL_UNBIND_REQ:
		if (prim2 == DL_BIND_REQ)
			matched = TRUE;
		break;

	case DL_PROMISCOFF_REQ:
		if (prim2 == DL_PROMISCON_REQ) {
			dl_promiscoff_req_t	*poff1;
			dl_promiscoff_req_t	*poff2;

			poff1 = (dl_promiscoff_req_t *)mp1->b_rptr;
			poff2 = (dl_promiscoff_req_t *)mp2->b_rptr;

			if (poff1->dl_level == poff2->dl_level)
				matched = TRUE;
		}
		break;

	case DL_DISABMULTI_REQ:
		if (prim2 == DL_ENABMULTI_REQ) {
			union DL_primitives	*dlp;
			t_uscalar_t		off;
			eaddr_t			*addrp1;
			eaddr_t			*addrp2;

			dlp = (union DL_primitives *)mp1->b_rptr;
			off = dlp->disabmulti_req.dl_addr_offset;
			addrp1 = (eaddr_t *)(mp1->b_rptr + off);

			dlp = (union DL_primitives *)mp2->b_rptr;
			off = dlp->disabmulti_req.dl_addr_offset;
			addrp2 = (eaddr_t *)(mp2->b_rptr + off);

			if (ether_cmp(addrp1, addrp2) == 0)
				matched = TRUE;
		}
		break;

	default:
		break;
	}

	MAN_DBG(MAN_DLPI, ("man_match_proto returns %d", matched));

	return (matched);
}

/*
 * Bind upper stream to a particular SAP. Called with exclusive innerperim
 * QPAIR, shared outerperim.
 */
static void
man_breq(queue_t *wq, mblk_t *mp)
{
	man_t			*manp;	/* per instance data */
	manstr_t		*msp;	/* per stream data */
	union DL_primitives	*dlp;
	man_dladdr_t		man_addr;
	t_uscalar_t		sap;
	t_uscalar_t		xidtest;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADPRIM, 0);
		return;
	}

	if (msp->ms_dlpistate != DL_UNBOUND) {
		dlerrorack(wq, mp, DL_BIND_REQ, DL_OUTSTATE, 0);
		return;
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	manp = msp->ms_manp;			/* valid after attach */
	sap = dlp->bind_req.dl_sap;
	xidtest = dlp->bind_req.dl_xidtest_flg;

	ASSERT(manp);

	if (xidtest) {
		dlerrorack(wq, mp, DL_BIND_REQ, DL_NOAUTO, 0);
		return;
	}

	if (sap > ETHERTYPE_MAX) {
		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADSAP, 0);
		return;
	}

	if (man_dlpi(msp, mp)) {
		dlerrorack(wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	msp->ms_sap = sap;

	SETSTATE(msp, DL_IDLE);

	man_addr.dl_sap = msp->ms_sap;
	ether_copy(&msp->ms_manp->man_eaddr, &man_addr.dl_phys);

	dlbindack(wq, mp, msp->ms_sap, &man_addr, MAN_ADDRL, 0, 0);

}

static void
man_ubreq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;	/* per stream data */

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
		return;
	}

	if (msp->ms_dlpistate != DL_IDLE) {
		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
		return;
	}

	if (man_dlpi_senddown(msp, mp)) {
		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	man_dl_release(&msp->ms_dl_mp, mp);

	SETSTATE(msp, DL_UNBOUND);

	dlokack(wq, mp, DL_UNBIND_REQ);

}

static void
man_ireq(queue_t *wq, mblk_t *mp)
{
	manstr_t	*msp;
	dl_info_ack_t	*dlip;
	man_dladdr_t	*dlap;
	eaddr_t		*ep;
	size_t	size;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
		dlerrorack(wq, mp, DL_INFO_REQ, DL_BADPRIM, 0);
		return;
	}

	/* Exchange current msg for a DL_INFO_ACK. */
	size = sizeof (dl_info_ack_t) + MAN_ADDRL + ETHERADDRL;
	mp = mexchange(wq, mp, size, M_PCPROTO, DL_INFO_ACK);
	if (mp == NULL) {
		MAN_DBG(MAN_DLPI, ("man_ireq: mp == NULL."));
		return;
	}

	/* Fill in the DL_INFO_ACK fields and reply. */
	dlip = (dl_info_ack_t *)mp->b_rptr;
	*dlip = man_infoack;
	dlip->dl_current_state = msp->ms_dlpistate;
	dlap = (man_dladdr_t *)(mp->b_rptr + dlip->dl_addr_offset);
	dlap->dl_sap = msp->ms_sap;

	/*
	 * If attached, return physical address.
	 */
	if (msp->ms_manp != NULL) {
		ether_copy(&msp->ms_manp->man_eaddr, &dlap->dl_phys);
	} else {
		bzero((caddr_t)&dlap->dl_phys, ETHERADDRL);
	}

	ep = (struct ether_addr *)(mp->b_rptr + dlip->dl_brdcst_addr_offset);
	ether_copy(&etherbroadcast, ep);

	qreply(wq, mp);

}


static void
man_ponreq(queue_t *wq, mblk_t *mp)
{
	manstr_t	*msp;
	int		flag;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
		return;
	}

	switch (((dl_promiscon_req_t *)mp->b_rptr)->dl_level) {
	case DL_PROMISC_PHYS:
		flag = MAN_SFLAG_ALLPHYS;
		break;

	case DL_PROMISC_SAP:
		flag = MAN_SFLAG_ALLSAP;
		break;

	case DL_PROMISC_MULTI:
		flag = MAN_SFLAG_ALLMULTI;
		break;

	default:
		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_NOTSUPPORTED, 0);
		return;
	}

	/*
	 * Catch request for replay, and forward down to any lower
	 * stream.
	 */
	if (man_dlpi(msp, mp)) {
		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	msp->ms_flags |= flag;

	dlokack(wq, mp, DL_PROMISCON_REQ);

}

static void
man_poffreq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	int			flag;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
		return;
	}

	switch (((dl_promiscoff_req_t *)mp->b_rptr)->dl_level) {
	case DL_PROMISC_PHYS:
		flag = MAN_SFLAG_ALLPHYS;
		break;

	case DL_PROMISC_SAP:
		flag = MAN_SFLAG_ALLSAP;
		break;

	case DL_PROMISC_MULTI:
		flag = MAN_SFLAG_ALLMULTI;
		break;

	default:
		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTSUPPORTED, 0);
		return;
	}

	if ((msp->ms_flags & flag) == 0) {
		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
		return;
	}

	if (man_dlpi_senddown(msp, mp)) {
		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	man_dl_release(&msp->ms_dl_mp, mp);

	msp->ms_flags &= ~flag;

	dlokack(wq, mp, DL_PROMISCOFF_REQ);

}

/*
 * Enable multicast requests. We might need to track addresses instead of
 * just passing things through (see eri_dmreq) - TBD.
 */
static void
man_emreq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	union DL_primitives	*dlp;
	eaddr_t			*addrp;
	t_uscalar_t		off;
	t_uscalar_t		len;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_ENABMULTI_REQ_SIZE) {
		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADPRIM, 0);
		return;
	}

	if (msp->ms_dlpistate == DL_UNATTACHED) {
		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
		return;
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	len = dlp->enabmulti_req.dl_addr_length;
	off = dlp->enabmulti_req.dl_addr_offset;
	addrp = (struct ether_addr *)(mp->b_rptr + off);

	if ((len != ETHERADDRL) ||
	    !MBLKIN(mp, off, len) ||
	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
		return;
	}

	/*
	 * Catch request for replay, and forward down to any lower
	 * stream.
	 */
	if (man_dlpi(msp, mp)) {
		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	dlokack(wq, mp, DL_ENABMULTI_REQ);

}

static void
man_dmreq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	union DL_primitives	*dlp;
	eaddr_t			*addrp;
	t_uscalar_t		off;
	t_uscalar_t		len;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_DISABMULTI_REQ_SIZE) {
		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADPRIM, 0);
		return;
	}

	if (msp->ms_dlpistate == DL_UNATTACHED) {
		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_OUTSTATE, 0);
		return;
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	len = dlp->disabmulti_req.dl_addr_length;
	off = dlp->disabmulti_req.dl_addr_offset;
	addrp = (struct ether_addr *)(mp->b_rptr + off);

	if ((len != ETHERADDRL) ||
	    !MBLKIN(mp, off, len) ||
	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADADDR, 0);
		return;
	}

	if (man_dlpi_senddown(msp, mp)) {
		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	man_dl_release(&msp->ms_dl_mp, mp);

	dlokack(wq, mp, DL_DISABMULTI_REQ);

}

static void
man_pareq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	union	DL_primitives	*dlp;
	uint32_t		type;
	struct	ether_addr	addr;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_PHYS_ADDR_REQ_SIZE) {
		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_BADPRIM, 0);
		return;
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	type = dlp->physaddr_req.dl_addr_type;
	if (msp->ms_manp == NULL) {
		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
		return;
	}

	switch (type) {
	case	DL_FACT_PHYS_ADDR:
		(void) localetheraddr((struct ether_addr *)NULL, &addr);
		break;

	case	DL_CURR_PHYS_ADDR:
		ether_bcopy(&msp->ms_manp->man_eaddr, &addr);
		break;

	default:
		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_NOTSUPPORTED, 0);
		return;
	}

	dlphysaddrack(wq, mp, &addr, ETHERADDRL);
}

/*
 * TBD - this routine probably should be protected w/ an ndd
 * tuneable, or a man.conf parameter.
 */
static void
man_spareq(queue_t *wq, mblk_t *mp)
{
	manstr_t		*msp;
	union DL_primitives	*dlp;
	t_uscalar_t		off;
	t_uscalar_t		len;
	eaddr_t			*addrp;

	msp = (manstr_t *)wq->q_ptr;

	if (MBLKL(mp) < DL_SET_PHYS_ADDR_REQ_SIZE) {
		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
		return;
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	len = dlp->set_physaddr_req.dl_addr_length;
	off = dlp->set_physaddr_req.dl_addr_offset;

	if (!MBLKIN(mp, off, len)) {
		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
		return;
	}

	addrp = (struct ether_addr *)(mp->b_rptr + off);

	/*
	 * Error if length of address isn't right or the address
	 * specified is a multicast or broadcast address.
	 */
	if ((len != ETHERADDRL) ||
	    ((addrp->ether_addr_octet[0] & 01) == 1) ||
	    (ether_cmp(addrp, &etherbroadcast) == 0)) {
		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADADDR, 0);
		return;
	}
	/*
	 * Error if this stream is not attached to a device.
	 */
	if (msp->ms_manp == NULL) {
		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
		return;
	}

	/*
	 * We will also resend DL_SET_PHYS_ADDR_REQ for each dest
	 * when it is linked under us.
	 */
	if (man_dlpi_senddown(msp, mp)) {
		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_SYSERR, ENOMEM);
		return;
	}

	ether_copy(addrp, msp->ms_manp->man_eaddr.ether_addr_octet);

	MAN_DBG(MAN_DLPI, ("man_spareq: snagged %s\n",
	    ether_sprintf(&msp->ms_manp->man_eaddr)));

	dlokack(wq, mp, DL_SET_PHYS_ADDR_REQ);

}

/*
 * These routines make up the lower part of the MAN streams framework.
 */

/*
 * man_lwsrv - Service deferred mblks for the lower stream. We end up here
 * when the destination is not DL_IDLE when traffic comes downstream.
 *
 *	wq - lower write queue of mxx
 */
static int
man_lwsrv(queue_t *wq)
{
	mblk_t		*mp;
	mblk_t		*mlistp;
	man_dest_t	*mdp;
	size_t		count;

	mdp = (man_dest_t *)wq->q_ptr;

	MAN_DBG(MAN_LWSRV, ("man_lwsrv: wq(0x%p) mdp(0x%p)"
	    " md_rq(0x%p)\n", (void *)wq, (void *)mdp,
	    mdp ? (void *)mdp->md_rq : NULL));

	if (mdp == NULL)
		goto exit;

	if (mdp->md_state & MAN_DSTATE_CLOSING) {
		flushq(wq, FLUSHDATA);
		flushq(RD(wq), FLUSHDATA);
		goto exit;
	}

	/*
	 * Arrange to send deferred mp's first, then mblks on the
	 * service queue. Since we are exclusive in the inner perimeter,
	 * we don't have to worry about md_lock, like the put procedures,
	 * which are MTPUTSHARED.
	 */
	mutex_enter(&mdp->md_lock);
	mlistp = mdp->md_dmp_head;
	mdp->md_dmp_head = NULL;
	count = mdp->md_dmp_count;
	mdp->md_dmp_count = 0;
	mutex_exit(&mdp->md_lock);

	while (mlistp != NULL) {
		mp = mlistp;
		mlistp = mp->b_next;
		mp->b_next = NULL;
		count -= msgsize(mp);
		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {

			mutex_enter(&mdp->md_lock);
			mdp->md_dmp_count += count + msgsize(mp);
			mp->b_next = mlistp;
			mdp->md_dmp_head = mp;
			mutex_exit(&mdp->md_lock);
			goto exit;
		}
	}
	mdp->md_dmp_tail = NULL;

	while (mp = getq(wq)) {
		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
			/*
			 * Put it back on queue, making sure to avoid
			 * infinite loop mentioned in putbq(9F)
			 */
			noenable(wq);
			putbq(wq, mp);
			enableok(wq);

			break;
		}
	}

exit:

	return (0);
}
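
/*
 * Note the two-stage drain in man_lwsrv above: messages deferred by
 * the put procedures (md_dmp_head, with md_dmp_count accounting their
 * total bytes) are sent first, then the normal service queue is
 * drained. If man_start_lower() fails mid-list, the remainder is
 * re-chained onto md_dmp_head and the byte count restored, so message
 * ordering is preserved across retries.
 */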

/*
 * man_lrput - handle DLPI messages issued from downstream.
 *
 *	rq - lower read queue of mxx
 *	mp - mblk ptr to DLPI request
 *
 *	returns 0
 */
static int
man_lrput(queue_t *rq, mblk_t *mp)
{
	man_dest_t	*mdp;
	manstr_t	*msp;

#if defined(DEBUG)
	union DL_primitives	*dlp;
	t_uscalar_t		prim = MAN_DLPI_MAX_PRIM + 1;
	char			*prim_str;
#endif  /* DEBUG */

	mdp = (man_dest_t *)rq->q_ptr;

#if defined(DEBUG)
	if (DB_TYPE(mp) == M_PROTO) {
		dlp = (union DL_primitives *)mp->b_rptr;
		prim = dlp->dl_primitive;
	}

	prim_str = (prim > MAN_DLPI_MAX_PRIM) ? "NON DLPI" :
	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
	    (prim == DLIOCRAW) ? "DLIOCRAW" :
	    dps[(unsigned int)prim];
	MAN_DBG(MAN_LRPUT, ("man_lrput: rq(0x%p) mp(0x%p) mdp(0x%p)"
	    " db_type(0x%x) dl_prim %s", (void *)rq,
	    (void *)mp, (void *)mdp, DB_TYPE(mp), prim_str));
	MAN_DBGCALL(MAN_LRPUT2, man_print_mdp(mdp));
#endif  /* DEBUG */

	if (DB_TYPE(mp) == M_FLUSH) {
		/* Turn around */
		if (*mp->b_rptr & FLUSHW) {
			*mp->b_rptr &= ~FLUSHR;
			qreply(rq, mp);
		} else
			freemsg(mp);
		return (0);
	}

	if (mdp == NULL || mdp->md_state != MAN_DSTATE_READY) {

		MAN_DBG(MAN_LRPUT, ("man_lrput: not ready mdp(0x%p),"
		    " state(%d)", (void *)mdp, mdp ? mdp->md_state : -1));
		freemsg(mp);
		return (0);
	}

	/*
	 * If we have a destination in the right state, forward on datagrams.
	 */
	if (MAN_IS_DATA(mp)) {
		if (mdp->md_dlpistate == DL_IDLE && canputnext(mdp->md_rq)) {

			msp = mdp->md_msp;
			if (!(msp->ms_flags & MAN_SFLAG_PROMISC))
				mdp->md_rcvcnt++; /* Count for failover */
			/*
			 * go put mblk_t directly up to next queue.
			 */
			MAN_DBG(MAN_LRPUT, ("man_lrput: putnext to rq(0x%p)",
			    (void *)mdp->md_rq));
			(void) putnext(mdp->md_rq, mp);
		} else {
			freemsg(mp);
		}
	} else {
		/*
		 * Handle in man_lrsrv with exclusive inner perimeter lock.
		 */
		putq(rq, mp);
	}

	return (0);
}

/*
 * Either this is a response from our attempt to sync the upper and lower
 * stream states, or it's data. If it's not data, do DL_* response processing
 * and transition md_dlpistate accordingly. If it's data, toss it.
 */
static int
man_lrsrv(queue_t *rq)
{
	man_dest_t		*mdp;
	mblk_t			*mp;
	union DL_primitives	*dlp;
	ulong_t			prim;
	ulong_t			cprim;
	int			need_dl_reset = FALSE;

#if defined(DEBUG)
	struct iocblk	*iocp;
	char		ioc_cmd[256];
#endif  /* DEBUG */

	MAN_DBG(MAN_LRSRV, ("man_lrsrv: rq(0x%p)", (void *)rq));

	mdp = (man_dest_t *)rq->q_ptr;

	if ((mdp == NULL) || (mdp->md_state & MAN_DSTATE_CLOSING)) {
		flushq(rq, FLUSHDATA);
		flushq(WR(rq), FLUSHDATA);
		goto exit;
	}

	while (mp = getq(rq)) {


	/*
	 * If we're not connected, or it's a datagram, toss it.
	 */
	if (MAN_IS_DATA(mp) || mdp->md_state != MAN_DSTATE_READY) {

		MAN_DBG(MAN_LRSRV, ("man_lrsrv: dropping mblk mdp(0x%p)"
		    " is_data(%d)", (void *)mdp, MAN_IS_DATA(mp)));
		freemsg(mp);
		continue;
	}

	/*
	 * Should be response to man_dlpi_replay. Discard unless there
	 * is a failure we care about.
	 */

	switch (DB_TYPE(mp)) {
	case M_PROTO:
	case M_PCPROTO:
		/* Do proto processing below. */
		break;

	case M_IOCNAK:
		/*
		 * DL_IOC* failed for some reason.
		 */
		need_dl_reset = TRUE;

#if defined(DEBUG)
		iocp = (struct iocblk *)mp->b_rptr;

		sprintf(ioc_cmd, "0x%x", iocp->ioc_cmd);
		MAN_DBG(MAN_LRSRV, ("man_lrsrv: M_IOCNAK err %d for cmd(%s)\n",
		    iocp->ioc_error,
		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
#endif  /* DEBUG */

		/* FALLTHRU */

	case M_IOCACK:
	case M_CTL:
		/*
		 * OK response from DL_IOC*, ignore.
		 */
		goto dl_reset;
	}

	dlp = (union DL_primitives *)mp->b_rptr;
	prim = dlp->dl_primitive;

	MAN_DBG(MAN_LRSRV, ("man_lrsrv: prim %s", dps[(int)prim]));

	/*
	 * DLPI state processing big theory: We do not rigorously check
	 * DLPI states (e.g. PENDING stuff). Simple rules:
	 *
	 * 	1) If we see an OK_ACK to an ATTACH_REQ, dlpistate = DL_UNBOUND.
	 *	2) If we see a BIND_ACK to a BIND_REQ, dlpistate = DL_IDLE.
	 *	3) If we see a OK_ACK response to an UNBIND_REQ
	 *	   dlpistate = DL_UNBOUND.
	 *	4) If we see a OK_ACK response to a DETACH_REQ,
	 *	   dlpistate = DL_UNATTACHED.
	 *
	 * Everything that isn't handled by 1-4 above is handled by 5)
	 *
	 *	5) A NAK to any DL_* messages we care about causes
	 *	   dlpistate = DL_UNATTACHED and man_reset_dlpi to run
	 *
	 * TBD - need a reset counter so we can try a switch if it gets
	 * too high.
	 */

	switch (prim) {
	case DL_OK_ACK:
		cprim = dlp->ok_ack.dl_correct_primitive;

		switch (cprim) {
		case DL_ATTACH_REQ:
			if (man_dlioc_replay(mdp)) {
				D_SETSTATE(mdp, DL_UNBOUND);
			} else {
				need_dl_reset = TRUE;
				break;
			}
			break;

		case DL_DETACH_REQ:
			D_SETSTATE(mdp, DL_UNATTACHED);
			break;

		case DL_UNBIND_REQ:
			/*
			 * Cancel timer and set md_dlpistate.
			 */
			D_SETSTATE(mdp, DL_UNBOUND);

			ASSERT(mdp->md_bc_id == 0);
			if (mdp->md_lc_timer_id != 0) {
				(void) quntimeout(man_ctl_wq,
				    mdp->md_lc_timer_id);
				mdp->md_lc_timer_id = 0;
			}
		}
		MAN_DBG(MAN_DLPI,
		    ("		cprim %s", dps[(int)cprim]));
		break;

	case DL_BIND_ACK:
		/*
		 * We're ready for data. Get man_lwsrv to run to
		 * process any deferred data and start linkcheck timer.
		 */
		D_SETSTATE(mdp, DL_IDLE);
		qenable(mdp->md_wq);
		mdp->md_linkstate = MAN_LINKGOOD;
		if (man_needs_linkcheck(mdp)) {
			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
			    man_linkcheck_timer, (void *)mdp,
			    man_gettimer(MAN_TIMER_LINKCHECK, mdp));
		}

		break;

	case DL_ERROR_ACK:
		cprim = dlp->error_ack.dl_error_primitive;
		switch (cprim) {
		case DL_ATTACH_REQ:
		case DL_BIND_REQ:
		case DL_DISABMULTI_REQ:
		case DL_ENABMULTI_REQ:
		case DL_PROMISCON_REQ:
		case DL_PROMISCOFF_REQ:
		case DL_SET_PHYS_ADDR_REQ:
			need_dl_reset = TRUE;
			break;

		/*
		 * ignore error TBD (better comment)
		 */
		case DL_UNBIND_REQ:
		case DL_DETACH_REQ:
			break;
		}

		MAN_DBG(MAN_DLPI,
		    ("\tdl_errno %d dl_unix_errno %d cprim %s",
		    dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno,
		    dps[(int)cprim]));
		break;

	case DL_UDERROR_IND:
		MAN_DBG(MAN_DLPI,
		    ("\tdl_errno %d unix_errno %d",
		    dlp->uderror_ind.dl_errno,
		    dlp->uderror_ind.dl_unix_errno));
		break;

	case DL_INFO_ACK:
		break;

	default:
		/*
		 * We should not get here.
		 */
		cmn_err(CE_WARN, "man_lrsrv: unexpected DL prim 0x%lx!",
		    prim);
		need_dl_reset = TRUE;
		break;
	}

dl_reset:
	freemsg(mp);

	if (need_dl_reset) {
		man_pg_t	*mpg;
		man_path_t	*mp;

		if (qsize(rq)) {	/* Dump all messages. */
			flushq(rq, FLUSHDATA);
			flushq(WR(rq), FLUSHDATA);
		}

		mdp->md_dlpierrors++;
		D_SETSTATE(mdp, DL_UNATTACHED);
		if (mdp->md_lc_timer_id != 0) {
			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
			mdp->md_lc_timer_id = 0;
		}

		mutex_enter(&man_lock);
		ASSERT(mdp->md_msp != NULL);
		ASSERT(mdp->md_msp->ms_manp != NULL);
		mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg,
		    mdp->md_pg_id);
		ASSERT(mpg != NULL);
		mp = man_find_path_by_ppa(mpg->mpg_pathp,
		    mdp->md_device.mdev_ppa);
		ASSERT(mp != NULL);
		mp->mp_device.mdev_state |= MDEV_FAILED;
		if ((mdp->md_dlpierrors >= MAN_MAX_DLPIERRORS) &&
		    (man_is_on_domain ||
		    mdp->md_msp->ms_manp->man_meta_ppa == 1)) {
			/*
			 * Autoswitching is disabled for instance 0
			 * on the SC as we expect the domain to
			 * initiate the path switching.
			 */
			(void) man_do_autoswitch((man_dest_t *)mdp);
			MAN_DBG(MAN_WARN, ("man_lrsrv: dlpi failure(%d,%d),"
			    " switching path", mdp->md_device.mdev_major,
			    mdp->md_device.mdev_ppa));
		} else {
			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
			    man_reset_dlpi, (void *)mdp,
			    man_gettimer(MAN_TIMER_DLPIRESET, mdp));
		}
		mutex_exit(&man_lock);
	}


	} /* End while (getq()) */

exit:
	MAN_DBG(MAN_DLPI, ("man_lrsrv: returns"));

	return (0);
}

static int
man_needs_linkcheck(man_dest_t *mdp)
{
	/*
	 * Not ready for linkcheck.
	 */
	if (mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
		return (0);

	/*
	 * Linkchecking needs to be done on IP streams. For domain, all
	 * driver instances need checking, for SC only instance 1 needs it.
	 */
	if ((man_is_on_domain || mdp->md_msp->ms_manp->man_meta_ppa == 1) &&
	    (mdp->md_msp->ms_sap == ETHERTYPE_IP ||
	    mdp->md_msp->ms_sap == ETHERTYPE_IPV6))

		return (1);

	/*
	 * Linkcheck not needed on this link.
	 */
	return (0);
}

/*
 * The following routines process work requests posted to man_iwork_q
 * from the non-STREAMS half of the driver (see man_bwork.c). The work
 * requires access to the inner perimeter lock of the driver. This
 * lock is acquired by man_uwsrv, who calls man_iwork to process the
 * man_iwork_q.
 */

/*
 * The man_bwork has posted some work for us to do inside the
 * perimeter. This mainly involves updating lower multiplexor data
 * structures (non-blocking type stuff). So, we can hold the man_lock
 * until we are done processing all work items. Note that some of these
 * routines in turn submit work back to the bgthread, which they can do
 * since we hold the man_lock.
 */
static void
man_iwork()
{
	man_work_t	*wp;
	int		wp_finished;

	MAN_DBG(MAN_SWITCH, ("man_iwork: q_work(0x%p)",
	    (void *)man_iwork_q->q_work));

	mutex_enter(&man_lock);

	while (man_iwork_q->q_work) {

		wp = man_iwork_q->q_work;
		man_iwork_q->q_work = wp->mw_next;
		wp->mw_next = NULL;

		mutex_exit(&man_lock);

		MAN_DBG(MAN_SWITCH, ("man_iwork: type %s",
		    _mw_type[wp->mw_type]));

		wp_finished = TRUE;

		switch (wp->mw_type) {
		case MAN_WORK_DRATTACH:
			(void) man_do_dr_attach(wp);
			break;

		case MAN_WORK_DRSWITCH:
			/*
			 * Return status to man_dr_detach immediately. If
			 * no error submitting SWITCH request, man_iswitch
			 * or man_bclose will cv_signal man_dr_detach on
			 * completion of SWITCH work request.
			 */
			if (man_do_dr_switch(wp) == 0)
				wp_finished = FALSE;
			break;

		case MAN_WORK_DRDETACH:
			man_do_dr_detach(wp);
			break;

		case MAN_WORK_SWITCH:
			if (man_iswitch(wp))
				wp_finished = FALSE;
			break;

		case MAN_WORK_KSTAT_UPDATE:
			man_do_kstats(wp);
			break;

		default:
			cmn_err(CE_WARN, "man_iwork: "
			    "illegal work type(%d)", wp->mw_type);
			break;
		}

		mutex_enter(&man_lock);

		/*
		 * If we've completed the work request, delete, or
		 * cv_signal waiter.
		 */
		if (wp_finished) {
			wp->mw_flags |= MAN_WFLAGS_DONE;

			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
				cv_signal(&wp->mw_cv);
			else
				man_work_free(wp);
		}
	}

	mutex_exit(&man_lock);
}
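
/*
 * For reference, a work request's life cycle looks roughly like this
 * (illustrative):
 *
 *	man_work_alloc() -> man_work_add(man_bwork_q) -> bgthread runs
 *	    it -> posted back on man_iwork_q -> man_uwsrv()/man_iwork()
 *	    process it under man_lock -> man_work_free(), or cv_signal()
 *	    if the submitter set MAN_WFLAGS_CVWAITER and is waiting.
 */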

/*
 * man_dr_detach has submitted a request to DRSWITCH a path.
 * He is in cv_wait_sig(wp->mw_cv). We forward the work request on to
 * man_bwork as a switch request. It should end up back at
 * man_iwork, who will cv_signal(wp->mw_cv) man_dr_detach.
 *
 * Called holding inner perimeter lock.
 * man_lock is held to synchronize access to the pathgroup list (man_pg).
 */
static int
man_do_dr_switch(man_work_t *wp)
{
	man_t		*manp;
	man_pg_t	*mpg;
	man_path_t	*mp;
	man_path_t	*ap;
	man_adest_t	*adp;
	mi_path_t	mpath;
	int		status = 0;

	adp = &wp->mw_arg;

	MAN_DBG(MAN_SWITCH, ("man_do_dr_switch: pg_id %d work:", adp->a_pg_id));
	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));

	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
	if (manp == NULL || manp->man_pg == NULL) {
		status = ENODEV;
		goto exit;
	}

	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
	if (mpg == NULL) {
		status = ENODEV;
		goto exit;
	}

	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
		status = EAGAIN;
		goto exit;
	}

	/*
	 * Check to see if detaching device is active. If so, activate
	 * an alternate.
	 */
	mp = man_find_active_path(mpg->mpg_pathp);
	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {

		ap = man_find_alternate_path(mpg->mpg_pathp);
		if (ap == NULL) {
			status = EBUSY;
			goto exit;
		}

		bzero((char *)&mpath, sizeof (mi_path_t));

		mpath.mip_cmd = MI_PATH_ACTIVATE;
		mpath.mip_man_ppa = 0;
		mpath.mip_pg_id = 0;
		mpath.mip_devs[0] = ap->mp_device;
		mpath.mip_ndevs = 1;
		ether_copy(&manp->man_eaddr, &mpath.mip_eaddr);

		/*
		 * DR thread is sleeping on wp->mw_cv. We change the work
		 * request from DRSWITCH to SWITCH and submit it to
		 * for processing by man_bwork (via man_pg_cmd). At
		 * completion the SWITCH work request is processed by
		 * man_iswitch() or man_bclose and the DR thread will
		 * be cv_signal'd.
		 */
		wp->mw_type = MAN_WORK_SWITCH;
		if (status = man_pg_cmd(&mpath, wp))
			goto exit;

	} else {
		/*
		 * Tell man_dr_detach that detaching device is not currently
		 * in use.
		 */
		status = ENODEV;
	}

exit:
	if (status) {
		/*
		 * ENODEV is a noop, not really an error.
		 */
		if (status != ENODEV)
			wp->mw_status = status;
	}
	mutex_exit(&man_lock);

	return (status);
}

/*
 * man_dr_attach has submitted a request to DRATTACH a path,
 * add that path to the path list.
 *
 * Called holding perimeter lock.
 */
static int
man_do_dr_attach(man_work_t *wp)
{
	man_t		*manp;
	man_adest_t	*adp;
	mi_path_t	mpath;
	manc_t		manc;
	int		status = 0;

	adp = &wp->mw_arg;

	MAN_DBG(MAN_SWITCH, ("man_do_dr_attach: pg_id %d work:", adp->a_pg_id));
	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));

	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
	if (manp == NULL || manp->man_pg == NULL) {
		status = ENODEV;
		goto exit;
	}

	if (status = man_get_iosram(&manc)) {
		goto exit;
	}
	/*
	 * Extract SC ethernet address from IOSRAM.
	 */
	ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);

	mpath.mip_pg_id = adp->a_pg_id;
	mpath.mip_man_ppa = adp->a_man_ppa;
	/*
	 * man_dr_attach passes the new device info in a_sf_dev.
	 */
	MAN_DBG(MAN_DR, ("man_do_dr_attach: "));
	MAN_DBGCALL(MAN_DR, man_print_dev(&adp->a_sf_dev));
	mpath.mip_devs[0] = adp->a_sf_dev;
	mpath.mip_ndevs = 1;
	mpath.mip_cmd = MI_PATH_ADD;
	status = man_pg_cmd(&mpath, NULL);

exit:
	mutex_exit(&man_lock);
	return (status);
}

/*
 * man_dr_detach has submitted a request to DRDETACH a path.
 * He is in cv_wait_sig(wp->mw_cv). We remove the path and
 * cv_signal(wp->mw_cv) man_dr_detach.
 *
 * Called holding perimeter lock.
 */
static void
man_do_dr_detach(man_work_t *wp)
{
	man_t		*manp;
	man_pg_t	*mpg;
	man_path_t	*mp;
	man_adest_t	*adp;
	manc_t		manc;
	mi_path_t	mpath;
	int		i;
	int		found;
	int		status = 0;

	adp = &wp->mw_arg;

	MAN_DBG(MAN_SWITCH, ("man_do_dr_detach: pg_id %d work:", adp->a_pg_id));
	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));

	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
	if (manp == NULL || manp->man_pg == NULL) {
		status = ENODEV;
		goto exit;
	}

	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
	if (mpg == NULL) {
		status = ENODEV;
		goto exit;
	}

	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
		status = EAGAIN;
		goto exit;
	}

	/*
	 * We should have switched detaching path if it was active.
	 */
	mp = man_find_active_path(mpg->mpg_pathp);
	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
		status = EAGAIN;
		goto exit;
	}

	/*
	 * Submit an ASSIGN command, minus the detaching device.
	 */
	bzero((char *)&mpath, sizeof (mi_path_t));

	if (status = man_get_iosram(&manc)) {
		goto exit;
	}

	mpath.mip_cmd = MI_PATH_ASSIGN;
	mpath.mip_man_ppa = 0;
	mpath.mip_pg_id = 0;

	mp = mpg->mpg_pathp;
	i = 0;
	found = FALSE;
	while (mp != NULL) {
		if (mp->mp_device.mdev_ppa != adp->a_sf_dev.mdev_ppa) {
			mpath.mip_devs[i] = mp->mp_device;
			i++;
		} else {
			found = TRUE;
		}
		mp = mp->mp_next;
	}

	if (found) {
		/*
		 * Need to include SCs ethernet address in command.
		 */
		mpath.mip_ndevs = i;
		ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);

		status = man_pg_cmd(&mpath, NULL);
	}

	/*
	 * Hand back status to man_dr_detach request.
	 */
exit:
	if (status != ENODEV)
		wp->mw_status = status;

	mutex_exit(&man_lock);

}


/*
 * The background thread has configured new lower multiplexor streams for
 * the given destinations. Update the appropriate destination data structures
 * inside the inner perimeter. We must take care to deal with destinations
 * whose upper stream has closed or detached from lower streams.
 *
 * Returns
 *	0		Done with work request.
 *	1		Reused work request.
 */
static int
man_iswitch(man_work_t *wp)
{
	man_adest_t	*adp;
	man_t		*manp;
	man_pg_t	*mpg;
	man_path_t	*mp = NULL;
	man_dest_t	*mdp;
	man_dest_t	*tdp;
	int		i;
	int		switch_ok = TRUE;

	adp = &wp->mw_arg;

	if (wp->mw_status != 0) {
		switch_ok = FALSE;	/* Never got things opened */
	}

	/*
	 * Update destination structures as appropriate.
	 */
	for (i = 0; i < adp->a_ndests; i++) {
		man_dest_t	tmp;

		/*
		 * Check to see if the lower stream we just switched is
		 * still around.
		 */
		tdp = &adp->a_mdp[i];
		mdp = man_switch_match(tdp, adp->a_pg_id, tdp->md_switch_id);

		if (mdp == NULL)
			continue;

		if (switch_ok == FALSE) {
			/*
			 * Switch failed for some reason.  Clear
			 * PLUMBING flag and retry switch again later.
			 */
			man_ifail_dest(mdp);
			continue;
		}

		/*
		 * Swap new info, for old. We return the old info to
		 * man_bwork to close things up below.
		 */
		bcopy((char *)mdp, (char *)&tmp, sizeof (man_dest_t));

		ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
		ASSERT(mdp->md_state == tdp->md_state);

		mdp->md_state = tdp->md_state;

		/*
		 * Save the wq from the destination passed in (tdp).
		 */
		mdp->md_wq = tdp->md_wq;
		RD(mdp->md_wq)->q_ptr = (void *)(mdp);
		WR(mdp->md_wq)->q_ptr = (void *)(mdp);

		mdp->md_state &= ~MAN_DSTATE_INITIALIZING;
		mdp->md_state |= MAN_DSTATE_READY;

		ASSERT(mdp->md_device.mdev_major == adp->a_sf_dev.mdev_major);

		ASSERT(tdp->md_device.mdev_ppa == adp->a_st_dev.mdev_ppa);
		ASSERT(tdp->md_device.mdev_major == adp->a_st_dev.mdev_major);

		mdp->md_device = tdp->md_device;
		mdp->md_muxid = tdp->md_muxid;
		mdp->md_linkstate = MAN_LINKUNKNOWN;
		(void) drv_getparm(TIME, &mdp->md_lastswitch);
		mdp->md_state &= ~MAN_DSTATE_PLUMBING;
		mdp->md_switch_id = 0;
		mdp->md_switches++;
		mdp->md_dlpierrors = 0;
		D_SETSTATE(mdp, DL_UNATTACHED);

		/*
		 * Resync lower w/ upper dlpi state. This will start link
		 * timer if/when lower stream goes to DL_IDLE (see man_lrsrv).
		 */
		man_reset_dlpi((void *)mdp);

		bcopy((char *)&tmp, (char *)tdp, sizeof (man_dest_t));
	}

	if (switch_ok) {
		for (i = 0; i < adp->a_ndests; i++) {
			tdp = &adp->a_mdp[i];

			tdp->md_state &= ~MAN_DSTATE_PLUMBING;
			tdp->md_state &= ~MAN_DSTATE_INITIALIZING;
			tdp->md_state |= MAN_DSTATE_READY;
		}
	} else {
		/*
		 * Never got switch-to destinations open, free them.
		 */
		man_kfree(adp->a_mdp,
		    sizeof (man_dest_t) * adp->a_ndests);
	}

	/*
	 * Clear pathgroup switching flag and update path flags.
	 */
	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);

	ASSERT(manp != NULL);
	ASSERT(manp->man_pg != NULL);

	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
	ASSERT(mpg != NULL);
	ASSERT(mpg->mpg_flags & MAN_PG_SWITCHING);
	mpg->mpg_flags &= ~MAN_PG_SWITCHING;

	/*
	 * Switch succeeded, mark path we switched from as failed, and
	 * device we switch to as active and clear its failed flag (if set).
	 * Sync up kstats.
	 */
	if (switch_ok) {
		mp = man_find_active_path(mpg->mpg_pathp);
		if (mp != NULL) {

			ASSERT(adp->a_sf_dev.mdev_major != 0);

			MAN_DBG(MAN_SWITCH, ("man_iswitch: switch from dev:"));
			MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_sf_dev));

			mp->mp_device.mdev_state &= ~MDEV_ACTIVE;
		} else
			ASSERT(adp->a_sf_dev.mdev_major == 0);

		MAN_DBG(MAN_SWITCH, ("man_iswitch: switch to dev:"));
		MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_st_dev));

		ASSERT(adp->a_st_dev.mdev_major != 0);

		mp = man_find_path_by_ppa(mpg->mpg_pathp,
		    adp->a_st_dev.mdev_ppa);

		ASSERT(mp != NULL);

		mp->mp_device.mdev_state |= MDEV_ACTIVE;
	}

	/*
	 * Decrement manp reference count and hand back work request if
	 * needed.
	 */
	manp->man_refcnt--;

	if (switch_ok) {
		wp->mw_type = MAN_WORK_CLOSE;
		man_work_add(man_bwork_q, wp);
	}

	mutex_exit(&man_lock);

	return (switch_ok);
}

/*
 * Find the destination in the upper stream that we just switched.
 */
man_dest_t *
man_switch_match(man_dest_t *sdp, int pg_id, void *sid)
{
	man_dest_t	*mdp = NULL;
	manstr_t	*msp;

	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
		/*
		 * Check if upper stream closed, or detached.
		 */
		if (msp != sdp->md_msp)
			continue;

		if (msp->ms_dests == NULL)
			break;

		mdp = &msp->ms_dests[pg_id];

		/*
		 * Upper stream detached and reattached while we were
		 * switching.
		 */
		if (mdp->md_switch_id != sid) {
			mdp = NULL;
			break;
		}
	}

	return (mdp);
}

/*
 * bg_thread can't complete the switch for some reason. (Re)start the
 * linkcheck timer.
 */
static void
man_ifail_dest(man_dest_t *mdp)
{
	ASSERT(mdp->md_lc_timer_id == 0);
	ASSERT(mdp->md_bc_id == 0);
	ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);

	MAN_DBG(MAN_SWITCH, ("man_ifail_dest"));
	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));

	mdp->md_state &= ~MAN_DSTATE_PLUMBING;
	mdp->md_linkstate = MAN_LINKFAIL;

	/*
	 * If we have not yet initialized link, or the upper stream is
	 * DL_IDLE, restart the linktimer.
	 */
	if ((mdp->md_state & MAN_DSTATE_INITIALIZING) ||
	    ((mdp->md_msp->ms_sap == ETHERTYPE_IPV6 ||
	    mdp->md_msp->ms_sap == ETHERTYPE_IP) &&
	    mdp->md_msp->ms_dlpistate == DL_IDLE)) {

		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
	}

}

/*
 * Arrange to replay all of ms_dl_mp on the new lower stream to get it
 * in sync with the upper stream. Note that this includes setting the
 * physical address.
 *
 * Called from qtimeout with inner perimeter lock.
 */
static void
man_reset_dlpi(void *argp)
{
	man_dest_t	*mdp = (man_dest_t *)argp;
	manstr_t	*msp;
	mblk_t		*mp;
	mblk_t		*rmp = NULL;
	mblk_t		*tmp;

	mdp->md_lc_timer_id = 0;

	if (mdp->md_state != MAN_DSTATE_READY) {
		MAN_DBG(MAN_DLPI, ("man_reset_dlpi: not ready!"));
		return;
	}

	msp = mdp->md_msp;

	rmp = man_dup_mplist(msp->ms_dl_mp);
	if (rmp == NULL)
		goto fail;

	/*
	 * Send down an unbind and detach request, just to clean things
	 * out, we ignore ERROR_ACKs for unbind and detach in man_lrsrv.
	 */
	tmp = man_alloc_ubreq_dreq();
	if (tmp == NULL) {
		goto fail;
	}
	mp = tmp;
	while (mp->b_next != NULL)
		mp = mp->b_next;
	mp->b_next = rmp;
	rmp = tmp;

	man_dlpi_replay(mdp, rmp);

	return;

fail:

	while (rmp) {
		mp = rmp;
		rmp = rmp->b_next;
		mp->b_next = mp->b_prev = NULL;
		freemsg(mp);
	}

	ASSERT(mdp->md_lc_timer_id == 0);
	ASSERT(mdp->md_bc_id == 0);

	/*
	 * If low on memory, try again later. I could use qbufcall, but that
	 * could fail and I would have to try and recover from that w/
	 * qtimeout anyway.
	 */
	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_reset_dlpi,
	    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
}

/*
 * Once we receive acknowledgement that DL_ATTACH_REQ was successful,
 * we can send down the DL_* related IOCTLs (e.g. DL_IOC_HDR). If we
 * try and send them downstream w/o waiting, the ioctls get processed before
 * the ATTACH_REQ and they are rejected. TBD - could just do the lower
 * dlpi state change in lock step.
 */
static int
man_dlioc_replay(man_dest_t *mdp)
{
	mblk_t		*rmp;
	int		status = 1;

	if (mdp->md_msp->ms_dlioc_mp == NULL)
		goto exit;

	rmp = man_dup_mplist(mdp->md_msp->ms_dlioc_mp);
	if (rmp == NULL) {
		status = 0;
		goto exit;
	}

	man_dlpi_replay(mdp, rmp);
exit:
	return (status);
}

static mblk_t *
man_alloc_ubreq_dreq()
{
	mblk_t			*dreq;
	mblk_t			*ubreq = NULL;
	union DL_primitives	*dlp;

	dreq = allocb(DL_DETACH_REQ_SIZE, BPRI_MED);
	if (dreq == NULL)
		goto exit;

	dreq->b_datap->db_type = M_PROTO;
	dlp = (union DL_primitives *)dreq->b_rptr;
	dlp->dl_primitive = DL_DETACH_REQ;
	dreq->b_wptr += DL_DETACH_REQ_SIZE;

	ubreq = allocb(DL_UNBIND_REQ_SIZE, BPRI_MED);
	if (ubreq == NULL) {
		freemsg(dreq);
		goto exit;
	}

	ubreq->b_datap->db_type = M_PROTO;
	dlp = (union DL_primitives *)ubreq->b_rptr;
	dlp->dl_primitive = DL_UNBIND_REQ;
	ubreq->b_wptr += DL_UNBIND_REQ_SIZE;

	ubreq->b_next = dreq;

exit:

	return (ubreq);
}
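
/*
 * The chain returned above is DL_UNBIND_REQ -> DL_DETACH_REQ;
 * man_reset_dlpi() prepends it to the duplicated replay list, so the
 * lower stream sees e.g. (illustrative):
 *
 *	UNBIND -> DETACH -> ATTACH -> SET_PHYS_ADDR -> BIND ...
 *
 * which quiesces any stale lower state before resyncing it with the
 * upper stream.
 */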

static mblk_t *
man_dup_mplist(mblk_t *mp)
{
	mblk_t	*listp = NULL;
	mblk_t	*tailp = NULL;

	for (; mp != NULL; mp = mp->b_next) {

		mblk_t	*nmp;
		mblk_t	*prev;
		mblk_t	*next;

		prev = mp->b_prev;
		next = mp->b_next;
		mp->b_prev = mp->b_next = NULL;

		nmp = copymsg(mp);

		mp->b_prev = prev;
		mp->b_next = next;

		if (nmp == NULL)
			goto nomem;

		if (listp == NULL) {
			listp = tailp = nmp;
		} else {
			tailp->b_next = nmp;
			tailp = nmp;
		}
	}

	return (listp);
nomem:

	while (listp) {
		mp = listp;
		listp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;
		freemsg(mp);
	}

	return (NULL);

}

static mblk_t *
man_alloc_physreq_mp(eaddr_t *man_eap)
{

	mblk_t			*mp;
	union DL_primitives	*dlp;
	t_uscalar_t		off;
	eaddr_t			*eap;

	mp = allocb(DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL, BPRI_MED);
	if (mp == NULL)
		goto exit;

	mp->b_datap->db_type = M_PROTO;
	dlp = (union DL_primitives *)mp->b_wptr;
	dlp->set_physaddr_req.dl_primitive = DL_SET_PHYS_ADDR_REQ;
	dlp->set_physaddr_req.dl_addr_length = ETHERADDRL;
	off = DL_SET_PHYS_ADDR_REQ_SIZE;
	dlp->set_physaddr_req.dl_addr_offset =  off;
	mp->b_wptr += DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL;

	eap = (eaddr_t *)(mp->b_rptr + off);
	ether_copy(man_eap, eap);

exit:
	/* eap is only valid if the allocation above succeeded. */
	if (mp != NULL) {
		MAN_DBG(MAN_DLPI, ("man_alloc_physreq: physaddr %s\n",
		    ether_sprintf(eap)));
	}

	return (mp);
}

/*
 * A new path in a pathgroup has become active for the first time. Set up
 * the lower destinations in preparation for man_pg_activate to call
 * man_autoswitch.
 */
static void
man_add_dests(man_pg_t *mpg)
{
	manstr_t	*msp;
	man_dest_t	*mdp;

	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {

		if (!man_str_uses_pg(msp, mpg))
			continue;

		mdp = &msp->ms_dests[mpg->mpg_pg_id];

/*
 * TBD - Take out
 *		ASSERT(mdp->md_device.mdev_state == MDEV_UNASSIGNED);
 *		ASSERT(mdp->md_state == MAN_DSTATE_NOTPRESENT);
 */
		if (mdp->md_device.mdev_state != MDEV_UNASSIGNED) {
			cmn_err(CE_NOTE, "man_add_dests mdev !unassigned");
			MAN_DBGCALL(MAN_PATH, man_print_mdp(mdp));
		}

		man_start_dest(mdp, msp, mpg);
	}

}

static int
man_remove_dests(man_pg_t *mpg)
{
	manstr_t	*msp;
	int		close_cnt = 0;
	man_dest_t	*cdp;
	man_dest_t	*mdp;
	man_dest_t	*tdp;
	man_work_t	*wp;
	mblk_t		*mp;
	int		status = 0;

	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
	if (wp == NULL) {
		status = ENOMEM;
		goto exit;
	}

	/*
	 * Count up number of destinations we need to close.
	 */
	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
		if (!man_str_uses_pg(msp, mpg))
			continue;

		close_cnt++;
	}

	if (close_cnt == 0)
		goto exit;

	cdp = man_kzalloc(sizeof (man_dest_t) * close_cnt, KM_NOSLEEP);
	if (cdp == NULL) {
		status = ENOMEM;
		man_work_free(wp);
		goto exit;
	}

	tdp = cdp;
	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
		if (!man_str_uses_pg(msp, mpg))
			continue;

		mdp = &msp->ms_dests[mpg->mpg_pg_id];

		mdp->md_state |= MAN_DSTATE_CLOSING;
		mdp->md_device.mdev_state = MDEV_UNASSIGNED;
		mdp->md_msp = NULL;
		mdp->md_rq = NULL;

		/*
		 * Clean up optimized destination pointer if we are
		 * closing it.
		 */
		man_set_optimized_dest(msp);

		if (mdp->md_lc_timer_id != 0) {
			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
			mdp->md_lc_timer_id = 0;
		}
		if (mdp->md_bc_id != 0) {
			qunbufcall(man_ctl_wq, mdp->md_bc_id);
			mdp->md_bc_id = 0;
		}

		mutex_enter(&mdp->md_lock);
		while ((mp = mdp->md_dmp_head) != NULL) {
			mdp->md_dmp_head = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
		}
		mdp->md_dmp_count = 0;
		mdp->md_dmp_tail = NULL;
		mutex_exit(&mdp->md_lock);

		*tdp++ = *mdp;

		mdp->md_state = MAN_DSTATE_NOTPRESENT;
		mdp->md_muxid = -1;
	}

	wp->mw_arg.a_mdp = cdp;
	wp->mw_arg.a_ndests = close_cnt;
	man_work_add(man_bwork_q, wp);

exit:
	return (status);

}

/*
 * Returns TRUE if stream uses pathgroup, FALSE otherwise.
 */
static int
man_str_uses_pg(manstr_t *msp, man_pg_t *mpg)
{
	int	status;

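	/*
	 * A stream uses the pathgroup iff it is a non-control stream with
	 * destinations whose man instance matches that of the pathgroup.
	 */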
	status = ((msp->ms_flags & MAN_SFLAG_CONTROL)	||
	    (msp->ms_dests == NULL)	||
	    (msp->ms_manp == NULL)	||
	    (msp->ms_manp->man_meta_ppa != mpg->mpg_man_ppa));

	return (!status);
}

static int
man_gettimer(int timer, man_dest_t *mdp)
{

	int attached = TRUE;
	int time = 0;

	if (mdp == NULL || mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
		attached = FALSE;

	switch (timer) {
	case MAN_TIMER_INIT:
		if (attached)
			time = mdp->md_msp->ms_manp->man_init_time;
		else
			time = MAN_INIT_TIME;
		break;

	case MAN_TIMER_LINKCHECK:
		if (attached) {
			if (mdp->md_linkstate == MAN_LINKSTALE)
				time = mdp->md_msp->ms_manp->man_linkstale_time;
			else
				time = mdp->md_msp->ms_manp->man_linkcheck_time;
		} else
			time = MAN_LINKCHECK_TIME;
		break;

	case MAN_TIMER_DLPIRESET:
		if (attached)
			time = mdp->md_msp->ms_manp->man_dlpireset_time;
		else
			time = MAN_DLPIRESET_TIME;
		break;

	default:
		MAN_DBG(MAN_LINK, ("man_gettimer: unknown timer %d", timer));
		time = MAN_LINKCHECK_TIME;
		break;
	}

	return (drv_usectohz(time));
}

/*
 * Check the links for each active destination. Called inside inner
 * perimeter via qtimeout. This timer only runs on the domain side of the
 * driver. It should never run on the SC side.
 *
 * On a MAN_LINKGOOD link, we check/probe the link health every
 * MAN_LINKCHECK_TIME seconds. If the link goes MAN_LINKSTALE, then we probe
 * the link every MAN_LINKSTALE_TIME seconds, and fail the link after probing
 * it MAN_LINKSTALE_RETRIES times.
 * The man_lock is held to synchronize access to the pathgroup list (man_pg).
 */
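/*
 * Illustrative link state progression (a rough sketch of the code
 * below, not an exhaustive state table):
 *
 *	LINKGOOD --no traffic--> LINKSTALE --retries exhausted--> LINKFAIL
 *	LINKSTALE/LINKFAIL --traffic seen--> LINKGOOD
 *
 * Entering LINKFAIL marks the path MDEV_FAILED and kicks off
 * man_do_autoswitch().
 */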
void
man_linkcheck_timer(void *argp)
{
	man_dest_t		*mdp = (man_dest_t *)argp;
	int			restart_timer = TRUE;
	int			send_ping = TRUE;
	int			newstate;
	int			oldstate;
	man_pg_t		*mpg;
	man_path_t		*mp;

	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: mdp"));
	MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));

	/*
	 * Clear the timeout id and check if someone is waiting on us to
	 * complete a close.
	 */
	mdp->md_lc_timer_id = 0;

	if (mdp->md_state == MAN_DSTATE_NOTPRESENT ||
	    mdp->md_state & MAN_DSTATE_BUSY) {

		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: not ready mdp"));
		MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
		goto exit;
	}

	mutex_enter(&man_lock);
	/*
	 * If the lower stream needs initializing, just go straight to
	 * switch code. As the linkcheck timer is started for all
	 * SAPs, do not send ping packets during the initialization.
	 */
	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
		send_ping = FALSE;
		goto do_switch;
	}

	newstate = oldstate = mdp->md_linkstate;

	if (!man_needs_linkcheck(mdp)) {
		cmn_err(CE_NOTE,
		    "man_linkcheck_timer: unneeded linkcheck on mdp(0x%p)",
		    (void *)mdp);
		mutex_exit(&man_lock);
		return;
	}

	/*
	 * The above call to man_needs_linkcheck() validates the
	 * mdp->md_msp and mdp->md_msp->ms_manp pointers.
	 */
	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
	ASSERT(mpg != NULL);
	mp = man_find_path_by_ppa(mpg->mpg_pathp, mdp->md_device.mdev_ppa);
	ASSERT(mp != NULL);

	/*
	 * This is the most common case, when traffic is flowing.
	 */
	if (mdp->md_rcvcnt != mdp->md_lastrcvcnt) {

		newstate = MAN_LINKGOOD;
		mdp->md_lastrcvcnt = mdp->md_rcvcnt;
		send_ping = FALSE;

		/*
		 * Clear the FAILED flag and update lru.
		 */
		mp->mp_device.mdev_state &= ~MDEV_FAILED;
		(void) drv_getparm(TIME, &mp->mp_lru);

		if (mdp->md_link_updown_msg == MAN_LINK_DOWN_MSG) {
			man_t *manp = mdp->md_msp->ms_manp;

			cmn_err(CE_NOTE, "%s%d Link up",
			    ddi_major_to_name(manp->man_meta_major),
			    manp->man_meta_ppa);

			mdp->md_link_updown_msg = MAN_LINK_UP_MSG;
		}

		goto done;
	}

	/*
	 * If we're here, it means we have not seen any traffic.
	 */
	switch (oldstate) {
	case MAN_LINKINIT:
	case MAN_LINKGOOD:
		newstate = MAN_LINKSTALE;
		mdp->md_linkstales++;
		mdp->md_linkstale_retries =
		    mdp->md_msp->ms_manp->man_linkstale_retries;
		break;

	case MAN_LINKSTALE:
	case MAN_LINKFAIL:
		mdp->md_linkstales++;
		mdp->md_linkstale_retries--;
		if (mdp->md_linkstale_retries < 0) {
			newstate = MAN_LINKFAIL;
			mdp->md_linkfails++;
			mdp->md_linkstale_retries =
			    mdp->md_msp->ms_manp->man_linkstale_retries;
			/*
			 * Mark the destination as FAILED and
			 * update lru.
			 */
			if (oldstate != MAN_LINKFAIL) {
				mp->mp_device.mdev_state |= MDEV_FAILED;
				(void) drv_getparm(TIME, &mp->mp_lru);
			}
		}
		break;

	default:
		cmn_err(CE_WARN, "man_linkcheck_timer: illegal link"
		    " state %d", oldstate);
		break;
	}
done:

	if (oldstate != newstate) {

		MAN_DBG(MAN_LINK, ("man_linkcheck_timer"
		    " link state %s -> %s", lss[oldstate],
		    lss[newstate]));

		mdp->md_linkstate = newstate;
	}

	/*
	 * Do any work required from state transitions above.
	 */
	if (newstate == MAN_LINKFAIL) {
do_switch:
		if (!man_do_autoswitch(mdp)) {
			/*
			 * Stop linkcheck timer until switch completes.
			 */
			restart_timer = FALSE;
			send_ping = FALSE;
		}
	}

	mutex_exit(&man_lock);
	if (send_ping)
		man_do_icmp_bcast(mdp, mdp->md_msp->ms_sap);

	if (restart_timer)
		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));

exit:
	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: returns"));

}

/*
 * Handle linkcheck initiated autoswitching.
 * Called with man_lock held.
 */
static int
man_do_autoswitch(man_dest_t *mdp)
{
	man_pg_t	*mpg;
	man_path_t	*ap;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));
	/*
	 * Set flags and refcnt. Cleared in man_iswitch when SWITCH completes.
	 */
	mdp->md_msp->ms_manp->man_refcnt++;

	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
	ASSERT(mpg);

	if (mpg->mpg_flags & MAN_PG_SWITCHING)
		return (EBUSY);

	mpg->mpg_flags |= MAN_PG_SWITCHING;

	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
		/*
		 * We're initializing, ask for a switch to our currently
		 * active device.
		 */
		status = man_autoswitch(mpg, &mdp->md_device, NULL);
	} else {

		if (mdp->md_msp != NULL && mdp->md_msp->ms_manp != NULL &&
		    mdp->md_link_updown_msg == MAN_LINK_UP_MSG) {

			man_t *manp = mdp->md_msp->ms_manp;

			cmn_err(CE_NOTE, "%s%d Link down",
			    ddi_major_to_name(manp->man_meta_major),
			    manp->man_meta_ppa);
		}
		mdp->md_link_updown_msg = MAN_LINK_DOWN_MSG;

		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: link failure on %s%d",
		    ddi_major_to_name(mdp->md_device.mdev_major),
		    mdp->md_device.mdev_ppa));

		ap = man_find_alternate_path(mpg->mpg_pathp);

		if (ap == NULL) {
			status = ENODEV;
			goto exit;
		}
		status = man_autoswitch(mpg, &ap->mp_device, NULL);
	}
exit:
	if (status != 0) {
		/*
		 * man_iswitch not going to run, clean up.
		 */
		mpg->mpg_flags &= ~MAN_PG_SWITCHING;
		mdp->md_msp->ms_manp->man_refcnt--;
	}

	return (status);
}

/*
 * Gather up all lower multiplexor streams that have this link open and
 * try to switch them. Called from inner perimeter and holding man_lock.
 *
 *	mpg		- Pathgroup to do the switch for.
 *	st_devp		- New device to switch to.
 *	waiter_wp	- Waiter's work request, or NULL to allocate one here.
 */
static int
man_autoswitch(man_pg_t *mpg, man_dev_t *st_devp, man_work_t *waiter_wp)
{
	man_work_t	*wp;
	int		sdp_cnt = 0;
	man_dest_t	*sdp;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));
	if (waiter_wp == NULL) {
		wp = man_work_alloc(MAN_WORK_SWITCH, KM_NOSLEEP);
		if (wp == NULL) {
			status = ENOMEM;
			goto exit;
		}
	} else {
		ASSERT(waiter_wp->mw_type == MAN_WORK_SWITCH);
		wp = waiter_wp;
	}

	/*
	 * Set dests as PLUMBING, cancel timers and return array of dests
	 * that need a switch.
	 */
	status = man_prep_dests_for_switch(mpg, &sdp, &sdp_cnt);
	if (status) {
		if (waiter_wp == NULL)
			man_work_free(wp);
		goto exit;
	}

	/*
	 * If no streams are active, there are no streams to switch.
	 * Return ENODEV (see man_pg_activate).
	 */
	if (sdp_cnt == 0) {
		if (waiter_wp == NULL)
			man_work_free(wp);
		status = ENODEV;
		goto exit;
	}

	/*
	 * Ask the bgthread to switch. See man_bwork.
	 */
	wp->mw_arg.a_sf_dev = sdp->md_device;
	wp->mw_arg.a_st_dev = *st_devp;
	wp->mw_arg.a_pg_id = mpg->mpg_pg_id;
	wp->mw_arg.a_man_ppa = mpg->mpg_man_ppa;

	wp->mw_arg.a_mdp = sdp;
	wp->mw_arg.a_ndests = sdp_cnt;
	man_work_add(man_bwork_q, wp);

exit:

	return (status);
}

/*
 * If an alternate path exists for the pathgroup, arrange for the switch
 * to happen. Note that we need to switch each msp->ms_dests[pg_id] for
 * every stream on man_strup. We must:
 *
 *		Cancel any timers
 *		Mark dests as PLUMBING
 *		Submit the switch request to man_bwork_q
 */
static int
man_prep_dests_for_switch(man_pg_t *mpg, man_dest_t **mdpp, int *cntp)
{
	manstr_t	*msp;
	man_dest_t	*mdp;
	int		sdp_cnt = 0;
	man_dest_t	*sdp = NULL;
	man_dest_t	*tdp;
	int		status = 0;

	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: pg_id %d",
	    mpg->mpg_pg_id));

	/*
	 * Count up the number of streams; there is one destination that
	 * needs switching per stream.
	 */
	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
		if (man_str_uses_pg(msp, mpg))
			sdp_cnt++;
	}

	if (sdp_cnt == 0)
		goto exit;

	sdp = man_kzalloc(sizeof (man_dest_t) * sdp_cnt, KM_NOSLEEP);
	if (sdp == NULL) {
		status = ENOMEM;
		goto exit;
	}
	tdp = sdp;
	/*
	 * Mark each destination as unusable.
	 */
	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
		if (man_str_uses_pg(msp, mpg)) {

			/*
			 * Mark destination as plumbing and store the
			 * address of sdp as a way to identify the
			 * SWITCH request when it comes back (see man_iswitch).
			 */
			mdp = &msp->ms_dests[mpg->mpg_pg_id];
			mdp->md_state |= MAN_DSTATE_PLUMBING;
			mdp->md_switch_id = sdp;

			/*
			 * Copy destination info.
			 */
			bcopy(mdp, tdp, sizeof (man_dest_t));
			tdp++;

			/*
			 * Cancel timers.
			 */
			if (mdp->md_lc_timer_id) {
				(void) quntimeout(man_ctl_wq,
				    mdp->md_lc_timer_id);
				mdp->md_lc_timer_id = 0;
			}
			if (mdp->md_bc_id) {
				qunbufcall(man_ctl_wq, mdp->md_bc_id);
				mdp->md_bc_id = 0;
			}
		}
	}

	*mdpp = sdp;
	*cntp = sdp_cnt;
	status = 0;
exit:

	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: returns %d"
	    " sdp(0x%p) sdp_cnt(%d)", status, (void *)sdp, sdp_cnt));

	return (status);

}

/*
 * The code below generates an ICMP echo packet and sends it to the
 * broadcast address in the hopes that the other end will respond
 * and the man_linkcheck_timer logic will see the traffic.
 *
 * This assumes ethernet-like media.
 */
/*
 * Generate an ICMP packet. Called exclusive in the inner perimeter.
 *
 *	mdp - destination to send packet to.
 *	sap - either ETHERTYPE_IP or ETHERTYPE_IPV6
 */
static void
man_do_icmp_bcast(man_dest_t *mdp, t_uscalar_t sap)
{
	mblk_t			*mp = NULL;

	/* TBD - merge pinger and this routine. */

	ASSERT(sap == ETHERTYPE_IPV6 || sap == ETHERTYPE_IP);

	if (sap == ETHERTYPE_IPV6) {
		mdp->md_icmpv6probes++;
	} else {
		mdp->md_icmpv4probes++;
	}
	/*
	 * Send the ICMP message
	 */
	mp = man_pinger(sap);

	MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: sap=0x%x mp=0x%p",
	    sap, (void *)mp));
	if (mp == NULL)
		return;

	/*
	 * Send it out.
	 */
	if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {

		MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: xmit failed"));

		freemsg(mp);
	}

}

static mblk_t *
man_pinger(t_uscalar_t sap)
{
	mblk_t		*mp = NULL;
	man_dladdr_t	dlsap;
	icmph_t		*icmph;
	int		ipver;
	ipha_t		*ipha;
	ip6_t		*ip6h;
	int		iph_hdr_len;
	int		datalen = 64;
	uchar_t		*datap;
	uint16_t	size;
	uchar_t		i;

	dlsap.dl_sap = htons(sap);
	bcopy(&etherbroadcast, &dlsap.dl_phys, sizeof (dlsap.dl_phys));

	if (sap == ETHERTYPE_IPV6) {
		ipver = IPV6_VERSION;
		iph_hdr_len = sizeof (ip6_t);
		size = ICMP6_MINLEN;
	} else {
		ipver = IPV4_VERSION;
		iph_hdr_len = sizeof (ipha_t);
		size = ICMPH_SIZE;
	}
	size += (uint16_t)iph_hdr_len;
	size += datalen;

	mp = man_alloc_udreq(size, &dlsap);
	if (mp == NULL)
		goto exit;

	/*
	 * fill out the ICMP echo packet headers
	 */
	mp->b_cont->b_wptr += iph_hdr_len;
	if (ipver == IPV4_VERSION) {
		ipha = (ipha_t *)mp->b_cont->b_rptr;
		ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
		    | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
		ipha->ipha_type_of_service = 0;
		ipha->ipha_length = size;
		ipha->ipha_fragment_offset_and_flags = IPH_DF;
		ipha->ipha_ttl = 1;
		ipha->ipha_protocol = IPPROTO_ICMP;
		if (man_is_on_domain) {
			manc_t		manc;

			if (man_get_iosram(&manc)) {
				freemsg(mp);
				mp = NULL;
				goto exit;
			}

			/*
			 * Domain generates ping packets for domain to
			 * SC network (dman0 <--> scman0).
			 */
			ipha->ipha_dst = manc.manc_sc_ipaddr;
			ipha->ipha_src = manc.manc_dom_ipaddr;
		} else {
			/*
			 * Note that ping packets are only generated
			 * by the SC across scman1 (SC to SC network).
			 */
			ipha->ipha_dst = man_sc_ipaddrs.ip_other_sc_ipaddr;
			ipha->ipha_src = man_sc_ipaddrs.ip_my_sc_ipaddr;
		}

		ipha->ipha_ident = 0;

		ipha->ipha_hdr_checksum = 0;
		ipha->ipha_hdr_checksum = IP_CSUM(mp->b_cont, 0, 0);

	} else {
		ip6h = (ip6_t *)mp->b_cont->b_rptr;
		/*
		 * IP version = 6, priority = 0, flow = 0
		 */
		ip6h->ip6_flow = (IPV6_VERSION << 28);
		ip6h->ip6_plen =
		    htons((short)(size - iph_hdr_len));
		ip6h->ip6_nxt = IPPROTO_ICMPV6;
		ip6h->ip6_hlim = 1;	/* stay on link */

		if (man_is_on_domain) {
			manc_t		manc;

			if (man_get_iosram(&manc)) {
				freemsg(mp);
				mp = NULL;
				goto exit;
			}

			/*
			 * Domain generates ping packets for domain to
			 * SC network (dman0 <--> scman0).
			 */
			ip6h->ip6_src = manc.manc_dom_ipv6addr;
			ip6h->ip6_dst = manc.manc_sc_ipv6addr;
		} else {
			/*
			 * Note that ping packets are only generated
			 * by the SC across scman1 (SC to SC network).
			 */
			ip6h->ip6_src = man_sc_ip6addrs.ip6_my_sc_ipaddr;
			ip6h->ip6_dst = man_sc_ip6addrs.ip6_other_sc_ipaddr;
		}
	}

	/*
	 * IPv6 and IP are the same for ICMP as far as I'm concerned.
	 */
	icmph = (icmph_t *)mp->b_cont->b_wptr;
	if (ipver == IPV4_VERSION) {
		mp->b_cont->b_wptr += ICMPH_SIZE;
		icmph->icmph_type = ICMP_ECHO_REQUEST;
		icmph->icmph_code = 0;
	} else {
		mp->b_cont->b_wptr += ICMP6_MINLEN;
		icmph->icmph_type = ICMP6_ECHO_REQUEST;
		icmph->icmph_code = 0;
	}

	datap = mp->b_cont->b_wptr;
	mp->b_cont->b_wptr += datalen;

	for (i = 0; i < datalen; i++)
		*datap++ = i;

	if (ipver == IPV4_VERSION) {
		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len, 0);
	} else {
		uint32_t	sum;

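		/*
		 * The ICMPv6 checksum covers a pseudo-header consisting of
		 * the source and destination addresses plus the payload
		 * length and next-header value. The addresses begin 8 bytes
		 * into ip6_t (32 bytes before its end), hence the
		 * (iph_hdr_len - 32) offset below; the remaining pseudo-
		 * header fields are folded in via "sum".
		 */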
		sum = htons(IPPROTO_ICMPV6) + ip6h->ip6_plen;
		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len - 32,
		    (sum & 0xffff) + (sum >> 16));
	}

/*
 * TBD
 *	icp->icmp_time =  ???;
 */

exit:
	return (mp);
}

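/*
 * Allocate a DL_UNITDATA_REQ message. The result is a two-mblk chain:
 * an M_PROTO block holding the dl_unitdata_req_t followed by the
 * destination address (dlsap), with a zeroed "size"-byte b_cont block
 * for the caller to fill in as payload.
 */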
static mblk_t *
man_alloc_udreq(int size, man_dladdr_t *dlsap)
{
	dl_unitdata_req_t	*udreq;
	mblk_t			*bp;
	mblk_t			*mp;

	mp = allocb(sizeof (dl_unitdata_req_t) + sizeof (*dlsap), BPRI_MED);

	if (mp == NULL) {
		cmn_err(CE_NOTE, "man_alloc_udreq: allocb failed");
		return (NULL);
	}

	if ((bp = allocb(size, BPRI_MED)) == NULL) {
		freemsg(mp);
		cmn_err(CE_NOTE, "man_alloc_udreq: allocb failed");
		return (NULL);
	}
	bzero(bp->b_rptr, size);

	mp->b_cont = bp;
	mp->b_datap->db_type = M_PROTO;
	udreq = (dl_unitdata_req_t *)mp->b_wptr;
	mp->b_wptr += sizeof (dl_unitdata_req_t);

	/*
	 * phys addr first - TBD
	 */
	bcopy((char *)dlsap, mp->b_wptr, sizeof (*dlsap));
	mp->b_wptr += sizeof (*dlsap);

	udreq->dl_primitive = DL_UNITDATA_REQ;
	udreq->dl_dest_addr_length = sizeof (*dlsap);
	udreq->dl_dest_addr_offset = sizeof (*udreq);
	udreq->dl_priority.dl_min = 0;
	udreq->dl_priority.dl_max = 0;

	return (mp);
}


/*
 * The routines in this file are executed by the MAN background thread,
 * which executes outside of the STREAMS framework (see man_str.c). It is
 * allowed to do the things required to modify the STREAMS driver (things
 * that are normally done from a user process). These routines do things like
 * open and close drivers, PLINK and PUNLINK streams to/from the multiplexor,
 * etc.
 *
 * The mechanism of communication between the STREAMS portion of the driver
 * and the background thread portion is a pair of work queues, man_bwork_q
 * and man_iwork_q (the background work q and the streams work q). Work
 * requests are placed on those queues when one half of the driver wants
 * the other half to do some work for it.
 *
 * The MAN background thread executes the man_bwork routine. Its sole
 * job is to process work requests placed on this work q. The MAN upper
 * write service routine is responsible for processing work requests posted
 * to the man_iwork_q.
 *
 * Both work queues are protected by the global mutex man_lock. The
 * man_bwork thread is signaled via the condvar man_bwork_q->q_cv. The
 * man_uwsrv routine is signaled by calling qenable (forcing man_uwsrv to
 * run).
 */
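/*
 * A typical synchronous request from the STREAMS side looks roughly
 * like the sketch below (work type and error handling are illustrative;
 * see man_kstat_update for a timed variant of the wait):
 *
 *	wp = man_work_alloc(MAN_WORK_SWITCH, KM_SLEEP);
 *	wp->mw_flags = MAN_WFLAGS_CVWAITER;
 *	mutex_enter(&man_lock);
 *	man_work_add(man_bwork_q, wp);
 *	while (!(wp->mw_flags & MAN_WFLAGS_DONE))
 *		cv_wait(&wp->mw_cv, &man_lock);
 *	status = wp->mw_status;
 *	man_work_free(wp);
 *	mutex_exit(&man_lock);
 */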

/*
 * man_bwork - Work thread for this device.  It is responsible for
 * performing operations which can't occur within the STREAMS framework.
 *
 * Locking:
 *	- Called holding no locks
 *	- Obtains the global mutex man_lock to remove work from
 *	  man_bwork_q, and post work to man_iwork_q.
 *	- Note that we do not want to hold any locks when making
 *	  any ldi_ calls.
 */
void
man_bwork()
{
	man_work_t	*wp;
	int		done = 0;
	callb_cpr_t	cprinfo;
	int		wp_finished;

	CALLB_CPR_INIT(&cprinfo, &man_lock, callb_generic_cpr,
	    "mn_work_thrd");

	MAN_DBG(MAN_CONFIG, ("man_bwork: enter"));

	while (done == 0) {

		mutex_enter(&man_lock);
		/*
		 * While there is nothing to do, sit in cv_wait.  If work
		 * request is made, requester will signal.
		 */
		while (man_bwork_q->q_work == NULL) {

			CALLB_CPR_SAFE_BEGIN(&cprinfo);

			cv_wait(&man_bwork_q->q_cv, &man_lock);

			CALLB_CPR_SAFE_END(&cprinfo, &man_lock);
		}

		wp = man_bwork_q->q_work;
		man_bwork_q->q_work = wp->mw_next;
		wp->mw_next = NULL;
		mutex_exit(&man_lock);

		wp_finished = TRUE;

		MAN_DBG(MAN_SWITCH, ("man_bwork: type %s",
		    _mw_type[wp->mw_type]));

		switch (wp->mw_type) {
		case MAN_WORK_OPEN_CTL:
			wp->mw_status = man_open_ctl();
			break;

		case MAN_WORK_CLOSE_CTL:
			man_close_ctl();
			break;

		case MAN_WORK_CLOSE:
		case MAN_WORK_CLOSE_STREAM:
			man_bclose(&wp->mw_arg);
			break;

		case MAN_WORK_SWITCH:
			man_bswitch(&wp->mw_arg, wp);
			wp_finished = FALSE;
			break;

		case MAN_WORK_STOP:		/* man_bwork_stop() */
			done = 1;
			mutex_enter(&man_lock);
			CALLB_CPR_EXIT(&cprinfo); /* Unlocks man_lock */
			break;

		default:
			cmn_err(CE_WARN, "man_bwork: "
			    "illegal work type(%d)", wp->mw_type);
			break;
		}

		mutex_enter(&man_lock);

		if (wp_finished) {
			wp->mw_flags |= MAN_WFLAGS_DONE;
			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
				cv_signal(&wp->mw_cv);
			else if (wp->mw_flags & MAN_WFLAGS_QWAITER)
				qenable(wp->mw_q);
			else
				man_work_free(wp);
		}

		mutex_exit(&man_lock);
	}

	MAN_DBG(MAN_CONFIG, ("man_bwork: thread_exit"));

	mutex_enter(&man_lock);
	man_bwork_id = NULL;
	mutex_exit(&man_lock);

	thread_exit();
}

/*
 * man_open_ctl - Open the control stream.
 *
 *	returns	- success - 0
 *		- failure - errno code
 *
 * Mutex Locking Notes:
 *	We need a way to keep the CLONE_OPEN qwaiters in man_open from
 *	checking the man_config variables after the ldi_open call below
 *	returns from man_open, leaving the inner perimeter. So, we use the
 *	man_lock to synchronize the threads in man_open_ctl and man_open.  We
 *	hold man_lock across this call into man_open, which in general is a
 *	no-no. But, the STREAMS portion of the driver (other than open)
 *	doesn't use it. So, if ldi_open gets hijacked to run any part of
 *	the MAN streams driver, it won't end up recursively trying to acquire
 *	man_lock. Note that the non-CLONE_OPEN portion of man_open doesn't
 *	acquire it either, so again no recursive mutex.
 */
static int
man_open_ctl()
{
	int		status = 0;
	ldi_handle_t	ctl_lh = NULL;
	ldi_ident_t	li = NULL;

	MAN_DBG(MAN_CONFIG, ("man_open_ctl: plumbing control stream\n"));

	/*
	 * Get eri driver loaded and kstats initialized. Is there a better
	 * way to do this? - TBD.
	 */
	status = ldi_ident_from_mod(&modlinkage, &li);
	if (status) {
		cmn_err(CE_WARN,
		    "man_open_ctl: ident alloc failed, error %d", status);
		goto exit;
	}

	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
	    kcred, &ctl_lh, li);
	if (status) {
		cmn_err(CE_WARN,
		    "man_open_ctl: eri open failed, error %d", status);
		ctl_lh = NULL;
		goto exit;
	}
	(void) ldi_close(ctl_lh, NULL, kcred);
	ctl_lh = NULL;

	mutex_enter(&man_lock);

	if (man_ctl_lh != NULL) {
		mutex_exit(&man_lock);
		goto exit;
	}

	ASSERT(man_ctl_wq == NULL);
	mutex_exit(&man_lock);

	status = ldi_open_by_name(DMAN_INT_PATH, FREAD | FWRITE | FNOCTTY,
	    kcred, &ctl_lh, li);
	if (status) {
		cmn_err(CE_WARN,
		    "man_open_ctl: man control dev open failed, "
		    "error %d", status);
		goto exit;
	}

	/*
	 * Update global config state. TBD - we don't need the lock here,
	 * since everyone is stuck in open until we finish. The only other
	 * modifier is man_deconfigure via _fini, which returns EBUSY if
	 * there are any open streams (other than control). We do need to
	 * signal qwaiters on error.
	 */
	mutex_enter(&man_lock);
	ASSERT(man_config_state == MAN_CONFIGURING);
	ASSERT(man_ctl_lh == NULL);
	man_ctl_lh = ctl_lh;
	mutex_exit(&man_lock);

exit:
	if (li)
		ldi_ident_release(li);

	MAN_DBG(MAN_CONFIG, ("man_open_ctl: man_ctl_lh(0x%p) errno = %d\n",
	    (void *)man_ctl_lh, status));

	return (status);
}

/*
 * man_close_ctl - Close control stream, we are about to unload driver.
 *
 * Locking:
 *	- Called holding no locks.
 */
static void
man_close_ctl()
{
	ldi_handle_t tlh;

	MAN_DBG(MAN_CONFIG, ("man_close_ctl: unplumbing control stream\n"));

	mutex_enter(&man_lock);
	if ((tlh = man_ctl_lh) != NULL)
		man_ctl_lh = NULL;
	mutex_exit(&man_lock);

	if (tlh != NULL) {
		(void) ldi_close(tlh, NULL, kcred);
	}

}

/*
 * Close the lower streams. Get all the timers canceled, close the lower
 * streams and delete the dest array.
 *
 * Locking:
 *	- Called holding no locks.
 */
static void
man_bclose(man_adest_t *adp)
{
	int		i;
	man_dest_t	*mdp;

	man_cancel_timers(adp);

	for (i = 0; i < adp->a_ndests; i++) {
		mdp = &adp->a_mdp[i];

		if (mdp->md_muxid != -1)
			man_unplumb(mdp);

		mutex_destroy(&mdp->md_lock);
	}
	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
	adp->a_mdp = NULL;
}

/*
 * We want to close down all lower streams. Before that can happen, all
 * timers and work related to these lower streams must be quiesced, so
 * cancel any outstanding qtimeouts and qbufcalls for each destination.
 */
static void
man_cancel_timers(man_adest_t *adp)
{
	man_dest_t	*mdp;
	int		cnt;
	int		i;

	mdp = adp->a_mdp;
	cnt = adp->a_ndests;

	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: mdp(0x%p) cnt %d",
	    (void *)mdp, cnt));

	for (i = 0; i < cnt; i++) {

		if (mdp[i].md_lc_timer_id != 0) {
			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
			mdp[i].md_lc_timer_id = 0;
		}

		if (mdp[i].md_bc_id != 0) {
			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
			mdp[i].md_bc_id = 0;
		}
	}

	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: returns"));
}

/*
 * A failover is started at start of day, when the driver detects a
 * link failure (see man_linkcheck_timer), or when DR detaches
 * the IO board containing the current active link between SC and
 * domain (see man_dr_detach, man_iwork, and man_do_dr_detach). A
 * MAN_WORK_SWITCH work request containing all the lower streams that
 * should be switched is posted on the man_bwork_q. This work request is
 * processed here. Once all lower streams have been switched to an
 * alternate path, the MAN_WORK_SWITCH work request is passed back to
 * man_iwork_q where it is processed within the inner perimeter of the
 * STREAMS framework (see man_iswitch).
 *
 * Note that when the switch fails for whatever reason, we just hand
 * back the lower streams untouched and let another failover happen.
 * Hopefully we will sooner or later succeed at the failover.
 */
static void
man_bswitch(man_adest_t *adp, man_work_t *wp)
{
	man_dest_t	*tdp;
	man_t		*manp;
	int		i;
	int		status = 0;

	/*
	 * Make a temporary copy of the dest array, updating the device to
	 * the alternate, and try to open all lower streams. The bgthread
	 * can sleep, so KM_SLEEP is fine here.
	 */

	tdp = man_kzalloc(sizeof (man_dest_t) * adp->a_ndests,
	    KM_SLEEP);
	bcopy(adp->a_mdp, tdp, sizeof (man_dest_t) * adp->a_ndests);

	/*
	 * Before we switch to the new path, lets sync the kstats.
	 */
	mutex_enter(&man_lock);

	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
	if (manp != NULL) {
		man_update_path_kstats(manp);
	} else
		status = ENODEV;

	mutex_exit(&man_lock);

	if (status != 0)
		goto exit;

	for (i = 0; i < adp->a_ndests; i++) {

		tdp[i].md_device = adp->a_st_dev;
		tdp[i].md_muxid = -1;

		if (man_plumb(&tdp[i]))
			break;
	}

	/*
	 * Didn't plumb everyone, unplumb new lower stuff and return.
	 */
	if (i < adp->a_ndests) {
		int	j;

		for (j = 0; j <= i; j++)
			man_unplumb(&tdp[j]);
		status = EAGAIN;
		goto exit;
	}

	if (man_is_on_domain && man_dossc_switch(adp->a_st_dev.mdev_exp_id)) {
		/*
		 * If we can't set the new path on the SSC, then fail the
		 * failover.
		 */
		for (i = 0; i < adp->a_ndests; i++)
			man_unplumb(&tdp[i]);
		status = EAGAIN;
		goto exit;
	}

	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
	adp->a_mdp = tdp;

exit:
	if (status)
		man_kfree(tdp, sizeof (man_dest_t) * adp->a_ndests);

	MAN_DBG(MAN_SWITCH, ("man_bswitch: returns %d", status));

	/*
	 * Hand processed switch request back to man_iwork for
	 * processing in man_iswitch.
	 */
	wp->mw_status = status;

	mutex_enter(&man_lock);
	man_work_add(man_iwork_q, wp);
	mutex_exit(&man_lock);

}

/*
 * man_plumb - Configure a lower stream for this destination.
 *
 * Locking:
 * 	- Called holding no locks.
 *
 * Returns:
 *	- success - 0
 *	- failure - error code of failure
 */
static int
man_plumb(man_dest_t *mdp)
{
	int		status;
	int		muxid;
	ldi_handle_t	lh;
	ldi_ident_t	li = NULL;

	MAN_DBG(MAN_SWITCH, ("man_plumb: mdp(0x%p) %s%d exp(%d)",
	    (void *)mdp, ddi_major_to_name(mdp->md_device.mdev_major),
	    mdp->md_device.mdev_ppa, mdp->md_device.mdev_exp_id));

	/*
	 * Control stream should already be open.
	 */
	if (man_ctl_lh == NULL) {
		status = EAGAIN;
		goto exit;
	}

	mutex_enter(&man_lock);
	ASSERT(man_ctl_wq != NULL);
	status = ldi_ident_from_stream(man_ctl_wq, &li);
	if (status != 0) {
		cmn_err(CE_WARN,
		    "man_plumb: ident alloc failed, error %d", status);
		goto exit;
	}
	mutex_exit(&man_lock);

	/*
	 * Previously, opens were done by a dev_t of makedev(clone_major,
	 * mdev_major), which should always map to /devices/pseudo/clone@0:eri.
	 */
	ASSERT(strcmp(ERI_IDNAME,
	    ddi_major_to_name(mdp->md_device.mdev_major)) == 0);

	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
	    kcred, &lh, li);
	if (status) {
		cmn_err(CE_WARN,
		    "man_plumb: eri open failed, error %d", status);
		goto exit;
	}

	/*
	 * Link netdev under MAN.
	 */
	ASSERT(mdp->md_muxid == -1);

	status = ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lh,
	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
	if (status) {
		cmn_err(CE_WARN,
		    "man_plumb: ldi_ioctl(I_PLINK) failed, error %d", status);
		(void) ldi_close(lh, NULL, kcred);
		goto exit;

	}
	mdp->md_muxid = muxid;
	mdp->md_wq = man_linkrec_find(muxid);
	/*
	 * If we can't find the linkrec then return an
	 * error. It will be automatically unplumbed on failure.
	 */
	if (mdp->md_wq == NULL)
		status = EAGAIN;

	(void) ldi_close(lh, NULL, kcred);
exit:
	if (li)
		ldi_ident_release(li);

	MAN_DBG(MAN_SWITCH, ("man_plumb: exit\n"));

	return (status);
}

/*
 * man_unplumb - tear down the STREAMS framework for the lower multiplexor.
 *
 *	mdp - destination struct of interest
 *
 * Any error from the I_PUNLINK ioctl is logged but otherwise ignored.
 */
static void
man_unplumb(man_dest_t *mdp)
{
	int	status, rval;

	MAN_DBG(MAN_SWITCH, ("man_unplumb: mdp"));
	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));

	if (mdp->md_muxid == -1)
		return;

	ASSERT(man_ctl_lh != NULL);

	/*
	 * I_PUNLINK causes the multiplexor resources to be freed.
	 */
	status = ldi_ioctl(man_ctl_lh, I_PUNLINK, (intptr_t)mdp->md_muxid,
	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &rval);
	if (status) {
		cmn_err(CE_WARN, "man_unplumb: ldi_ioctl(I_PUNLINK) failed"
		    " errno %d\n", status);
	}
	/*
	 * Delete the linkrec if it exists (man_linkrec_find removes a
	 * matching entry from the list as a side effect).
	 */
	(void) man_linkrec_find(mdp->md_muxid);
	mdp->md_muxid = -1;

}

/*
 * The routines below deal with paths and pathgroups. These data structures
 * are used to track the physical devices connecting the domain and SSC.
 * These devices make up the lower streams of the MAN multiplexor. The
 * routines all expect the man_lock to be held.
 *
 * A pathgroup consists of all paths that connect a particular domain and the
 * SSC. The concept of a pathgroup id (pg_id) is used to uniquely identify
 * a pathgroup.  For Domains, there is just one pathgroup, that connecting
 * the domain to the SSC (pg_id == 0). On the SSC, there is one pathgroup per
 * domain. The pg_id field corresponds to the domain tags A-R. A pg_id of
 * 0 means domain tag A, a pg_id of 1 means domain B, etc.
 *
 * The path data structure identifies one path between the SSC and a domain.
 * It describes the information for the path: the major and minor number of
 * the physical device; kstat pointers; and ethernet address of the
 * other end of the path.
 *
 * The pathgroups are anchored at man_pg_head and are protected by the
 * inner perimeter. The routines are only called by the STREAMS
 * portion of the driver.
 */
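/*
 * Conceptual layout for one man_t instance (paths are kept sorted by
 * ppa, and normally at most one path per group is MDEV_ACTIVE):
 *
 *	manp->man_pg --> man_pg_t (pg_id 0) --> man_pg_t (pg_id 1) --> ...
 *	                     |
 *	                     mpg_pathp --> man_path_t --> man_path_t --> ...
 */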

/*
 * Update man instance pathgroup info. Exclusive inner perimeter assures
 * this code is single threaded. man_refcnt assures man_t won't detach
 * while we are playing with man_pg stuff.
 *
 * Returns 0 on success, errno on failure.
 */
int
man_pg_cmd(mi_path_t *mip, man_work_t *waiter_wp)
{
	int		status = 0;
	man_t		*manp;

	if (mip->mip_ndevs < 0) {
		status = EINVAL;
		cmn_err(CE_WARN, "man_pg_cmd: EINVAL: mip_ndevs %d",
		    mip->mip_ndevs);
		goto exit;
	}

	ASSERT(MUTEX_HELD(&man_lock));
	manp = ddi_get_soft_state(man_softstate, mip->mip_man_ppa);
	if (manp == NULL) {
		status = ENODEV;
		goto exit;
	}

	MAN_DBG(MAN_PATH, ("man_pg_cmd: mip"));
	MAN_DBGCALL(MAN_PATH, man_print_mip(mip));

	MAN_DBG(MAN_PATH, ("\tman_t"));
	MAN_DBGCALL(MAN_PATH, man_print_man(manp));

	switch (mip->mip_cmd) {
	case MI_PATH_ASSIGN:
		status = man_pg_assign(&manp->man_pg, mip, FALSE);
		break;

	case MI_PATH_ADD:
		status = man_pg_assign(&manp->man_pg, mip, TRUE);
		break;

	case MI_PATH_UNASSIGN:
		status = man_pg_unassign(&manp->man_pg, mip);
		break;

	case MI_PATH_ACTIVATE:
		status = man_pg_activate(manp, mip, waiter_wp);
		break;

	case MI_PATH_READ:
		status = man_pg_read(manp->man_pg, mip);
		break;

	default:
		status = EINVAL;
		cmn_err(CE_NOTE, "man_pg_cmd: invalid command");
		break;
	}

exit:
	MAN_DBG(MAN_PATH, ("man_pg_cmd: returns %d", status));

	return (status);
}

/*
 * Assign paths to a pathgroup. If the pathgroup doesn't exist, create it.
 * If a path doesn't exist, create it. If the ethernet address of an
 * existing pathgroup differs, change it. If an existing path is not in the
 * new list, remove it. If anything changed, send a PATH_UPDATE request to
 * man_iwork to update all man_dest_t's.
 *
 * 	mplpp	- man pathgroup list pointer to pointer.
 *	mip	- new/updated pathgroup info to assign.
 */
static int
man_pg_assign(man_pg_t **mplpp, mi_path_t *mip, int add_only)
{
	man_pg_t	*mpg;
	man_path_t	*mp;
	man_path_t	*add_paths = NULL;
	int		cnt;
	int		i;
	int		first_pass = TRUE;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));

	cnt = mip->mip_ndevs;
	if (cnt == 0) {
		status = EINVAL;
		cmn_err(CE_NOTE, "man_pg_assign: mip_ndevs == 0");
		goto exit;
	}

	/*
	 * Assure the devices to be assigned are not assigned to some other
	 * pathgroup.
	 */
	for (i = 0; i < cnt; i++) {
		mpg = man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL);

		if (mpg == NULL)
			continue;

		if ((mpg->mpg_man_ppa != mip->mip_man_ppa) ||
		    (mpg->mpg_pg_id != mip->mip_pg_id)) {
			/*
			 * Already assigned to some other man instance
			 * or pathgroup.
			 */
			status = EEXIST;
			goto exit;
		}
	}

	/*
	 * Find the pathgroup, or allocate a new one if it doesn't exist,
	 * and add it to the list at mplpp. Result is that mpg points to the
	 * pathgroup to modify.
	 */
	mpg = man_find_pg_by_id(*mplpp, mip->mip_pg_id);
	if (mpg == NULL) {

		status = man_pg_create(mplpp, &mpg, mip);
		if (status)
			goto exit;

	} else if (ether_cmp(&mip->mip_eaddr, &mpg->mpg_dst_eaddr) != 0) {

		cmn_err(CE_WARN, "man_pg_assign: ethernet address mismatch");
		cmn_err(CE_CONT, "existing %s",
		    ether_sprintf(&mpg->mpg_dst_eaddr));
		cmn_err(CE_CONT, "new %s",
		    ether_sprintf(&mip->mip_eaddr));

		status = EINVAL;
		goto exit;
	}

	/*
	 * Create list of new paths to add to pathgroup.
	 */
	for (i = 0; i < cnt; i++) {

		if (man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL))
			continue;	/* Already exists in this pathgroup */

		mp = man_kzalloc(sizeof (man_path_t), KM_NOSLEEP);
		if (mp == NULL) {
			status = ENOMEM;
			goto exit;
		}

		mp->mp_device = mip->mip_devs[i];
		mp->mp_device.mdev_state = MDEV_ASSIGNED;

		MAN_DBG(MAN_PATH, ("man_pg_assign: assigning mdp"));
		MAN_DBGCALL(MAN_PATH, man_print_dev(&mp->mp_device));

		status = man_path_kstat_init(mp);
		if (status) {
			man_kfree(mp, sizeof (man_path_t));
			goto exit;
		}

		man_path_insert(&add_paths, mp);
	}

	/*
	 * man_dr_attach passes only the path which is being DRd in.
	 * So just add the path and don't worry about removing paths.
	 */
	if (add_only == TRUE)
		goto exit;


	/*
	 * Check if any paths we want to remove are ACTIVE. If not,
	 * do a second pass and remove them.
	 */
again:
	mp = mpg->mpg_pathp;
	while (mp != NULL) {
		int		in_new_list;
		man_path_t	*rp;

		rp = NULL;
		in_new_list = FALSE;

		for (i = 0; i < cnt; i++) {
			if (mp->mp_device.mdev_ppa ==
			    mip->mip_devs[i].mdev_ppa) {

				in_new_list = TRUE;
				break;
			}
		}

		if (!in_new_list) {
			if (first_pass) {
				if (mp->mp_device.mdev_state & MDEV_ACTIVE) {
					status = EBUSY;
					goto exit;
				}
			} else {
				rp = mp;
			}
		}
		mp = mp->mp_next;

		if (rp != NULL)
			man_path_remove(&mpg->mpg_pathp, rp);
	}

	if (first_pass == TRUE) {
		first_pass = FALSE;
		goto again;
	}

exit:
	if (status == 0) {
		if (add_paths)
			man_path_merge(&mpg->mpg_pathp, add_paths);
	} else {
		while (add_paths != NULL) {
			mp = add_paths;
			add_paths = mp->mp_next;
			mp->mp_next = NULL;

			man_path_kstat_uninit(mp);
			man_kfree(mp, sizeof (man_path_t));
		}
	}

	return (status);
}

/*
 * Remove all paths from a pathgroup (domain shutdown). If there is an
 * active path in the group, shut down all destinations referencing it
 * first.
 */
static int
man_pg_unassign(man_pg_t **plpp, mi_path_t *mip)
{
	man_pg_t	*mpg;
	man_pg_t	*tpg;
	man_pg_t	*tppg;
	man_path_t	*mp = NULL;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));

	/*
	 * Check for existence of pathgroup.
	 */
	if ((mpg = man_find_pg_by_id(*plpp, mip->mip_pg_id)) == NULL)
		goto exit;

	if (man_find_active_path(mpg->mpg_pathp) != NULL) {
		status = man_remove_dests(mpg);
		if (status)
			goto exit;
	}

	/*
	 * Free all the paths for this pathgroup.
	 */
	while (mpg->mpg_pathp) {
		mp = mpg->mpg_pathp;
		mpg->mpg_pathp = mp->mp_next;
		mp->mp_next = NULL;

		man_path_kstat_uninit(mp);
		man_kfree(mp, sizeof (man_path_t));
	}

	/*
	 * Remove this pathgroup from the list, and free it.
	 */
	tpg = tppg = *plpp;
	if (tpg == mpg) {
		*plpp = tpg->mpg_next;
		goto free_pg;
	}

	for (tpg = tpg->mpg_next; tpg != NULL; tpg = tpg->mpg_next) {
		if (tpg == mpg)
			break;
		tppg = tpg;
	}

	ASSERT(tpg != NULL);

	tppg->mpg_next = tpg->mpg_next;
	tpg->mpg_next = NULL;

free_pg:
	man_kfree(tpg, sizeof (man_pg_t));

exit:
	return (status);

}

/*
 * Set a new active path. This is done via man_ioctl so we are
 * exclusive in the inner perimeter.
 */
static int
man_pg_activate(man_t *manp, mi_path_t *mip, man_work_t *waiter_wp)
{
	man_pg_t	*mpg1;
	man_pg_t	*mpg2;
	man_pg_t	*plp;
	man_path_t	*mp;
	man_path_t	*ap;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));
	MAN_DBG(MAN_PATH, ("man_pg_activate: dev"));
	MAN_DBGCALL(MAN_PATH, man_print_dev(mip->mip_devs));

	if (mip->mip_ndevs != 1) {
		status = EINVAL;
		goto exit;
	}

	plp = manp->man_pg;
	mpg1 = man_find_pg_by_id(plp, mip->mip_pg_id);
	if (mpg1 == NULL) {
		status = EINVAL;
		goto exit;
	}

	mpg2 = man_find_path_by_dev(plp, mip->mip_devs, &mp);
	if (mpg2 == NULL) {
		status = ENODEV;
		goto exit;
	}

	if (mpg1 != mpg2) {
		status = EINVAL;
		goto exit;
	}

	ASSERT(mp->mp_device.mdev_ppa == mip->mip_devs->mdev_ppa);

	if (mpg1->mpg_flags & MAN_PG_SWITCHING) {
		status = EAGAIN;
		goto exit;
	}

	ap = man_find_active_path(mpg1->mpg_pathp);
	if (ap == NULL) {
		/*
		 * This is the first time a path has been activated for
		 * this pathgroup. Initialize all upper streams dest
		 * structure for this pathgroup so autoswitch will find
		 * them.
		 */
		mp->mp_device.mdev_state |= MDEV_ACTIVE;
		man_add_dests(mpg1);
		goto exit;
	}

	/*
	 * Path already active, nothing to do.
	 */
	if (ap == mp)
		goto exit;

	/*
	 * Try to autoswitch to requested device. Set flags and refcnt.
	 * Cleared in man_iswitch when SWITCH completes.
	 */
	manp->man_refcnt++;
	mpg1->mpg_flags |= MAN_PG_SWITCHING;

	/*
	 * Switch to path specified.
	 */
	status = man_autoswitch(mpg1, mip->mip_devs, waiter_wp);

	if (status != 0) {
		/*
		 * man_iswitch not going to run, clean up.
		 */
		manp->man_refcnt--;
		mpg1->mpg_flags &= ~MAN_PG_SWITCHING;

		if (status == ENODEV) {
			/*
			 * Device not plumbed isn't really an error. Change
			 * active device setting here, since man_iswitch isn't
			 * going to be run to do it.
			 */
			status = 0;
			ap->mp_device.mdev_state &= ~MDEV_ACTIVE;
			mp->mp_device.mdev_state |= MDEV_ACTIVE;
		}
	}

exit:
	MAN_DBG(MAN_PATH, ("man_pg_activate: returns %d", status));

	return (status);
}

static int
man_pg_read(man_pg_t *plp, mi_path_t *mip)
{
	man_pg_t	*mpg;
	man_path_t	*mp;
	int		cnt;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));

	if ((mpg = man_find_pg_by_id(plp, mip->mip_pg_id)) == NULL) {
		status = ENODEV;
		goto exit;
	}

	cnt = 0;
	for (mp = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
		bcopy(&mp->mp_device, &mip->mip_devs[cnt], sizeof (man_dev_t));
		if (cnt == mip->mip_ndevs)
			break;
		cnt++;
	}

	MAN_DBG(MAN_PATH, ("man_pg_read: pg(0x%p) id(%d) found %d paths",
	    (void *)mpg, mpg->mpg_pg_id, cnt));

	mip->mip_ndevs = cnt;

	/*
	 * TBD - What should errno be if the user buffer is too small?
	 */
	if (mp != NULL) {
		status = ENOMEM;
	}

exit:

	return (status);
}

/*
 * Return the existing pathgroup, or create it. TBD - Need to update
 * all destinations if we added a pathgroup. Also, need to update
 * all of man_strup if we add a path.
 *
 * 	mplpp	- man pathgroup list pointer to pointer.
 * 	mpgp	- returns newly created man pathgroup.
 *	mip	- info to fill in mpgp.
 */
static int
man_pg_create(man_pg_t **mplpp, man_pg_t **mpgp, mi_path_t *mip)
{
	man_pg_t	*mpg;
	man_pg_t	*tpg;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));

	if (ether_cmp(&mip->mip_eaddr, &zero_ether_addr) == 0) {
		cmn_err(CE_NOTE, "man_ioctl: man_pg_create: ether"
		    " address not set!");
		status = EINVAL;
		goto exit;
	}

	mpg = man_kzalloc(sizeof (man_pg_t), KM_NOSLEEP);
	if (mpg == NULL) {
		status = ENOMEM;
		goto exit;
	}

	mpg->mpg_flags = MAN_PG_IDLE;
	mpg->mpg_pg_id = mip->mip_pg_id;
	mpg->mpg_man_ppa = mip->mip_man_ppa;
	ether_copy(&mip->mip_eaddr, &mpg->mpg_dst_eaddr);

	MAN_DBG(MAN_PATH, ("man_pg_create: new mpg"));
	MAN_DBGCALL(MAN_PATH, man_print_mpg(mpg));

	tpg = *mplpp;
	if (tpg == NULL) {
		*mplpp = mpg;
	} else {
		while (tpg->mpg_next != NULL)
			tpg = tpg->mpg_next;
		tpg->mpg_next = mpg;
	}

exit:
	*mpgp = mpg;

	return (status);
}

/*
 * Return pointer to pathgroup containing mdevp, null otherwise. Also,
 * if a path pointer is passed in, set it to matching path in pathgroup.
 *
 * Called holding man_lock.
 */
static man_pg_t *
man_find_path_by_dev(man_pg_t *plp, man_dev_t *mdevp, man_path_t **mpp)
{
	man_pg_t	*mpg;
	man_path_t	*mp;

	ASSERT(MUTEX_HELD(&man_lock));
	for (mpg = plp; mpg != NULL; mpg = mpg->mpg_next) {
		for (mp  = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
			if (mp->mp_device.mdev_major == mdevp->mdev_major &&
			    mp->mp_device.mdev_ppa == mdevp->mdev_ppa) {

				if (mpp != NULL)
					*mpp = mp;
				return (mpg);
			}
		}
	}

	return (NULL);
}

/*
 * Return pointer to pathgroup assigned to destination, null if not found.
 *
 * Called holding man_lock.
 */
static man_pg_t *
man_find_pg_by_id(man_pg_t *mpg, int pg_id)
{
	ASSERT(MUTEX_HELD(&man_lock));
	for (; mpg != NULL; mpg = mpg->mpg_next) {
		if (mpg->mpg_pg_id == pg_id)
			return (mpg);
	}

	return (NULL);
}

static man_path_t *
man_find_path_by_ppa(man_path_t *mplist, int ppa)
{
	man_path_t	*mp;

	ASSERT(MUTEX_HELD(&man_lock));
	for (mp = mplist; mp != NULL; mp = mp->mp_next) {
		if (mp->mp_device.mdev_ppa == ppa)
			return (mp);
	}

	return (NULL);
}

static man_path_t *
man_find_active_path(man_path_t *mplist)
{
	man_path_t	*mp;

	ASSERT(MUTEX_HELD(&man_lock));
	for (mp = mplist; mp != NULL; mp = mp->mp_next)
		if (mp->mp_device.mdev_state & MDEV_ACTIVE)
			return (mp);

	return (NULL);
}

/*
 * Try and find an alternate path.
 */
static man_path_t *
man_find_alternate_path(man_path_t *mlp)
{
	man_path_t	*ap;		/* Active path */
	man_path_t	*np;		/* New alternate path */
	man_path_t	*fp = NULL;	/* LRU failed path */

	ASSERT(MUTEX_HELD(&man_lock));
	ap = man_find_active_path(mlp);

	/*
	 * Find a non-failed path, or failing that, the least recently
	 * failed (LRU) path, and switch to it.
	 */
	for (np = mlp; np != NULL; np = np->mp_next) {
		if (np == ap)
			continue;

		if (np->mp_device.mdev_state == MDEV_ASSIGNED)
			goto exit;

		if (np->mp_device.mdev_state & MDEV_FAILED) {
			if (fp == NULL || fp->mp_lru > np->mp_lru)
				fp = np;
		}
	}

	/*
	 * No unfailed path found; fall back to the LRU failed path,
	 * which may itself be NULL (nowhere to switch to).
	 */
	if (np == NULL)
		np = fp;

exit:
	return (np);
}

/*
 * Assumes caller has verified existence.
 */
static void
man_path_remove(man_path_t **lpp, man_path_t *mp)
{
	man_path_t	*tp;
	man_path_t	*tpp;

	ASSERT(MUTEX_HELD(&man_lock));
	MAN_DBG(MAN_PATH, ("man_path_remove: removing path"));
	MAN_DBGCALL(MAN_PATH, man_print_path(mp));

	tp = tpp = *lpp;
	if (tp == mp) {
		*lpp = tp->mp_next;
		goto exit;
	}

	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
		if (tp == mp)
			break;
		tpp = tp;
	}

	ASSERT(tp != NULL);

	tpp->mp_next = tp->mp_next;
	tp->mp_next = NULL;

exit:
	man_path_kstat_uninit(tp);
	man_kfree(tp, sizeof (man_path_t));

}

/*
 * Insert path into list, ascending order by ppa.
 */
static void
man_path_insert(man_path_t **lpp, man_path_t *mp)
{
	man_path_t	*tp;
	man_path_t	*tpp;

	ASSERT(MUTEX_HELD(&man_lock));
	if (*lpp == NULL) {
		*lpp = mp;
		return;
	}

	tp = tpp = *lpp;
	if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa) {
		mp->mp_next = tp;
		*lpp = mp;
		return;
	}

	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
		if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa)
			break;
		tpp = tp;
	}

	mp->mp_next = tp;
	tpp->mp_next = mp;
}

/*
 * Merge np into lpp, ascending order by ppa. Assumes no
 * duplicates in either list.
 */
static void
man_path_merge(man_path_t **lpp, man_path_t *np)
{
	man_path_t	*tmp;

	ASSERT(MUTEX_HELD(&man_lock));
	while (np != NULL) {
		tmp = np;
		np = np->mp_next;
		tmp->mp_next = NULL;

		man_path_insert(lpp, tmp);
	}

}

static int
man_path_kstat_init(man_path_t *mpp)
{

	kstat_named_t	*dev_knp;
	int		status = 0;

	ASSERT(MUTEX_HELD(&man_lock));
	MAN_DBG(MAN_PATH, ("man_path_kstat_init: mpp(0x%p)\n", (void *)mpp));

	/*
	 * Create named kstats for accounting purposes.
	 */
	dev_knp = man_kzalloc(MAN_NUMSTATS * sizeof (kstat_named_t),
	    KM_NOSLEEP);
	if (dev_knp == NULL) {
		status = ENOMEM;
		goto exit;
	}
	man_kstat_named_init(dev_knp, MAN_NUMSTATS);
	mpp->mp_last_knp = dev_knp;

exit:

	MAN_DBG(MAN_PATH, ("man_path_kstat_init: returns %d\n", status));

	return (status);
}

static void
man_path_kstat_uninit(man_path_t *mp)
{
	ASSERT(MUTEX_HELD(&man_lock));
	man_kfree(mp->mp_last_knp, MAN_NUMSTATS * sizeof (kstat_named_t));
}

/*
 * man_work_alloc - allocate and initialize a work request structure
 *
 *	type - type of request to allocate
 *	returns	- success - ptr to an initialized work structure
 *		- failure - NULL
 */
man_work_t *
man_work_alloc(int type, int kmflag)
{
	man_work_t	*wp;

	wp = man_kzalloc(sizeof (man_work_t), kmflag);
	if (wp == NULL)
		goto exit;

	cv_init(&wp->mw_cv, NULL, CV_DRIVER, NULL);
	wp->mw_type = type;

exit:
	return (wp);
}

/*
 * man_work_free - deallocate a work request structure
 *
 *	wp - ptr to work structure to be freed
 */
void
man_work_free(man_work_t *wp)
{
	cv_destroy(&wp->mw_cv);
	man_kfree((void *)wp, sizeof (man_work_t));
}

/*
 * Post work to a work queue.  The man_bwork sleeps on
 * man_bwork_q->q_cv, and work requesters may sleep on mw_cv.
 * The man_lock is used to protect both cv's.
 */
void
man_work_add(man_workq_t *q, man_work_t *wp)
{
	man_work_t	*lp = q->q_work;

	if (lp) {
		while (lp->mw_next != NULL)
			lp = lp->mw_next;

		lp->mw_next = wp;

	} else {
		q->q_work = wp;
	}

	/*
	 * cv_signal for man_bwork_q, qenable for man_iwork_q
	 */
	if (q == man_bwork_q) {
		cv_signal(&q->q_cv);

	} else {	/* q == man_iwork_q */

		if (man_ctl_wq != NULL)
			qenable(man_ctl_wq);
	}

}

/* <<<<<<<<<<<<<<<<<<<<<<< NDD SUPPORT FUNCTIONS	>>>>>>>>>>>>>>>>>>> */
/*
 * ndd support functions to get/set parameters
 */

/*
 * Register each element of the parameter array with the
 * named dispatch handler. Each element is loaded using
 * nd_load()
 *
 * 	cnt	- the number of elements present in the parameter array
 */
static int
man_param_register(param_t *manpa, int cnt)
{
	int	i;
	ndgetf_t getp;
	ndsetf_t setp;
	int	status = B_TRUE;

	MAN_DBG(MAN_CONFIG, ("man_param_register: manpa(0x%p) cnt %d\n",
	    (void *)manpa, cnt));

	getp = man_param_get;

	for (i = 0; i < cnt; i++, manpa++) {
		switch (man_param_display[i]) {
		case MAN_NDD_GETABLE:
			setp = NULL;
			break;

		case MAN_NDD_SETABLE:
			setp = man_param_set;
			break;

		default:
			continue;
		}

		if (!nd_load(&man_ndlist, manpa->param_name, getp,
		    setp, (caddr_t)manpa)) {

			(void) man_nd_free(&man_ndlist);
			status = B_FALSE;
			goto exit;
		}
	}

	if (!nd_load(&man_ndlist, "man_pathgroups_report",
	    man_pathgroups_report, NULL, NULL)) {

		(void) man_nd_free(&man_ndlist);
		status = B_FALSE;
		goto exit;
	}

	if (!nd_load(&man_ndlist, "man_set_active_path",
	    NULL, man_set_active_path, NULL)) {

		(void) man_nd_free(&man_ndlist);
		status = B_FALSE;
		goto exit;
	}

	if (!nd_load(&man_ndlist, "man_get_hostinfo",
	    man_get_hostinfo, NULL, NULL)) {

		(void) man_nd_free(&man_ndlist);
		status = B_FALSE;
		goto exit;
	}

exit:

	MAN_DBG(MAN_CONFIG, ("man_param_register: returns %d\n", status));

	return (status);
}

static void
man_nd_getset(queue_t *wq, mblk_t *mp)
{

	if (!nd_getset(wq, man_ndlist, mp))
		miocnak(wq, mp, 0, ENOENT);
	else
		qreply(wq, mp);
}

/*ARGSUSED*/
static int
man_pathgroups_report(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
{

	man_t		*manp;
	man_pg_t	*mpg;
	int		i;
	char		pad[] = "                 "; /* 17 spaces */
	int		pad_end;


	MAN_DBG(MAN_PATH, ("man_pathgroups_report: wq(0x%p) mp(0x%p)"
	    " caddr 0x%p", (void *)wq, (void *)mp, (void *)cp));

	(void) mi_mpprintf(mp, "MAN Pathgroup report: (* == failed)");
	(void) mi_mpprintf(mp, "====================================="
	    "==========================================");

	mutex_enter(&man_lock);

	for (i = 0; i < 2; i++) {
		manp = ddi_get_soft_state(man_softstate, i);
		if (manp == NULL)
			continue;

		(void) mi_mpprintf(mp,
		    "Interface\tDestination\t\tActive Path\tAlternate Paths");
		(void) mi_mpprintf(mp,
		    "---------------------------------------"
		    "----------------------------------------");

		for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {

			(void) mi_mpprintf(mp, "%s%d\t\t",
			    ddi_major_to_name(manp->man_meta_major),
			    manp->man_meta_ppa);

			if (man_is_on_domain) {
				(void) mi_mpprintf_nr(mp, "Master SSC\t");
				man_preport(mpg->mpg_pathp, mp);
			} else {
				if (i == 0) {
					pad_end = 17 - strlen(ether_sprintf(
					    &mpg->mpg_dst_eaddr));
					if (pad_end < 0 || pad_end > 16)
						pad_end = 0;
					pad[pad_end] = '\0';

					(void) mi_mpprintf_nr(mp, "%c %s%s",
					    mpg->mpg_pg_id + 'A',
					    ether_sprintf(&mpg->mpg_dst_eaddr),
					    pad);

					pad[pad_end] = ' ';
				} else {
					(void) mi_mpprintf_nr(mp,
					    "Other SSC\t");
				}
				man_preport(mpg->mpg_pathp, mp);
			}
			(void) mi_mpprintf_nr(mp, "\n");
		}
	}

	mutex_exit(&man_lock);
	MAN_DBG(MAN_PATH, ("man_pathgroups_report: returns"));

	return (0);
}

static void
man_preport(man_path_t *plist, mblk_t *mp)
{
	man_path_t	*ap;

	ap = man_find_active_path(plist);
	/*
	 * Active path
	 */
	if (ap != NULL) {
		(void) mi_mpprintf_nr(mp, "\t%s%d\t\t",
		    ddi_major_to_name(ap->mp_device.mdev_major),
		    ap->mp_device.mdev_ppa);
	} else {
		(void) mi_mpprintf_nr(mp, "None \t");
	}

	/*
	 * Alternate Paths.
	 */
	while (plist != NULL) {
		(void) mi_mpprintf_nr(mp, "%s%d exp %d",
		    ddi_major_to_name(plist->mp_device.mdev_major),
		    plist->mp_device.mdev_ppa,
		    plist->mp_device.mdev_exp_id);
		if (plist->mp_device.mdev_state & MDEV_FAILED)
			(void) mi_mpprintf_nr(mp, "*");
		plist = plist->mp_next;
		if (plist)
			(void) mi_mpprintf_nr(mp, ", ");
	}
}

/*
 * NDD request to set active path. Calling context is man_ioctl, so we are
 * exclusive in the inner perimeter.
 *
 *	Syntax is "ndd -set /dev/dman man_set_active_path
 *	'<man ppa> <pg_id> <phys ppa>'"
 */
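/*
 * For example (the ppa and pg_id values below are illustrative only):
 *
 *	ndd -set /dev/dman man_set_active_path "0 1 2"
 *
 * would activate physical ppa 2 in pathgroup 1 of MAN instance 0.
 */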
/* ARGSUSED3 */
static int
man_set_active_path(queue_t *wq, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	char		*end, *meta_ppap, *phys_ppap, *pg_idp;
	int		meta_ppa;
	int		phys_ppa;
	int		pg_id;
	man_t		*manp;
	man_pg_t	*mpg;
	man_path_t	*np;
	mi_path_t	mpath;
	int		status = 0;

	MAN_DBG(MAN_PATH, ("man_set_active_path: wq(0x%p) mp(0x%p)"
	    " args %s", (void *)wq, (void *)mp, value));

	meta_ppap = value;

	if ((pg_idp = strchr(value, ' ')) == NULL) {
		status = EINVAL;
		goto exit;
	}

	*pg_idp++ = '\0';

	if ((phys_ppap = strchr(pg_idp, ' ')) == NULL) {
		status = EINVAL;
		goto exit;
	}

	*phys_ppap++ = '\0';

	meta_ppa = (int)mi_strtol(meta_ppap, &end, 10);
	pg_id = (int)mi_strtol(pg_idp, &end, 10);
	phys_ppa = (int)mi_strtol(phys_ppap, &end, 10);

	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, meta_ppa);
	if (manp == NULL || manp->man_pg == NULL) {
		status = EINVAL;
		mutex_exit(&man_lock);
		goto exit;
	}

	mpg = man_find_pg_by_id(manp->man_pg, pg_id);
	if (mpg == NULL) {
		status = EINVAL;
		mutex_exit(&man_lock);
		goto exit;
	}

	np = man_find_path_by_ppa(mpg->mpg_pathp, phys_ppa);

	if (np == NULL) {
		status = EINVAL;
		mutex_exit(&man_lock);
		goto exit;
	}

	mpath.mip_cmd = MI_PATH_ACTIVATE;
	mpath.mip_pg_id = pg_id;
	mpath.mip_man_ppa = meta_ppa;
	mpath.mip_devs[0] = np->mp_device;
	mpath.mip_ndevs = 1;

	status = man_pg_cmd(&mpath, NULL);
	mutex_exit(&man_lock);

exit:

	MAN_DBG(MAN_PATH, ("man_set_active_path: returns %d", status));

	return (status);
}

/*
 * Dump out the contents of the IOSRAM handoff structure. Note that if
 * anything changes here, you must make sure that the sysinit script
 * stays in sync with this output.
 */
/* ARGSUSED */
static int
man_get_hostinfo(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	manc_t	manc;
	char	*ipaddr;
	char	ipv6addr[INET6_ADDRSTRLEN];
	int	i;
	int	status;

	if (!man_is_on_domain)
		return (0);

	if ((status = man_get_iosram(&manc)) != 0) {
		return (status);
	}

	mi_mpprintf(mp, "manc_magic = 0x%x", manc.manc_magic);
	mi_mpprintf(mp, "manc_version = 0%d", manc.manc_version);
	mi_mpprintf(mp, "manc_csum = 0x%x", manc.manc_csum);

	if (manc.manc_ip_type == AF_INET) {
		in_addr_t	netnum;

		mi_mpprintf(mp, "manc_ip_type = AF_INET");

		ipaddr = man_inet_ntoa(manc.manc_dom_ipaddr);
		mi_mpprintf(mp, "manc_dom_ipaddr = %s", ipaddr);

		ipaddr = man_inet_ntoa(manc.manc_dom_ip_netmask);
		mi_mpprintf(mp, "manc_dom_ip_netmask = %s", ipaddr);

		netnum = manc.manc_dom_ipaddr & manc.manc_dom_ip_netmask;
		ipaddr = man_inet_ntoa(netnum);
		mi_mpprintf(mp, "manc_dom_ip_netnum = %s", ipaddr);

		ipaddr = man_inet_ntoa(manc.manc_sc_ipaddr);
		mi_mpprintf(mp, "manc_sc_ipaddr = %s", ipaddr);

	} else if (manc.manc_ip_type == AF_INET6) {

		mi_mpprintf(mp, "manc_ip_type = AF_INET6");

		(void) inet_ntop(AF_INET6, (void *)&manc.manc_dom_ipv6addr,
		    ipv6addr, INET6_ADDRSTRLEN);
		mi_mpprintf(mp, "manc_dom_ipv6addr = %s", ipv6addr);

		mi_mpprintf(mp, "manc_dom_ipv6_netmask = %d",
		    manc.manc_dom_ipv6_netmask.s6_addr[0]);

		(void) inet_ntop(AF_INET6, (void *)&manc.manc_sc_ipv6addr,
		    ipv6addr, INET6_ADDRSTRLEN);
		mi_mpprintf(mp, "manc_sc_ipv6addr = %s", ipv6addr);

	} else {

		mi_mpprintf(mp, "manc_ip_type = NONE");
	}

	mi_mpprintf(mp, "manc_dom_eaddr = %s",
	    ether_sprintf(&manc.manc_dom_eaddr));
	mi_mpprintf(mp, "manc_sc_eaddr = %s",
	    ether_sprintf(&manc.manc_sc_eaddr));

	mi_mpprintf(mp, "manc_iob_bitmap = 0x%x\tio boards = ",
	    manc.manc_iob_bitmap);
	for (i = 0; i < MAN_MAX_EXPANDERS; i++) {
		if ((manc.manc_iob_bitmap >> i) & 0x1) {
			mi_mpprintf_nr(mp, "%d.1, ", i);
		}
	}
	mi_mpprintf(mp, "manc_golden_iob = %d", manc.manc_golden_iob);

	return (0);
}

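/*
 * Convert an IPv4 address to dotted decimal form. Note that the result
 * points to a static buffer, so this routine is not reentrant.
 */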
static char *
man_inet_ntoa(in_addr_t in)
{
	static char b[18];
	unsigned char *p;

	p = (unsigned char *)&in;
	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
	return (b);
}

/*
 * man_param_get - nd get handler. Prints the current parameter
 * value. cp points to the required parameter.
 */
/* ARGSUSED */
static int
man_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	param_t	*manpa = (param_t *)cp;

	(void) mi_mpprintf(mp, "%u", manpa->param_val);
	return (0);
}

/*
 * man_param_set - nd set handler. Sets the man parameter to the new
 * value after range checking it against param_min and param_max.
 */
/* ARGSUSED */
static int
man_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	char *end;
	size_t new_value;
	param_t	*manpa = (param_t *)cp;

	new_value = mi_strtol(value, &end, 10);

	if (end == value || new_value < manpa->param_min ||
	    new_value > manpa->param_max) {
			return (EINVAL);
	}

	manpa->param_val = new_value;

	return (0);

}

/*
 * Free the Named Dispatch Table by calling nd_free.
 */
static void
man_param_cleanup()
{
	if (man_ndlist != NULL)
		nd_free(&man_ndlist);
}

/*
 * Free the table pointed to by 'nd_pparam'.
 */
static void
man_nd_free(caddr_t *nd_pparam)
{
	ND	*nd;

	if ((nd = (ND *)(*nd_pparam)) != NULL) {
		if (nd->nd_tbl)
			mi_free((char *)nd->nd_tbl);
		mi_free((char *)nd);
		*nd_pparam = NULL;
	}
}


/*
 * man_kstat_update - update the statistics for a meta-interface.
 *
 *	ksp - kstats struct
 *	rw - flag indicating whether stats are to be read or written.
 *
 *	returns	0
 *
 * The destination specific kstat information is protected by the
 * perimeter lock, so we submit a work request to get the stats
 * updated (see man_do_kstats()), and then collect the results
 * when cv_signal'd. Note that we are doing cv_timedwait_sig()
 * as a precautionary measure only.
 */
static int
man_kstat_update(kstat_t *ksp, int rw)
{
	man_t			*manp;		/* per instance data */
	man_work_t		*wp;
	int			status = 0;
	kstat_named_t		*knp;
	kstat_named_t		*man_knp;
	int			i;

	MAN_DBG(MAN_KSTAT, ("man_kstat_update: %s\n", rw ? "KSTAT_WRITE" :
	    "KSTAT_READ"));

	mutex_enter(&man_lock);
	manp = (man_t *)ksp->ks_private;
	manp->man_refcnt++;

	/*
	 * If the driver has been configured, get kstats updated by inner
	 * perimeter prior to retrieving.
	 */
	if (man_config_state == MAN_CONFIGURED) {
		clock_t wait_status;

		man_update_path_kstats(manp);
		wp = man_work_alloc(MAN_WORK_KSTAT_UPDATE, KM_SLEEP);
		wp->mw_arg.a_man_ppa = manp->man_meta_ppa;
		wp->mw_flags = MAN_WFLAGS_CVWAITER;
		man_work_add(man_iwork_q, wp);

		wait_status = cv_timedwait_sig(&wp->mw_cv, &man_lock,
		    ddi_get_lbolt() + drv_usectohz(manp->man_kstat_waittime));

		if (wp->mw_flags & MAN_WFLAGS_DONE) {
			status = wp->mw_status;
			man_work_free(wp);
		} else {
			ASSERT(wait_status <= 0);
			wp->mw_flags &= ~MAN_WFLAGS_CVWAITER;
			if (wait_status == 0)
				status = EINTR;
			else {
				MAN_DBG(MAN_KSTAT, ("man_kstat_update: "
				    "timedout, returning stale stats."));
				status = 0;
			}
		}
		if (status)
			goto exit;
	}

	knp = (kstat_named_t *)ksp->ks_data;
	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;

	if (rw == KSTAT_READ) {
		for (i = 0; i < MAN_NUMSTATS; i++) {
			knp[i].value.ui64 = man_knp[i].value.ui64;
		}
	} else {
		for (i = 0; i < MAN_NUMSTATS; i++) {
			man_knp[i].value.ui64 = knp[i].value.ui64;
		}
	}

exit:
	manp->man_refcnt--;
	mutex_exit(&man_lock);

	MAN_DBG(MAN_KSTAT, ("man_kstat_update: returns %d", status));

	return (status);
}
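
/*
 * Completion sketch (illustrative, not the actual worker code): the
 * thread that drains man_iwork_q completes a CVWAITER request under
 * man_lock roughly as follows:
 *
 *	mutex_enter(&man_lock);
 *	if (wp->mw_flags & MAN_WFLAGS_CVWAITER) {
 *		wp->mw_flags |= MAN_WFLAGS_DONE;
 *		wp->mw_status = status;
 *		cv_signal(&wp->mw_cv);
 *	} else {
 *		man_work_free(wp);
 *	}
 *	mutex_exit(&man_lock);
 *
 * A waiter that times out or is interrupted clears MAN_WFLAGS_CVWAITER
 * before returning (see above), which makes the worker responsible for
 * freeing the request.
 */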

/*
 * Sum destination kstats for all active paths for a given instance of the
 * MAN driver. Called from within the inner perimeter.
 */
static void
man_do_kstats(man_work_t *wp)
{
	man_t		*manp;
	man_pg_t	*mpg;
	man_path_t	*mp;

	MAN_DBG(MAN_KSTAT, ("man_do_kstats:"));

	mutex_enter(&man_lock);
	/*
	 * Sync mp_last_knp for each path associated with the MAN instance.
	 */
	manp = (man_t *)ddi_get_soft_state(man_softstate,
	    wp->mw_arg.a_man_ppa);
	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {

		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);

		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {

			MAN_DBG(MAN_KSTAT, ("\tkstat: path"));
			MAN_DBGCALL(MAN_KSTAT, man_print_path(mp));

			/*
			 * We just want to update the destination
			 * statistics here.
			 */
			man_sum_dests_kstats(mp->mp_last_knp, mpg);
		}
	}
	mutex_exit(&man_lock);
	MAN_DBG(MAN_KSTAT, ("man_do_kstats: returns"));
}

/*
 * Sum device kstats for all active paths for a given instance of the
 * MAN driver. Called with man_lock held.
 */
static void
man_update_path_kstats(man_t *manp)
{
	kstat_named_t	*man_knp;
	man_pg_t	*mpg;
	man_path_t	*mp;

	ASSERT(MUTEX_HELD(&man_lock));
	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats:"));

	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;

	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {

		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);

		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {

			man_update_dev_kstats(man_knp, mp);

		}
	}
	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats: returns"));
}

/*
 * Update the device kstats.
 * As man_kstat_update() is called with kstat_chain_lock held,
 * we can safely update the statistics from the underlying driver here.
 */
static void
man_update_dev_kstats(kstat_named_t *man_knp, man_path_t *mp)
{
	kstat_t		*dev_ksp;
	major_t		major;
	int		instance;
	char		buf[KSTAT_STRLEN];


	major = mp->mp_device.mdev_major;
	instance = mp->mp_device.mdev_ppa;
	(void) sprintf(buf, "%s%d", ddi_major_to_name(major), instance);

	dev_ksp = kstat_hold_byname(ddi_major_to_name(major), instance, buf,
	    ALL_ZONES);
	if (dev_ksp != NULL) {

		KSTAT_ENTER(dev_ksp);
		KSTAT_UPDATE(dev_ksp, KSTAT_READ);
		man_sum_kstats(man_knp, dev_ksp, mp->mp_last_knp);
		KSTAT_EXIT(dev_ksp);
		kstat_rele(dev_ksp);

	} else {
		MAN_DBG(MAN_KSTAT,
		    ("man_update_dev_kstats: no kstat data found for %s(%d,%d)",
		    buf, major, instance));
	}
}
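
/*
 * For example (assuming a path over the eri driver), a mirrored device
 * with major name "eri" and ppa 2 resolves to the named kstat "eri2",
 * i.e. the kstat published by instance 2 of eri. KSTAT_ENTER/KSTAT_EXIT
 * bracket the update so the snapshot summed by man_sum_kstats() is
 * self-consistent.
 */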

static void
man_sum_dests_kstats(kstat_named_t *knp, man_pg_t *mpg)
{
	int		i;
	int		flags;
	char		*statname;
	manstr_t	*msp;
	man_dest_t	*mdp;
	uint64_t	switches = 0;
	uint64_t	linkfails = 0;
	uint64_t	linkstales = 0;
	uint64_t	icmpv4probes = 0;
	uint64_t	icmpv6probes = 0;

	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: mpg 0x%p", (void *)mpg));

	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {

		if (!man_str_uses_pg(msp, mpg))
			continue;

		mdp = &msp->ms_dests[mpg->mpg_pg_id];

		switches += mdp->md_switches;
		linkfails += mdp->md_linkfails;
		linkstales += mdp->md_linkstales;
		icmpv4probes += mdp->md_icmpv4probes;
		icmpv6probes += mdp->md_icmpv6probes;
	}

	for (i = 0; i < MAN_NUMSTATS; i++) {

		statname = man_kstat_info[i].mk_name;
		flags = man_kstat_info[i].mk_flags;

		if (!(flags & MK_NOT_PHYSICAL))
			continue;

		if (strcmp(statname, "man_switches") == 0) {
			knp[i].value.ui64 = switches;
		} else if (strcmp(statname, "man_link_fails") == 0) {
			knp[i].value.ui64 = linkfails;
		} else if (strcmp(statname, "man_link_stales") == 0) {
			knp[i].value.ui64 = linkstales;
		} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
			knp[i].value.ui64 = icmpv4probes;
		} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
			knp[i].value.ui64 = icmpv6probes;
		}
	}

	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: returns"));
}

/*
 * Initialize MAN named kstats in the space provided.
 */
static void
man_kstat_named_init(kstat_named_t *knp, int num_stats)
{
	int	i;

	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: knp(0x%p) num_stats = %d",
	    (void *)knp, num_stats));

	for (i = 0; i < num_stats; i++) {
		kstat_named_init(&knp[i], man_kstat_info[i].mk_name,
		    man_kstat_info[i].mk_type);
	}

	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: returns"));

}

/*
 * man_kstat_byname - get a kernel stat value from its structure
 *
 *	ksp - kstat_t structure to play with
 *	s   - string to match names with
 *	res - in/out result data pointer
 *
 *	returns	- success - 1 (found)
 *		- failure - 0 (not found)
 */
static int
man_kstat_byname(kstat_t *ksp, char *s, kstat_named_t *res)
{
	int		found = 0;

	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: GETTING %s\n", s));

	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
		kstat_named_t *knp;

		for (knp = KSTAT_NAMED_PTR(ksp);
		    (caddr_t)knp < ((caddr_t)ksp->ks_data+ksp->ks_data_size);
		    knp++) {

			if (strcmp(s, knp->name) == 0) {

				res->data_type = knp->data_type;
				res->value = knp->value;
				found++;

				MAN_DBG(MAN_KSTAT2, ("\t%s: %d\n", knp->name,
				    (int)knp->value.ul));
			}
		}
	} else {
		MAN_DBG(MAN_KSTAT2, ("\tbad kstats type %d\n", ksp->ks_type));
	}

	/*
	 * If we couldn't find the name string, return a reasonable
	 * zeroed default.
	 */
	if (!found) {
		res->data_type = KSTAT_DATA_ULONG;
		res->value.ul = 0;
		MAN_DBG(MAN_KSTAT2, ("\tcouldn't find, using defaults\n"));
	}

	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: returns\n"));

	return (found);
}
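
/*
 * Usage sketch (hypothetical stat name):
 *
 *	kstat_named_t	val;
 *
 *	if (man_kstat_byname(dev_ksp, "ipackets", &val))
 *		total += val.value.ui32;
 *
 * Since the search scans the entire table, a duplicated name leaves the
 * last match in *res.
 */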


/*
 *
 * Accumulate MAN driver kstats from the incremental values of the underlying
 * physical interfaces.
 *
 * Parameters:
 *	sum_knp		- The named kstat area to put cumulative value,
 *			  NULL if we just want to sync next two params.
 *	phys_ksp	- Physical interface kstat_t pointer. Contains
 *			  more current counts.
 * 	phys_last_knp	- counts from the last time we were called for this
 *			  physical interface. Note that the named kstats
 *			  pointed to are actually in MAN format, but they
 *			  hold the mirrored physical device's last-read
 *			  kstats.
 * Basic algorithm is:
 *
 * 	for each named kstat variable {
 *	    sum_knp[i] += (phys_ksp->ksp_data[i] - phys_last_knp[i]);
 *	    phys_last_knp[i] = phys_ksp->ksp_data[i];
 *	}
 *
 */
static void
man_sum_kstats(kstat_named_t *sum_knp, kstat_t *phys_ksp,
	kstat_named_t *phys_last_knp)
{
	char		*physname;
	char		*physalias;
	char		*statname;
	kstat_named_t	phys_kn_entry;
	uint64_t	delta64;
	int		i;

	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: sum_knp(0x%p) phys_ksp(0x%p)"
	    " phys_last_knp(0x%p)\n", (void *)sum_knp, (void *)phys_ksp,
	    (void *)phys_last_knp));

	/*
	 * Now for each entry in man_kstat_info, sum the named kstat.
	 * Note that all MAN-specific kstats will end up !found.
	 */
	for (i = 0; i < MAN_NUMSTATS; i++) {
		int	found = 0;
		int	flags = 0;

		delta64 = 0;

		statname = man_kstat_info[i].mk_name;
		physname = man_kstat_info[i].mk_physname;
		physalias = man_kstat_info[i].mk_physalias;
		flags = man_kstat_info[i].mk_flags;

		/*
		 * Update MAN private kstats.
		 */
		if (flags & MK_NOT_PHYSICAL) {

			kstat_named_t	*knp = phys_last_knp;

			if (sum_knp == NULL)
				continue;

			if (strcmp(statname, "man_switches") == 0) {
				sum_knp[i].value.ui64 = knp[i].value.ui64;
			} else if (strcmp(statname, "man_link_fails") == 0) {
				sum_knp[i].value.ui64 = knp[i].value.ui64;
			} else if (strcmp(statname, "man_link_stales") == 0) {
				sum_knp[i].value.ui64 = knp[i].value.ui64;
			} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
				sum_knp[i].value.ui64 = knp[i].value.ui64;
			} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
				sum_knp[i].value.ui64 = knp[i].value.ui64;
			}

			continue;	/* phys_ksp doesn't have this stat */
		}

		/*
		 * first try it by the "official" name
		 */
		if (phys_ksp) {
			if (man_kstat_byname(phys_ksp, physname,
			    &phys_kn_entry)) {

				found = 1;

			} else if ((physalias) && (man_kstat_byname(phys_ksp,
			    physalias, &phys_kn_entry))) {

				found = 1;
			}
		}

		if (!found) {
			/*
			 * clear up the "last" value, no change to the sum
			 */
			phys_last_knp[i].value.ui64 = 0;
			continue;
		}

		/*
		 * at this point, we should have the good underlying
		 * kstat value stored in phys_kn_entry
		 */
		if (flags & MK_NOT_COUNTER) {
			/*
			 * it isn't a counter, so store the value and
			 * move on (e.g. ifspeed)
			 */
			phys_last_knp[i].value = phys_kn_entry.value;
			continue;
		}

		switch (phys_kn_entry.data_type) {
		case KSTAT_DATA_UINT32:

			/*
			 * this handles 32-bit wrapping
			 */
			if (phys_kn_entry.value.ui32 <
			    phys_last_knp[i].value.ui32) {

				/*
				 * The counter wrapped; account for the
				 * UINT_MAX -> 0 transition as well.
				 */
				delta64 += ((uint64_t)UINT_MAX + 1 -
				    phys_last_knp[i].value.ui32);
				phys_last_knp[i].value.ui32 = 0;
			}

			delta64 += phys_kn_entry.value.ui32 -
			    phys_last_knp[i].value.ui32;
			phys_last_knp[i].value.ui32 = phys_kn_entry.value.ui32;
			break;

		default:
			/*
			 * must be a 64-bit value, we ignore 64-bit
			 * wraps, since they shouldn't ever happen
			 * within the life of a machine (if we assume
			 * machines don't stay up for more than a few
			 * hundred years without a reboot...)
			 */
			delta64 = phys_kn_entry.value.ui64 -
			    phys_last_knp[i].value.ui64;
			phys_last_knp[i].value.ui64 = phys_kn_entry.value.ui64;
		}

		if (sum_knp != NULL) {
			/*
			 * now we need to save the value
			 */
			switch (sum_knp[i].data_type) {
			case KSTAT_DATA_UINT32:
				/* truncate down to 32 bits, possibly lossy */
				sum_knp[i].value.ui32 += (uint32_t)delta64;
				break;

			default:
				sum_knp[i].value.ui64 += delta64;
				break;
			}
		}
	}

	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: returns\n"));
}
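
/*
 * Wrap example for the 32-bit case above: if the last-read value was
 * 0xFFFFFFF0 and the device now reports 0x10, the counter wrapped, and
 * the delta accumulated is (UINT_MAX + 1 - 0xFFFFFFF0) + 0x10 = 0x20.
 */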


#if defined(DEBUG)


static char *_ms_flags[] = {
	"NONE",
	"FAST", 	/* 0x1 */
	"RAW",		/* 0x2 */
	"ALLPHYS",	/* 0x4 */
	"ALLMULTI",	/* 0x8 */
	"ALLSAP",	/* 0x10 */
	"CKSUM",	/* 0x20 */
	"MULTI",	/* 0x40 */
	"SERLPBK",	/* 0x80 */
	"MACLPBK",	/* 0x100 */
	"CLOSING",	/* 0x200 */
	"CLOSE_DONE",	/* 0x400 */
	"CONTROL"	/* 0x800 */
};
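
/*
 * Bit i of ms_flags is decoded as _ms_flags[i + 1]; the "NONE" entry at
 * index 0 is a placeholder so the table lines up with the flag values
 * noted above.
 */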

static void
man_print_msp(manstr_t *msp)
{
	char	buf[512];
	char	prbuf[512];
	uint_t	flags;
	int	i;

	cmn_err(CE_CONT, "\tmsp(0x%p)\n", (void *)msp);

	if (msp == NULL)
		return;

	cmn_err(CE_CONT, "\t%s%d SAP(0x%x):\n",
	    ddi_major_to_name(msp->ms_meta_maj), msp->ms_meta_ppa,
	    msp->ms_sap);

	buf[0] = '\0';
	prbuf[0] = '\0';
	flags = msp->ms_flags;
	for (i = 0; i < A_CNT(_ms_flags) - 1; i++) {
		if ((flags >> i) & 0x1) {
			(void) sprintf(buf, " %s |", _ms_flags[i + 1]);
			(void) strcat(prbuf, buf);
		}
	}
	if (prbuf[0] != '\0')
		prbuf[strlen(prbuf) - 1] = '\0';
	cmn_err(CE_CONT, "\tms_flags: %s\n", prbuf);

	cmn_err(CE_CONT, "\tms_dlpistate: %s\n", dss[msp->ms_dlpistate]);

	cmn_err(CE_CONT, "\tms_dl_mp: 0x%p\n", (void *)msp->ms_dl_mp);

	cmn_err(CE_CONT, "\tms_manp: 0x%p\n", (void *)msp->ms_manp);

	cmn_err(CE_CONT, "\tms_dests: 0x%p\n", (void *)msp->ms_dests);

}

static char *_md_state[] = {
	"NOTPRESENT",		/* 0x0 */
	"INITIALIZING",		/* 0x1 */
	"READY",		/* 0x2 */
	"PLUMBING",		/* 0x4 */
	"CLOSING"		/* 0x8 */
};

static void
man_print_mdp(man_dest_t *mdp)
{
	uint_t		state;
	int		i;
	char		buf[64];
	char		prbuf[512];

	buf[0] = '\0';
	prbuf[0] = '\0';

	cmn_err(CE_CONT, "\tmdp(0x%p)\n", (void *)mdp);

	if (mdp == NULL)
		return;

	cmn_err(CE_CONT, "\tmd_pg_id: %d\n", mdp->md_pg_id);
	cmn_err(CE_CONT, "\tmd_dst_eaddr: %s\n",
	    ether_sprintf(&mdp->md_dst_eaddr));
	cmn_err(CE_CONT, "\tmd_src_eaddr: %s\n",
	    ether_sprintf(&mdp->md_src_eaddr));
	cmn_err(CE_CONT, "\tmd_dlpistate: %s", dss[mdp->md_dlpistate]);
	cmn_err(CE_CONT, "\tmd_muxid: 0x%u", mdp->md_muxid);
	cmn_err(CE_CONT, "\tmd_rcvcnt %lu md_lastrcvcnt %lu", mdp->md_rcvcnt,
	    mdp->md_lastrcvcnt);

	/*
	 * Print out state as text.
	 */
	state = mdp->md_state;

	if (state == 0) {
		strcat(prbuf, _md_state[0]);
	} else {

		for (i = 0; i < A_CNT(_md_state) - 1; i++) {
			if ((state >> i) & 0x1)  {
				(void) sprintf(buf, " %s |", _md_state[i + 1]);
				(void) strcat(prbuf, buf);
			}
		}
		if (prbuf[0] != '\0')
			prbuf[strlen(prbuf) - 1] = '\0';
	}
	cmn_err(CE_CONT, "\tmd_state: %s", prbuf);

	cmn_err(CE_CONT, "\tmd_device:\n");
	man_print_dev(&mdp->md_device);

}

static void
man_print_man(man_t *manp)
{
	char	buf[512];

	buf[0] = '\0';

	if (manp == NULL)
		return;

	if (ddi_major_to_name(manp->man_meta_major)) {
		(void) sprintf(buf, "\t man_device: %s%d\n",
		    ddi_major_to_name(manp->man_meta_major),
		    manp->man_meta_ppa);
	} else {
		(void) sprintf(buf, "\t major: %d ppa: %d\n",
		    manp->man_meta_major, manp->man_meta_ppa);
	}

	cmn_err(CE_CONT, "%s", buf);

}

static char *_mdev_state[] = {
	"UNASSIGNED  ",
	"ASSIGNED",
	"ACTIVE",
	"FAILED"
};

static void
man_print_dev(man_dev_t *mdevp)
{
	char	buf[512];
	char	prbuf[512];
	int	i;
	uint_t	state;

	buf[0] = '\0';
	prbuf[0] = '\0';

	if (mdevp == NULL)
		return;

	if (mdevp->mdev_major == 0) {
number:
		sprintf(buf, "\t mdev_major: %d\n", mdevp->mdev_major);
	} else if (ddi_major_to_name(mdevp->mdev_major)) {
		sprintf(buf, "\t mdev_device: %s%d\n",
		    ddi_major_to_name(mdevp->mdev_major),
		    mdevp->mdev_ppa);
	} else
		goto number;

	cmn_err(CE_CONT, "%s", buf);

	cmn_err(CE_CONT, "\t mdev_exp_id: %d\n", mdevp->mdev_exp_id);

	buf[0] = '\0';
	prbuf[0] = '\0';
	state = mdevp->mdev_state;

	if (state == 0) {
		strcat(prbuf, _mdev_state[0]);
	} else {
		for (i = 0; i < A_CNT(_mdev_state) - 1; i++) {
			if ((state >> i) & 0x1) {
				(void) sprintf(buf, " %s |",
				    _mdev_state[i + 1]);
				(void) strcat(prbuf, buf);
			}
		}
	}

	prbuf[strlen(prbuf) - 2] = '\0';

	cmn_err(CE_CONT, "\t mdev_state: %s\n", prbuf);

}

static char *_mip_cmd[] = {
	"MI_PATH_READ",
	"MI_PATH_ASSIGN",
	"MI_PATH_ACTIVATE",
	"MI_PATH_DEACTIVATE",
	"MI_PATH_UNASSIGN"
};

static void
man_print_mtp(mi_time_t *mtp)
{
	cmn_err(CE_CONT, "\tmtp(0x%p)\n", (void *)mtp);

	if (mtp == NULL)
		return;

	cmn_err(CE_CONT, "\tmtp_instance: %d\n", mtp->mtp_man_ppa);

	cmn_err(CE_CONT, "\tmtp_time: %d\n", mtp->mtp_time);

}

static void
man_print_mip(mi_path_t *mip)
{
	cmn_err(CE_CONT, "\tmip(0x%p)\n", (void *)mip);

	if (mip == NULL)
		return;

	cmn_err(CE_CONT, "\tmip_pg_id: %d\n", mip->mip_pg_id);

	cmn_err(CE_CONT, "\tmip_cmd: %s\n", _mip_cmd[mip->mip_cmd]);

	cmn_err(CE_CONT, "\tmip_eaddr: %s\n", ether_sprintf(&mip->mip_eaddr));

	cmn_err(CE_CONT, "\tmip_devs: 0x%p\n", (void *)mip->mip_devs);

	cmn_err(CE_CONT, "\tmip_ndevs: %d\n", mip->mip_ndevs);

}

static void
man_print_mpg(man_pg_t *mpg)
{
	cmn_err(CE_CONT, "\tmpg(0x%p)\n", (void *)mpg);

	if (mpg == NULL)
		return;

	cmn_err(CE_CONT, "\tmpg_next: 0x%p\n", (void *)mpg->mpg_next);

	cmn_err(CE_CONT, "\tmpg_pg_id: %d\n", mpg->mpg_pg_id);

	cmn_err(CE_CONT, "\tmpg_man_ppa: %d\n", mpg->mpg_man_ppa);

	cmn_err(CE_CONT, "\tmpg_dst_eaddr: %s\n",
	    ether_sprintf(&mpg->mpg_dst_eaddr));

	cmn_err(CE_CONT, "\tmpg_pathp: 0x%p\n", (void *)mpg->mpg_pathp);

}

static char *_mw_flags[] = {
	"NOWAITER",		/* 0x1 */
	"CVWAITER",		/* 0x2 */
	"QWAITER",		/* 0x4 */
	"DONE"			/* 0x8 */
};

static void
man_print_work(man_work_t *wp)
{
	int 	i;

	cmn_err(CE_CONT, "\twp(0x%p)\n\n", (void *)wp);

	if (wp == NULL)
		return;

	cmn_err(CE_CONT, "\tmw_type: %s\n", _mw_type[wp->mw_type]);

	cmn_err(CE_CONT, "\tmw_flags: ");
	for (i = 0; i < A_CNT(_mw_flags); i++) {
		if ((wp->mw_flags >> i) & 0x1)
			cmn_err(CE_CONT, "%s", _mw_flags[i]);
	}
	cmn_err(CE_CONT, "\n");

	cmn_err(CE_CONT, "\twp_status: %d\n", wp->mw_status);

	cmn_err(CE_CONT, "\twp_arg: 0x%p\n", (void *)&wp->mw_arg);

	cmn_err(CE_CONT, "\tmw_next: 0x%p\n", (void *)wp->mw_next);

	cmn_err(CE_CONT, "\twp_q: 0x%p", (void *)wp->mw_q);

}

static void
man_print_path(man_path_t *mp)
{
	cmn_err(CE_CONT, "\tmp(0x%p)\n\n", (void *)mp);

	if (mp == NULL)
		return;

	cmn_err(CE_CONT, "\tmp_device:");
	man_print_dev(&mp->mp_device);

	cmn_err(CE_CONT, "\tmp_next: 0x%p\n", (void *)mp->mp_next);

	cmn_err(CE_CONT, "\tmp_last_knp: 0x%p\n", (void *)mp->mp_last_knp);

	cmn_err(CE_CONT, "\tmp_lru: 0x%lx", mp->mp_lru);

}

void *
man_dbg_kzalloc(int line, size_t size, int kmflags)
{
	void *tmp;

	tmp = kmem_zalloc(size, kmflags);
	MAN_DBG(MAN_KMEM, ("0x%p %lu\tzalloc'd @ %d\n", (void *)tmp,
	    size, line));

	return (tmp);

}

void
man_dbg_kfree(int line, void *buf, size_t size)
{

	MAN_DBG(MAN_KMEM, ("0x%p %lu\tfree'd @ %d\n", (void *)buf, size, line));

	kmem_free(buf, size);

}

#endif  /* DEBUG */