xref: /titanic_50/usr/src/uts/sun4u/starcat/io/dman.c (revision fd845fc0cb4fbc8e85f974e2e4eaacca1cc26e81)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Starcat Management Network Driver
31  *
32  * ****** NOTICE **** This file also resides in the SSC gate as
33  * ****** NOTICE **** usr/src/uts/sun4u/scman/scman.c. Any changes
34  * ****** NOTICE **** made here must be propagated there as well.
35  *
36  */
37 
38 #include <sys/types.h>
39 #include <sys/proc.h>
40 #include <sys/disp.h>
41 #include <sys/kmem.h>
42 #include <sys/stat.h>
43 #include <sys/kstat.h>
44 #include <sys/ksynch.h>
45 #include <sys/stream.h>
46 #include <sys/dlpi.h>
47 #include <sys/stropts.h>
48 #include <sys/strsubr.h>
49 #include <sys/debug.h>
50 #include <sys/conf.h>
51 #include <sys/kstr.h>
52 #include <sys/errno.h>
53 #include <sys/ethernet.h>
54 #include <sys/byteorder.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/sunldi.h>
58 #include <sys/modctl.h>
59 #include <sys/strsun.h>
60 #include <sys/callb.h>
61 #include <sys/pci.h>
62 #include <netinet/in.h>
63 #include <inet/common.h>
64 #include <inet/mi.h>
65 #include <inet/nd.h>
66 #include <sys/socket.h>
67 #include <netinet/igmp_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet/icmp6.h>
70 #include <inet/ip.h>
71 #include <inet/ip6.h>
72 #include <sys/file.h>
73 #include <sys/dman.h>
74 #include <sys/autoconf.h>
75 #include <sys/zone.h>
76 
77 extern int ddi_create_internal_pathname(dev_info_t *, char *, int, minor_t);
78 
79 #define	MAN_IDNAME	"dman"
80 #define	DMAN_INT_PATH	"/devices/pseudo/dman@0:dman"
81 #define	DMAN_PATH	"/devices/pseudo/clone@0:dman"
82 #define	ERI_IDNAME	"eri"
83 #define	ERI_PATH	"/devices/pseudo/clone@0:eri"
84 
85 #if defined(DEBUG)
86 
87 static void man_print_msp(manstr_t *);
88 static void man_print_man(man_t *);
89 static void man_print_mdp(man_dest_t *);
90 static void man_print_dev(man_dev_t *);
91 static void man_print_mip(mi_path_t *);
92 static void man_print_mtp(mi_time_t *);
93 static void man_print_mpg(man_pg_t *);
94 static void man_print_path(man_path_t *);
95 static void man_print_work(man_work_t *);
96 
/*
 * Set manstr_t dlpistate (upper half of multiplexor), emitting a
 * MAN_DLPI debug message describing the transition first.
 *
 * The macro parameters are parenthesized in the expansion so that an
 * expression argument (e.g. "p + 1") is handled correctly.  Note that:
 *  - msp is evaluated more than once, so it must be free of side effects;
 *  - the expansion is two statements and already ends in ';', so a use
 *    as the body of an if/else must be enclosed in braces.
 */
#define	SETSTATE(msp, state) \
	MAN_DBG(MAN_DLPI, ("msp=0x%p @ %d state %s=>%s\n",		\
		    (void *)(msp), __LINE__, dss[(msp)->ms_dlpistate],	\
		    dss[(state)]));					\
		    (msp)->ms_dlpistate = (state);
/*
 * Set man_dest_t dlpistate (lower half of multiplexor), emitting a
 * MAN_DLPI debug message describing the transition first.  The same
 * caveats as for SETSTATE apply (multiple evaluation of mdp, expansion
 * is two statements with a trailing ';').
 */
#define	D_SETSTATE(mdp, state) \
	MAN_DBG(MAN_DLPI, ("dst=0x%p @ %d state %s=>%s\n",	   \
		    (void *)(mdp), __LINE__, dss[(mdp)->md_dlpistate], \
		    dss[(state)]));				   \
		    (mdp)->md_dlpistate = (state);
113 
114 static char *promisc[] = {	/* DLPI promisc Strings */
115 	"not used",		/* 0x00 */
116 	"DL_PROMISC_PHYS",	/* 0x01 */
117 	"DL_PROMISC_SAP",	/* 0x02 */
118 	"DL_PROMISC_MULTI"	/* 0x03 */
119 };
120 
121 static char *dps[] = {			/* DLPI Primitive Strings */
122 	"DL_INFO_REQ",			/* 0x00 */
123 	"DL_BIND_REQ",			/* 0x01 */
124 	"DL_UNBIND_REQ",		/* 0x02 */
125 	"DL_INFO_ACK",			/* 0x03 */
126 	"DL_BIND_ACK",			/* 0x04 */
127 	"DL_ERROR_ACK",			/* 0x05 */
128 	"DL_OK_ACK",			/* 0x06 */
129 	"DL_UNITDATA_REQ",		/* 0x07 */
130 	"DL_UNITDATA_IND",		/* 0x08 */
131 	"DL_UDERROR_IND",		/* 0x09 */
132 	"DL_UDQOS_REQ",			/* 0x0a */
133 	"DL_ATTACH_REQ",		/* 0x0b */
134 	"DL_DETACH_REQ",		/* 0x0c */
135 	"DL_CONNECT_REQ",		/* 0x0d */
136 	"DL_CONNECT_IND",		/* 0x0e */
137 	"DL_CONNECT_RES",		/* 0x0f */
138 	"DL_CONNECT_CON",		/* 0x10 */
139 	"DL_TOKEN_REQ",			/* 0x11 */
140 	"DL_TOKEN_ACK",			/* 0x12 */
141 	"DL_DISCONNECT_REQ",		/* 0x13 */
142 	"DL_DISCONNECT_IND",		/* 0x14 */
143 	"DL_SUBS_UNBIND_REQ",		/* 0x15 */
144 	"DL_LIARLIARPANTSONFIRE",	/* 0x16 */
145 	"DL_RESET_REQ",			/* 0x17 */
146 	"DL_RESET_IND",			/* 0x18 */
147 	"DL_RESET_RES",			/* 0x19 */
148 	"DL_RESET_CON",			/* 0x1a */
149 	"DL_SUBS_BIND_REQ",		/* 0x1b */
150 	"DL_SUBS_BIND_ACK",		/* 0x1c */
151 	"DL_ENABMULTI_REQ",		/* 0x1d */
152 	"DL_DISABMULTI_REQ",		/* 0x1e */
153 	"DL_PROMISCON_REQ",		/* 0x1f */
154 	"DL_PROMISCOFF_REQ",		/* 0x20 */
155 	"DL_DATA_ACK_REQ",		/* 0x21 */
156 	"DL_DATA_ACK_IND",		/* 0x22 */
157 	"DL_DATA_ACK_STATUS_IND",	/* 0x23 */
158 	"DL_REPLY_REQ",			/* 0x24 */
159 	"DL_REPLY_IND",			/* 0x25 */
160 	"DL_REPLY_STATUS_IND",		/* 0x26 */
161 	"DL_REPLY_UPDATE_REQ",		/* 0x27 */
162 	"DL_REPLY_UPDATE_STATUS_IND",	/* 0x28 */
163 	"DL_XID_REQ",			/* 0x29 */
164 	"DL_XID_IND",			/* 0x2a */
165 	"DL_XID_RES",			/* 0x2b */
166 	"DL_XID_CON",			/* 0x2c */
167 	"DL_TEST_REQ",			/* 0x2d */
168 	"DL_TEST_IND",			/* 0x2e */
169 	"DL_TEST_RES",			/* 0x2f */
170 	"DL_TEST_CON",			/* 0x30 */
171 	"DL_PHYS_ADDR_REQ",		/* 0x31 */
172 	"DL_PHYS_ADDR_ACK",		/* 0x32 */
173 	"DL_SET_PHYS_ADDR_REQ",		/* 0x33 */
174 	"DL_GET_STATISTICS_REQ",	/* 0x34 */
175 	"DL_GET_STATISTICS_ACK",	/* 0x35 */
176 };
177 
178 #define	MAN_DLPI_MAX_PRIM	0x35
179 
180 static char *dss[] = {			/* DLPI State Strings */
181 	"DL_UNBOUND",			/* 0x00	*/
182 	"DL_BIND_PENDING",		/* 0x01	*/
183 	"DL_UNBIND_PENDING",		/* 0x02	*/
184 	"DL_IDLE",			/* 0x03	*/
185 	"DL_UNATTACHED",		/* 0x04	*/
186 	"DL_ATTACH_PENDING",		/* 0x05	*/
187 	"DL_DETACH_PENDING",		/* 0x06	*/
188 	"DL_UDQOS_PENDING",		/* 0x07	*/
189 	"DL_OUTCON_PENDING",		/* 0x08	*/
190 	"DL_INCON_PENDING",		/* 0x09	*/
191 	"DL_CONN_RES_PENDING",		/* 0x0a	*/
192 	"DL_DATAXFER",			/* 0x0b	*/
193 	"DL_USER_RESET_PENDING",	/* 0x0c	*/
194 	"DL_PROV_RESET_PENDING",	/* 0x0d	*/
195 	"DL_RESET_RES_PENDING",		/* 0x0e	*/
196 	"DL_DISCON8_PENDING",		/* 0x0f	*/
197 	"DL_DISCON9_PENDING",		/* 0x10	*/
198 	"DL_DISCON11_PENDING",		/* 0x11	*/
199 	"DL_DISCON12_PENDING",		/* 0x12	*/
200 	"DL_DISCON13_PENDING",		/* 0x13	*/
201 	"DL_SUBS_BIND_PND",		/* 0x14	*/
202 	"DL_SUBS_UNBIND_PND",		/* 0x15	*/
203 };
204 
205 static const char *lss[] = {
206 	"UNKNOWN",	/* 0x0 */
207 	"INIT",		/* 0x1 */
208 	"GOOD",		/* 0x2 */
209 	"STALE",	/* 0x3 */
210 	"FAIL",		/* 0x4 */
211 };
212 
213 static char *_mw_type[] = {
214 	"OPEN_CTL",		/* 0x0 */
215 	"CLOSE_CTL",		/* 0x1 */
216 	"SWITCH",		/* 0x2 */
217 	"PATH_UPDATE",		/* 0x3 */
218 	"CLOSE",		/* 0x4 */
219 	"CLOSE_STREAM",	/* 0x5 */
220 	"DRATTACH",		/* 0x6 */
221 	"DRDETACH",		/* 0x7 */
222 	"STOP",			/* 0x8 */
223 	"DRSWITCH",		/* 0x9 */
224 	"KSTAT_UPDATE"		/* 0xA */
225 };
226 
227 uint32_t		man_debug = MAN_WARN;
228 
229 #define	man_kzalloc(a, b)	man_dbg_kzalloc(__LINE__, a, b)
230 #define	man_kfree(a, b)		man_dbg_kfree(__LINE__, a, b)
231 void	*man_dbg_kzalloc(int line, size_t size, int kmflags);
232 void	man_dbg_kfree(int line, void *buf, size_t size);
233 
234 #else	/* DEBUG */
235 
236 uint32_t		man_debug = 0;
/*
 * Set manstr_t dlpistate (upper half of multiplexor)
 *
 * The msp parameter is parenthesized so expression arguments expand
 * correctly.  The expansion already ends in ';' (kept for compatibility
 * with existing call sites), so brace any use inside an if/else.
 */
#define	SETSTATE(msp, state) (msp)->ms_dlpistate = (state);
/*
 * Set man_dest_t dlpistate (lower half of multiplexor)
 *
 * Same conventions as SETSTATE: mdp is parenthesized and the expansion
 * carries its own trailing ';'.
 */
#define	D_SETSTATE(mdp, state) (mdp)->md_dlpistate = (state);
245 
246 #define	man_kzalloc(a, b)	kmem_zalloc(a, b)
247 #define	man_kfree(a, b)		kmem_free(a, b)
248 
249 #endif	/* DEBUG */
250 
/*
 * Field accessors for the data at the read pointer of a message block.
 * Each macro casts (mp)->b_rptr to the named structure type and reads a
 * single member; none of them checks the message type or length.
 *
 *	DL_PRIM			dl_primitive of a union DL_primitives
 *	DL_PROMISCON_TYPE	promiscon_req.dl_level of the same union
 *	IOC_CMD			ioc_cmd of a struct iocblk
 *
 * NOTE(review): callers are presumably expected to have verified that
 * the mblk really holds the corresponding structure -- confirm at the
 * call sites.
 */
251 #define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
252 #define	DL_PROMISCON_TYPE(mp)	\
253 		(((union DL_primitives *)(mp)->b_rptr)->promiscon_req.dl_level)
254 #define	IOC_CMD(mp)	(((struct iocblk *)(mp)->b_rptr)->ioc_cmd)
255 
256 /*
257  * Start of kstat-related declarations
258  */
/*
 * Flag bits stored in man_kstat_info_t.mk_flags.  Bit 1 is not used by
 * any of the definitions below.
 */
259 #define	MK_NOT_COUNTER		(1<<0)	/* stat is NOT a counter (e.g. ifspeed) */
260 #define	MK_ERROR		(1<<2)	/* for error statistics */
261 #define	MK_NOT_PHYSICAL		(1<<3)	/* no matching physical stat */
262 
/*
 * One entry of the man_kstat_info[] table; entries are normally built
 * with the MK_* declaration macros that follow.
 */
263 typedef struct man_kstat_info_s {
264 	char		*mk_name;	/* e.g. align_errors */
265 	char		*mk_physname;	/* same as mk_name for normal entries; NULL for MK_NOTPHYS* */
266 	char		*mk_physalias;	/* alternate name, e.g. framing (NULL if none) */
267 	uchar_t		mk_type;	/* e.g. KSTAT_DATA_UINT32 */
268 	int		mk_flags;	/* MK_* flag bits defined above */
269 } man_kstat_info_t;
270 
271 /*
272  * Master declaration macro, note that it uses token pasting
273  */
274 #define	MK_DECLARE(name, pname, palias, bits, flags) \
275 	{ name,		pname,	palias,	KSTAT_DATA_UINT ## bits, flags }
276 
277 /*
278  * Obsolete forms don't have the _sinceswitch forms, they are all errors
 *
 * Mind the argument order: MK_DECLARE receives (alias, name, alias), so
 * the resulting entry has mk_name == alias, mk_physname == name and
 * mk_physalias == alias, always with the MK_ERROR flag.
 *
 * NOTE(review): as a consequence MK_OBSOLETE32("framing", "align_errors")
 * produces an entry whose mk_name is "align_errors", the same mk_name as
 * the MK_ERROR32("align_errors", "framing") entry in man_kstat_info[].
 * Confirm that publishing the obsolete stat under the new name (rather
 * than under "framing") is intended.
279  */
280 #define	MK_OBSOLETE32(name, alias) MK_DECLARE(alias, name, alias, 32, MK_ERROR)
281 #define	MK_OBSOLETE64(name, alias) MK_DECLARE(alias, name, alias, 64, MK_ERROR)
282 
283 /*
284  * The only non-counters don't have any other aliases
285  */
286 #define	MK_NOTCOUNTER32(name) MK_DECLARE(name, name, NULL, 32, MK_NOT_COUNTER)
287 #define	MK_NOTCOUNTER64(name) MK_DECLARE(name, name, NULL, 64, MK_NOT_COUNTER)
288 
289 /*
290  * Normal counter forms
291  */
292 #define	MK_DECLARE32(name, alias) \
293 	MK_DECLARE(name, name, alias, 32, 0)
294 #define	MK_DECLARE64(name, alias) \
295 	MK_DECLARE(name, name, alias, 64, 0)
296 
297 /*
298  * Error counters need special MK_ERROR flag only for the non-AP form
299  */
300 #define	MK_ERROR32(name, alias) \
301 	MK_DECLARE(name, name, alias, 32, MK_ERROR)
302 #define	MK_ERROR64(name, alias) \
303 	MK_DECLARE(name, name, alias, 64, MK_ERROR)
304 
305 /*
306  * These AP-specific stats are not backed by physical statistics
307  */
308 #define	MK_NOTPHYS32(name) MK_DECLARE(name, NULL, NULL, 32, MK_NOT_PHYSICAL)
309 #define	MK_NOTPHYS64(name) MK_DECLARE(name, NULL, NULL, 64, MK_NOT_PHYSICAL)
310 
311 /*
312  * START of the actual man_kstat_info declaration using above macros
313  */
314 static man_kstat_info_t man_kstat_info[] = {
315 	/*
316 	 * Link Input/Output stats
317 	 */
318 	MK_DECLARE32("ipackets", NULL),
319 	MK_ERROR32("ierrors", NULL),
320 	MK_DECLARE32("opackets", NULL),
321 	MK_ERROR32("oerrors", NULL),
322 	MK_ERROR32("collisions", NULL),
323 	MK_NOTCOUNTER64("ifspeed"),
324 	/*
325 	 * These are new MIB-II stats, per PSARC 1997/198
326 	 */
327 	MK_DECLARE32("rbytes", NULL),
328 	MK_DECLARE32("obytes", NULL),
329 	MK_DECLARE32("multircv", NULL),
330 	MK_DECLARE32("multixmt", NULL),
331 	MK_DECLARE32("brdcstrcv", NULL),
332 	MK_DECLARE32("brdcstxmt", NULL),
333 	/*
334 	 * Error values
335 	 */
336 	MK_ERROR32("norcvbuf", NULL),
337 	MK_ERROR32("noxmtbuf", NULL),
338 	MK_ERROR32("unknowns", NULL),
339 	/*
340 	 * These are the 64-bit values, they fallback to 32-bit values
341 	 */
342 	MK_DECLARE64("ipackets64", "ipackets"),
343 	MK_DECLARE64("opackets64", "opackets"),
344 	MK_DECLARE64("rbytes64", "rbytes"),
345 	MK_DECLARE64("obytes64", "obytes"),
346 
347 	/* New AP switching statistics */
348 	MK_NOTPHYS64("man_switches"),
349 	MK_NOTPHYS64("man_link_fails"),
350 	MK_NOTPHYS64("man_link_stales"),
351 	MK_NOTPHYS64("man_icmpv4_probes"),
352 	MK_NOTPHYS64("man_icmpv6_probes"),
353 
354 	MK_ERROR32("align_errors", "framing"),
355 	MK_ERROR32("fcs_errors", "crc"),
356 	MK_ERROR32("first_collisions", NULL),
357 	MK_ERROR32("multi_collisions", NULL),
358 	MK_ERROR32("sqe_errors", "sqe"),
359 
360 	MK_ERROR32("tx_late_collisions", NULL),
361 	MK_ERROR32("ex_collisions", "excollisions"),
362 	MK_ERROR32("macxmt_errors", NULL),
363 	MK_ERROR32("carrier_errors", "nocarrier"),
364 	MK_ERROR32("toolong_errors", "buff"),
365 	MK_ERROR32("macrcv_errors", NULL),
366 
367 	MK_OBSOLETE32("framing", "align_errors"),
368 	MK_OBSOLETE32("crc", "fcs_errors"),
369 	MK_OBSOLETE32("sqe", "sqe_errors"),
370 	MK_OBSOLETE32("excollisions", "ex_collisions"),
371 	MK_OBSOLETE32("nocarrier", "carrier_errors"),
372 	MK_OBSOLETE32("buff", "toolong_errors"),
373 };
374 
375 #define	MAN_NUMSTATS (sizeof (man_kstat_info) / sizeof (man_kstat_info_t))
376 
377 /*
378  * Miscellaneous ethernet stuff.
379  *
380  * MANs DL_INFO_ACK template.
381  */
382 static	dl_info_ack_t man_infoack = {
383 	DL_INFO_ACK,				/* dl_primitive */
384 	ETHERMTU,				/* dl_max_sdu */
385 	0,					/* dl_min_sdu */
386 	MAN_ADDRL,				/* dl_addr_length */
387 	DL_ETHER,				/* dl_mac_type */
388 	0,					/* dl_reserved */
389 	0,					/* dl_current_state */
390 	-2,					/* dl_sap_length */
391 	DL_CLDLS,				/* dl_service_mode */
392 	0,					/* dl_qos_length */
393 	0,					/* dl_qos_offset */
394 	0,					/* dl_range_length */
395 	0,					/* dl_range_offset */
396 	DL_STYLE2,				/* dl_provider_style */
397 	sizeof (dl_info_ack_t),			/* dl_addr_offset */
398 	DL_VERSION_2,				/* dl_version */
399 	ETHERADDRL,				/* dl_brdcst_addr_length */
400 	sizeof (dl_info_ack_t) + MAN_ADDRL,	/* dl_brdcst_addr_offset */
401 	0					/* dl_growth */
402 };
403 
404 /*
405  * Ethernet broadcast address definition.
406  */
407 static	struct ether_addr	etherbroadcast = {
408 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
409 };
410 
411 static struct ether_addr zero_ether_addr = {
412 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
413 };
414 
415 /*
416  * Set via MAN_SET_SC_IPADDRS ioctl.
417  */
418 man_sc_ipaddrs_t	man_sc_ipaddrs = { 0xffffffffU, 0xffffffffU };
419 
420 /*
421  * Set via MAN_SET_SC_IP6ADDRS ioctl.
422  */
423 man_sc_ip6addrs_t	man_sc_ip6addrs = { 0, 0, 0, 0, 0, 0, 0, 0 };
424 
425 /*
426  * IP & ICMP constants
427  */
428 #ifndef	ETHERTYPE_IPV6
429 #define	ETHERTYPE_IPV6 0x86DD
430 #endif
431 
432 /*
433  * Function prototypes.
434  *
435  * Upper multiplexor functions.
436  */
437 static int	man_attach(dev_info_t *, ddi_attach_cmd_t);
438 static int	man_detach(dev_info_t *, ddi_detach_cmd_t);
439 static int	man_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
440 static int	man_open(register queue_t *, dev_t *, int, int, cred_t *);
441 static int	man_configure(queue_t *);
442 static int	man_deconfigure(void);
443 static int	man_init_dests(man_t *, manstr_t *);
444 static void	man_start_dest(man_dest_t *, manstr_t *, man_pg_t *);
445 static void	man_set_optimized_dest(manstr_t *);
446 static int	man_close(queue_t *);
447 static void	man_cancel_timers(man_adest_t *);
448 static int	man_uwput(queue_t *, mblk_t *);
449 static int	man_start(queue_t *, mblk_t *, eaddr_t *);
450 static void	man_ioctl(queue_t *, mblk_t *);
451 static void	man_set_linkcheck_time(queue_t *, mblk_t *);
452 static void	man_setpath(queue_t *, mblk_t *);
453 static void	man_geteaddr(queue_t *, mblk_t *);
454 static void	man_set_sc_ipaddrs(queue_t *, mblk_t *);
455 static void	man_set_sc_ip6addrs(queue_t *, mblk_t *);
456 static int	man_get_our_etheraddr(eaddr_t *eap);
457 static void	man_nd_getset(queue_t *, mblk_t *);
458 static void	man_dl_ioc_hdr_info(queue_t *, mblk_t *);
459 static int	man_uwsrv(queue_t *);
460 static int	man_proto(queue_t *, mblk_t *);
461 static int	man_udreq(queue_t *, mblk_t *);
462 static void	man_areq(queue_t *, mblk_t *);
463 static mblk_t	*man_alloc_physreq_mp(eaddr_t *);
464 static void	man_dreq(queue_t *, mblk_t *);
465 static void	man_dodetach(manstr_t *, man_work_t *);
466 static void	man_dl_clean(mblk_t **);
467 static void	man_breq(queue_t *, mblk_t *);
468 static void	man_ubreq(queue_t *, mblk_t *);
469 static void	man_ireq(queue_t *, mblk_t *);
470 static void	man_ponreq(queue_t *, mblk_t *);
471 static void	man_poffreq(queue_t *, mblk_t *);
472 static void	man_emreq(queue_t *, mblk_t *);
473 static void	man_dmreq(queue_t *, mblk_t *);
474 static void	man_pareq(queue_t *, mblk_t *);
475 static void	man_spareq(queue_t *, mblk_t *);
476 static int	man_dlpi(manstr_t *, mblk_t *);
477 static int	man_dlioc(manstr_t *, mblk_t *);
478 static int	man_dl_catch(mblk_t **, mblk_t *);
479 static void	man_dl_release(mblk_t **, mblk_t *);
480 static int	man_match_proto(mblk_t *, mblk_t *);
481 static int	man_open_ctl();
482 static void	man_close_ctl();
483 /*
484  * upper/lower multiplexor functions.
485  */
486 static int	man_dlpi_senddown(manstr_t *, mblk_t *);
487 static int	man_start_lower(man_dest_t *, mblk_t *, queue_t *, int caller);
488 static int	man_lrput(queue_t *, mblk_t *);
489 /*
490  * Lower multiplexor functions.
491  */
492 static int	man_lwsrv(queue_t *);
493 static int	man_lrsrv(queue_t *);
494 static void	man_dlpi_replay(man_dest_t *, mblk_t *);
495 static int	man_dlioc_replay(man_dest_t *);
496 /*
497  * Link failover routines.
498  */
499 static int	man_gettimer(int, man_dest_t *);
500 static void	man_linkcheck_timer(void *);
501 static int	man_needs_linkcheck(man_dest_t *);
502 static int	man_do_autoswitch(man_dest_t *);
503 static int	man_autoswitch(man_pg_t *, man_dev_t *, man_work_t *);
504 static int	man_prep_dests_for_switch(man_pg_t *, man_dest_t **, int *);
505 static int	man_str_uses_pg(manstr_t *, man_pg_t *);
506 static void	man_do_icmp_bcast(man_dest_t *, t_uscalar_t);
507 static mblk_t	*man_alloc_udreq(int, man_dladdr_t *);
508 static mblk_t	*man_pinger(t_uscalar_t);
509 /*
510  * Functions normally executing outside of the STREAMs perimeter.
511  */
512 /*
513  * Functions supporting/processing work requests.
514  */
515 static void	man_bwork(void);
516 static void	man_iwork(void);		/* inside perimeter */
517 void		man_work_add(man_workq_t *, man_work_t *);
518 man_work_t	*man_work_alloc(int, int);
519 void		man_work_free(man_work_t *);
520 /*
521  * Functions implementing/supporting failover.
522  *
523  * Executed inside perimeter.
524  */
525 static int	man_do_dr_attach(man_work_t *);
526 static int	man_do_dr_switch(man_work_t *);
527 static void	man_do_dr_detach(man_work_t *);
528 static int	man_iswitch(man_work_t *);
529 static void	man_ifail_dest(man_dest_t *);
530 static man_dest_t *man_switch_match(man_dest_t *, int, void *);
531 static void	man_add_dests(man_pg_t *);
532 static void	man_reset_dlpi(void *);
533 static mblk_t	*man_dup_mplist(mblk_t *);
534 static mblk_t	*man_alloc_ubreq_dreq();
535 /*
536  * Executed outside perimeter (use man_lock for synchronization).
537  */
538 static void	man_bclose(man_adest_t *);
539 static void	man_bswitch(man_adest_t *, man_work_t *);
540 static int	man_plumb(man_dest_t *);
541 static void	man_unplumb(man_dest_t *);
542 static void	man_plink(queue_t *, mblk_t *);
543 static void	man_unplink(queue_t *, mblk_t *);
544 static void	man_linkrec_insert(man_linkrec_t *);
545 static queue_t	*man_linkrec_find(int);
546 /*
547  * Functions supporting pathgroups
548  */
549 int	man_pg_cmd(mi_path_t *, man_work_t *);
550 static int	man_pg_assign(man_pg_t **, mi_path_t *, int);
551 static int	man_pg_create(man_pg_t **, man_pg_t **, mi_path_t *);
552 static int	man_pg_unassign(man_pg_t **, mi_path_t *);
553 static int	man_pg_activate(man_t *, mi_path_t *, man_work_t *);
554 static int	man_pg_read(man_pg_t *, mi_path_t *);
555 static man_pg_t	*man_find_path_by_dev(man_pg_t *, man_dev_t *, man_path_t **);
556 static man_pg_t	*man_find_pg_by_id(man_pg_t *, int);
557 static man_path_t	*man_find_path_by_ppa(man_path_t *, int);
558 static man_path_t	*man_find_active_path(man_path_t *);
559 static man_path_t	*man_find_alternate_path(man_path_t *);
560 static void	man_path_remove(man_path_t **, man_path_t *);
561 static void	man_path_insert(man_path_t **, man_path_t *);
562 static void	man_path_merge(man_path_t **, man_path_t *);
563 static int	man_path_kstat_init(man_path_t *);
564 static void	man_path_kstat_uninit(man_path_t *);
565 /*
566  * Functions supporting kstat reporting.
567  */
568 static int	man_kstat_update(kstat_t *, int);
569 static void	man_do_kstats(man_work_t *);
570 static void	man_update_path_kstats(man_t *);
571 static void 	man_update_dev_kstats(kstat_named_t *, man_path_t *);
572 static void	man_sum_dests_kstats(kstat_named_t *, man_pg_t *);
573 static void	man_kstat_named_init(kstat_named_t *, int);
574 static int	man_kstat_byname(kstat_t *, char *, kstat_named_t *);
575 static void	man_sum_kstats(kstat_named_t *, kstat_t *, kstat_named_t *);
576 /*
577  * Functions supporting ndd.
578  */
579 static int	man_param_register(param_t *, int);
580 static int	man_pathgroups_report(queue_t *, mblk_t *, caddr_t, cred_t *);
581 static void	man_preport(man_path_t *, mblk_t *);
582 static int	man_set_active_path(queue_t *, mblk_t *, char *, caddr_t,
583 			cred_t *);
584 static int	man_get_hostinfo(queue_t *, mblk_t *, caddr_t, cred_t *);
585 static char	*man_inet_ntoa(in_addr_t);
586 static int	man_param_get(queue_t *, mblk_t *, caddr_t, cred_t *);
587 static int	man_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
588 static  void    man_param_cleanup(void);
589 static  void    man_nd_free(caddr_t *nd_pparam);
590 /*
591  * MAN SSC/Domain specific externs.
592  */
593 extern int	man_get_iosram(manc_t *);
594 extern int	man_domain_configure(void);
595 extern int	man_domain_deconfigure(void);
596 extern int	man_dossc_switch(uint32_t);
597 extern int	man_is_on_domain;
598 
599 /*
600  * Driver Globals protected by inner perimeter.
601  */
602 static manstr_t	*man_strup = NULL;	/* list of MAN STREAMS */
603 static caddr_t	man_ndlist = NULL;	/* head of ndd var list */
604 void		*man_softstate = NULL;
605 
606 /*
607  * Driver globals protected by man_lock.
608  */
609 kmutex_t		man_lock;		/* lock protecting vars below */
610 static kthread_id_t	man_bwork_id = NULL;	/* background thread ID */
611 man_workq_t		*man_bwork_q;		/* bgthread work q */
612 man_workq_t		*man_iwork_q;		/* inner perim (uwsrv) work q */
613 static man_linkrec_t	*man_linkrec_head = NULL;	/* list of linkblks */
614 ldi_handle_t		man_ctl_lh = NULL;	/* MAN control handle */
615 queue_t			*man_ctl_wq = NULL;	/* MAN control rq */
616 static int		man_config_state = MAN_UNCONFIGURED;
617 static int		man_config_error = ENODEV;
618 
619 /*
620  * These parameters are accessed via ndd to report the link configuration
621  * for the MAN driver. They can also be used to force configuration changes.
622  */
623 #define	MAN_NOTUSR	0x0f000000
624 
625 /* ------------------------------------------------------------------------- */
626 
627 static  param_t	man_param_arr[] = {
628 	/* min		max		value		name */
629 	{  0,		0xFFFF,		0,		"man_debug_level"},
630 };
631 
632 #define	MAN_NDD_GETABLE	1
633 #define	MAN_NDD_SETABLE	2
634 
635 static  uint32_t	man_param_display[] = {
636 /* DISPLAY */
637 MAN_NDD_SETABLE,	/* man_debug_level */
638 };
639 
640 /*
641  * STREAMs information.
642  */
643 static struct module_info man_m_info = {
644 	MAN_IDNUM,			/* mi_idnum */
645 	MAN_IDNAME,			/* mi_idname */
646 	MAN_MINPSZ,			/* mi_minpsz */
647 	MAN_MAXPSZ,			/* mi_maxpsz */
648 	MAN_HIWAT,			/* mi_hiwat */
649 	MAN_LOWAT			/* mi_lowat */
650 };
651 
652 /*
653  * Upper read queue does not do anything.
654  */
655 static struct qinit man_urinit = {
656 	NULL,				/* qi_putp */
657 	NULL,				/* qi_srvp */
658 	man_open,			/* qi_qopen */
659 	man_close,			/* qi_qclose */
660 	NULL,				/* qi_qadmin */
661 	&man_m_info,			/* qi_minfo */
662 	NULL				/* qi_mstat */
663 };
664 
665 static struct qinit man_lrinit = {
666 	man_lrput,			/* qi_putp */
667 	man_lrsrv,			/* qi_srvp */
668 	man_open,			/* qi_qopen */
669 	man_close,			/* qi_qclose */
670 	NULL,				/* qi_qadmin */
671 	&man_m_info,			/* qi_minfo */
672 	NULL				/* qi_mstat */
673 };
674 
675 static struct qinit man_uwinit = {
676 	man_uwput,			/* qi_putp */
677 	man_uwsrv,			/* qi_srvp */
678 	man_open,			/* qi_qopen */
679 	man_close,			/* qi_qclose */
680 	NULL,				/* qi_qadmin */
681 	&man_m_info,			/* qi_minfo */
682 	NULL				/* qi_mstat */
683 };
684 
685 static struct qinit man_lwinit = {
686 	NULL,				/* qi_putp */
687 	man_lwsrv,			/* qi_srvp */
688 	man_open,			/* qi_qopen */
689 	man_close,			/* qi_qclose */
690 	NULL,				/* qi_qadmin */
691 	&man_m_info,			/* qi_minfo */
692 	NULL				/* qi_mstat */
693 };
694 
695 static struct streamtab man_maninfo = {
696 	&man_urinit,			/* st_rdinit */
697 	&man_uwinit,			/* st_wrinit */
698 	&man_lrinit,			/* st_muxrinit */
699 	&man_lwinit			/* st_muxwrinit */
700 };
701 
702 
703 /*
704  * Module linkage information for the kernel.
705  *
706  * Locking Theory:
707  * 	D_MTPERMOD -	Only an inner perimeter: All routines single
708  * 			threaded (except put, see below).
709  *	D_MTPUTSHARED -	Put routines enter inner perimeter shared (not
710  *			exclusive) for concurrency/performance reasons.
711  *
712  *	Anyone who needs exclusive outer perimeter permission (changing
713  *	global data structures) does so via qwriter() calls. The
714  *	background thread does all its work outside of the perimeter and
715  *	submits work via qtimeout() when data structures need to be
716  *	modified.
717  */
718 
719 #define	MAN_MDEV_FLAGS	(D_MP|D_MTPERMOD|D_MTPUTSHARED)
720 
721 DDI_DEFINE_STREAM_OPS(man_ops, nulldev, nulldev, man_attach,
722     man_detach, nodev, man_info, MAN_MDEV_FLAGS, &man_maninfo);
723 
724 extern int nodev(), nulldev();
725 
726 static struct modldrv modldrv = {
727 	&mod_driverops, 	/* Module type.  This one is a pseudo driver */
728 	"MAN MetaDriver v%I%",
729 	&man_ops,		/* driver ops */
730 };
731 
732 static struct modlinkage modlinkage = {
733 	MODREV_1,
734 	(void *) &modldrv,
735 	NULL
736 };
737 
738 
739 /* Virtual Driver loader entry points */
740 
741 int
742 _init(void)
743 {
744 	int		status = DDI_FAILURE;
745 
746 	MAN_DBG(MAN_INIT, ("_init:"));
747 	MAN_DBG(MAN_INIT, ("_init: compiled %s at %s\n", __DATE__, __TIME__));
748 
749 	status = mod_install(&modlinkage);
750 	if (status != 0) {
751 		cmn_err(CE_WARN, "man_init: mod_install failed"
752 			" error = %d", status);
753 		return (status);
754 	}
755 
756 	status = ddi_soft_state_init(&man_softstate, sizeof (man_t), 4);
757 	if (status != 0) {
758 		cmn_err(CE_WARN, "man_init: ddi_soft_state_init failed"
759 			" error = %d", status);
760 		mod_remove(&modlinkage);
761 		return (status);
762 	}
763 
764 	man_bwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
765 	man_iwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
766 
767 	mutex_init(&man_lock, NULL, MUTEX_DRIVER, NULL);
768 	cv_init(&man_bwork_q->q_cv, NULL, CV_DRIVER, NULL);
769 	cv_init(&man_iwork_q->q_cv, NULL, CV_DRIVER, NULL);
770 
771 	return (0);
772 }
773 
774 /*
775  * _info is called by modinfo().
776  */
777 int
778 _info(struct modinfo *modinfop)
779 {
780 	int	status;
781 
782 	MAN_DBG(MAN_INIT, ("_info:"));
783 
784 	status = mod_info(&modlinkage, modinfop);
785 
786 	MAN_DBG(MAN_INIT, ("_info: returns %d", status));
787 
788 	return (status);
789 }
790 
791 /*
792  * _fini called by modunload() just before driver is unloaded from memory.
793  */
794 int
795 _fini(void)
796 {
797 	int status = 0;
798 
799 	MAN_DBG(MAN_INIT, ("_fini:"));
800 
801 
802 	/*
803 	 * The only upper stream left should be man_ctl_lh. Note that
804 	 * man_close (upper stream) is synchronous (i.e. it waits for
805 	 * all STREAMS framework associated with the upper stream to be
806 	 * torn down). This guarantees that man_ctl_lh will never become
807 	 * NULL until noone is around to notice. This assumption is made
808 	 * in a few places like man_plumb, man_unplumb, etc.
809 	 */
810 	if (man_strup && (man_strup->ms_next != NULL))
811 		return (EBUSY);
812 
813 	/*
814 	 * Deconfigure the driver.
815 	 */
816 	status = man_deconfigure();
817 	if (status)
818 		goto exit;
819 
820 	/*
821 	 * need to detach every instance of the driver
822 	 */
823 	status = mod_remove(&modlinkage);
824 	if (status != 0)
825 		goto exit;
826 
827 	ddi_soft_state_fini(&man_softstate);
828 
829 	/*
830 	 * Free up locks.
831 	 */
832 	mutex_destroy(&man_lock);
833 	cv_destroy(&man_bwork_q->q_cv);
834 	cv_destroy(&man_iwork_q->q_cv);
835 
836 	man_kfree(man_bwork_q, sizeof (man_workq_t));
837 	man_kfree(man_iwork_q, sizeof (man_workq_t));
838 
839 exit:
840 
841 	MAN_DBG(MAN_INIT, ("_fini: returns %d", status));
842 
843 	return (status);
844 }
845 
846 /*
847  * Deconfigure the MAN driver.
848  */
849 static int
850 man_deconfigure()
851 {
852 	man_work_t	*wp;
853 	int		status = 0;
854 
855 	MAN_DBG(MAN_CONFIG, ("man_deconfigure:\n"));
856 
857 	mutex_enter(&man_lock);
858 
859 	if (man_is_on_domain) {
860 		status = man_domain_deconfigure();
861 		if (status != 0)
862 			goto exit;
863 	}
864 
865 	man_param_cleanup();	/* Free up NDD resources */
866 
867 	/*
868 	 * I may have to handle straggling work requests. Just qwait?
869 	 * or cvwait? Called from _fini - TBD
870 	 */
871 	ASSERT(man_bwork_q->q_work == NULL);
872 	ASSERT(man_iwork_q->q_work == NULL);
873 
874 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting CLOSE_CTL\n"));
875 
876 	if (man_ctl_lh != NULL) {
877 		wp = man_work_alloc(MAN_WORK_CLOSE_CTL, KM_SLEEP);
878 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
879 		man_work_add(man_bwork_q, wp);
880 
881 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
882 			cv_wait(&wp->mw_cv, &man_lock);
883 		}
884 		man_work_free(wp);
885 	}
886 
887 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting STOP\n"));
888 	if (man_bwork_id != NULL) {
889 
890 		wp = man_work_alloc(MAN_WORK_STOP, KM_SLEEP);
891 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
892 		man_work_add(man_bwork_q, wp);
893 
894 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
895 			cv_wait(&wp->mw_cv, &man_lock);
896 		}
897 		man_work_free(wp);
898 	}
899 	man_config_state = MAN_UNCONFIGURED;
900 
901 exit:
902 	mutex_exit(&man_lock);
903 
904 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: returns %d\n", status));
905 
906 	return (status);
907 }
908 
909 /*
910  * man_attach - allocate resources and attach an instance of the MAN driver
911  * The <man>.conf file controls how many instances of the MAN driver are
912  * available.
913  *
914  *	dip - devinfo of node
915  * 	cmd - one of DDI_ATTACH | DDI_RESUME
916  *
917  *	returns	- success - DDI_SUCCESS
918  *		- failure - DDI_FAILURE
919  */
920 static int
921 man_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
922 {
923 	man_t		*manp;		/* per instance data */
924 	uchar_t		flag = KSTAT_FLAG_WRITABLE; /* support netstat -kc */
925 	kstat_t		*ksp;
926 	int		minor_node_created = 0;
927 	int		instance;
928 	eaddr_t		man_eaddr;
929 
930 	MAN_DBG(MAN_INIT, ("man_attach: \n"));
931 
932 	if (cmd != DDI_ATTACH) {
933 		MAN_DBG(MAN_INIT, ("man_attach: bad command %d\n", cmd));
934 		return (DDI_FAILURE);
935 	}
936 
937 	if (man_get_our_etheraddr(&man_eaddr))
938 		return (DDI_FAILURE);
939 
940 	instance = ddi_get_instance(dip);
941 
942 	/*
943 	 * we assume that instance is always equal to zero.
944 	 * and there will always only be one instance.
945 	 * this is done because when dman opens itself via DMAN_INT_PATH,
946 	 * the path assumes that the instance number is zero.
947 	 * if we ever need to support multiple instances of the dman
948 	 * driver or non-zero instances, this will have to change.
949 	 */
950 	ASSERT(instance == 0);
951 
952 	/*
953 	 * Allocate per device info pointer and link in to global list of
954 	 * MAN devices.
955 	 */
956 	if ((ddi_soft_state_zalloc(man_softstate, instance) != DDI_SUCCESS) ||
957 	    ((manp = ddi_get_soft_state(man_softstate, instance)) == NULL)) {
958 		cmn_err(CE_WARN, "man_attach: cannot zalloc soft state!");
959 		return (DDI_FAILURE);
960 	}
961 
962 	ddi_set_driver_private(dip, manp);
963 	manp->man_dip = dip;
964 	manp->man_meta_major = ddi_name_to_major(ddi_get_name(dip));
965 	manp->man_meta_ppa = instance;
966 
967 	/*
968 	 * Set ethernet address. Note that this address is duplicated
969 	 * at md_src_eaddr.
970 	 */
971 	ether_copy(&man_eaddr, &manp->man_eaddr);
972 	manp->man_eaddr_v = 1;
973 
974 	MAN_DBG(MAN_INIT, ("man_attach: set ether to %s",
975 		ether_sprintf(&manp->man_eaddr)));
976 
977 	/*
978 	 * Initialize failover-related fields (timers and such),
979 	 * taking values from properties if present.
980 	 */
981 	manp->man_init_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
982 	    "init_time", MAN_INIT_TIME);
983 
984 	manp->man_linkcheck_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
985 	    "linkcheck_time", MAN_LINKCHECK_TIME);
986 
987 	manp->man_linkstale_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
988 	    "man_linkstale_time", MAN_LINKSTALE_TIME);
989 
990 	manp->man_linkstale_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
991 	    "man_linkstale_retries", MAN_LINKSTALE_RETRIES);
992 
993 	manp->man_dr_delay = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
994 	    "man_dr_delay", MAN_DR_DELAY);
995 
996 	manp->man_dr_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
997 	    "man_dr_retries", MAN_DR_RETRIES);
998 
999 	manp->man_kstat_waittime = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
1000 	    "man_kstat_waittime", MAN_KSTAT_WAITTIME);
1001 
1002 	manp->man_dlpireset_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
1003 	    "man_dlpireset_time", MAN_DLPIRESET_TIME);
1004 
1005 	if (ddi_create_internal_pathname(dip, MAN_IDNAME, S_IFCHR,
1006 		ddi_get_instance(dip)) == DDI_SUCCESS) {
1007 		minor_node_created = 1;
1008 	} else {
1009 		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1010 		    ddi_get_instance(dip));
1011 		goto exit;
1012 	}
1013 
1014 	if (ddi_create_minor_node(dip, MAN_IDNAME, S_IFCHR,
1015 		ddi_get_instance(dip), DDI_NT_NET, CLONE_DEV) == DDI_SUCCESS) {
1016 		minor_node_created = 1;
1017 	} else {
1018 		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1019 		    ddi_get_instance(dip));
1020 		goto exit;
1021 	}
1022 
1023 	/*
1024 	 * Allocate meta kstat_t for this instance of the driver.
1025 	 * Note that each of man_path_t keeps track of the kstats
1026 	 * for the real devices via mp_last_knp.
1027 	 */
1028 #ifdef	kstat
1029 	flag |= KSTAT_FLAG_PERSISTENT;
1030 #endif
1031 	ksp = kstat_create(MAN_IDNAME, ddi_get_instance(dip), NULL, "net",
1032 		KSTAT_TYPE_NAMED, MAN_NUMSTATS, flag);
1033 
1034 	if (ksp == NULL) {
1035 		cmn_err(CE_WARN, "man_attach(%d): kstat_create failed"
1036 			" - manp(0x%p)", manp->man_meta_ppa,
1037 			(void *)manp);
1038 		goto exit;
1039 	}
1040 
1041 	man_kstat_named_init(ksp->ks_data, MAN_NUMSTATS);
1042 	ksp->ks_update = man_kstat_update;
1043 	ksp->ks_private = (void *) manp;
1044 	manp->man_ksp = ksp;
1045 	kstat_install(manp->man_ksp);
1046 
1047 	ddi_report_dev(dip);
1048 
1049 	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_SUCCESS",
1050 		ddi_get_instance(dip)));
1051 
1052 	return (DDI_SUCCESS);
1053 
1054 exit:
1055 	if (minor_node_created)
1056 		ddi_remove_minor_node(dip, NULL);
1057 	ddi_set_driver_private(dip, NULL);
1058 	ddi_soft_state_free(man_softstate, instance);
1059 
1060 	MAN_DBG(MAN_INIT, ("man_attach(%d) eaddr returns DDI_FAILIRE",
1061 		ddi_get_instance(dip)));
1062 
1063 	return (DDI_FAILURE);
1064 
1065 }
1066 
1067 static int
1068 man_get_our_etheraddr(eaddr_t *eap)
1069 {
1070 	manc_t	manc;
1071 	int	status = 0;
1072 
1073 	if (man_is_on_domain) {
1074 		if (status = man_get_iosram(&manc))
1075 			return (status);
1076 		ether_copy(&manc.manc_dom_eaddr, eap);
1077 	} else {
1078 		(void) localetheraddr((struct ether_addr *)NULL, eap);
1079 	}
1080 
1081 	return (status);
1082 }
1083 
1084 /*
1085  * man_detach - detach an instance of a driver
1086  *
1087  *	dip - devinfo of node
1088  * 	cmd - one of DDI_DETACH | DDI_SUSPEND
1089  *
1090  *	returns	- success - DDI_SUCCESS
1091  *		- failure - DDI_FAILURE
1092  */
1093 static int
1094 man_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1095 {
1096 	register man_t	*manp;		/* per instance data */
1097 	int		instance;
1098 
1099 	MAN_DBG(MAN_INIT, ("man_detach(%d):\n", ddi_get_instance(dip)));
1100 
1101 	if (cmd != DDI_DETACH) {
1102 		MAN_DBG(MAN_INIT, ("man_detach: bad command %d\n", cmd));
1103 		return (DDI_FAILURE);
1104 	}
1105 
1106 	if (dip == NULL) {
1107 		MAN_DBG(MAN_INIT, ("man_detach: dip == NULL\n"));
1108 		return (DDI_FAILURE);
1109 	}
1110 
1111 	instance = ddi_get_instance(dip);
1112 
1113 	mutex_enter(&man_lock);
1114 
1115 	manp = (man_t *)ddi_get_soft_state(man_softstate, instance);
1116 	if (manp == NULL) {
1117 		mutex_exit(&man_lock);
1118 
1119 		cmn_err(CE_WARN, "man_detach: unable to get softstate"
1120 			" for instance = %d, dip = 0x%p!\n", instance,
1121 			(void *)dip);
1122 		return (DDI_FAILURE);
1123 	}
1124 
1125 	if (manp->man_refcnt != 0) {
1126 		mutex_exit(&man_lock);
1127 
1128 		cmn_err(CE_WARN, "man_detach: %s%d refcnt %d", MAN_IDNAME,
1129 			instance, manp->man_refcnt);
1130 		MAN_DBGCALL(MAN_INIT, man_print_man(manp));
1131 
1132 		return (DDI_FAILURE);
1133 	}
1134 
1135 	ddi_remove_minor_node(dip, NULL);
1136 
1137 	mutex_exit(&man_lock);
1138 
1139 	kstat_delete(manp->man_ksp);
1140 	ddi_soft_state_free(man_softstate, instance);
1141 	ddi_set_driver_private(dip, NULL);
1142 
1143 	MAN_DBG(MAN_INIT, ("man_detach returns DDI_SUCCESS"));
1144 
1145 	return (DDI_SUCCESS);
1146 }
1147 
1148 /*
1149  * man_info:
1150  *	As a standard DLPI style-2, man_info() should always return
1151  *	DDI_FAILURE.
1152  *
1153  *	However, man_open() has special treatment for a direct open
1154  *	via kstr_open() without going through the CLONE driver.
1155  *	To make this special kstr_open() work, we need to map
1156  *	minor of 0 to instance 0.
1157  */
1158 /*ARGSUSED*/
1159 static int
1160 man_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1161 {
1162 	minor_t minor;
1163 
1164 	switch (infocmd) {
1165 	case DDI_INFO_DEVT2DEVINFO:
1166 		break;
1167 
1168 	case DDI_INFO_DEVT2INSTANCE:
1169 		minor = getminor((dev_t)arg);
1170 		if (minor == 0) {
1171 			*result = (void *)(uintptr_t)minor;
1172 			return (DDI_SUCCESS);
1173 		}
1174 		break;
1175 	default:
1176 		break;
1177 	}
1178 	return (DDI_FAILURE);
1179 }
1180 
1181 /* Standard Device Driver entry points */
1182 
1183 /*
1184  * man_open - open the device
1185  *
1186  *	rq - upper read queue of the stream
1187  *	devp - pointer to a device number
1188  *	flag - information passed from the user program open(2) system call
1189  *	sflag - stream flags
1190  *	credp - pointer to the cred(9S) user credential structure
1191  *
1192  *	returns	- success - 0
1193  *		- failure - errno value for failure
1194  */
1195 /*ARGSUSED*/
1196 static int
1197 man_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
1198 {
1199 	int			minordev = -1;
1200 	manstr_t		*msp;
1201 	manstr_t		*tsp;
1202 	manstr_t		**prevmsp;
1203 	int			status = 0;
1204 
1205 	MAN_DBG(MAN_OCLOSE, ("man_open: rq(0x%p) sflag(0x%x)\n",
1206 	    (void *)rq, sflag));
1207 
1208 	ASSERT(rq);
1209 	ASSERT(sflag != MODOPEN);
1210 
1211 	/*
1212 	 * reopen; q_ptr set to msp at open completion.
1213 	 */
1214 	if (rq->q_ptr) {
1215 		return (0);
1216 	}
1217 
1218 	/*
1219 	 * Allocate and initialize manstr_t for this device.
1220 	 */
1221 	msp = man_kzalloc(sizeof (manstr_t), KM_SLEEP);
1222 	SETSTATE(msp, DL_UNATTACHED);
1223 	msp->ms_meta_ppa = -1;
1224 	msp->ms_rq = rq;
1225 	rq->q_ptr = WR(rq)->q_ptr = msp;
1226 
1227 	/*
1228 	 * Get the MAN driver configured on 1st open.  Note that the only way
1229 	 * we get sflag != CLONEOPEN is via the call in man_plumbctl().  All
1230 	 * CLONEOPEN calls to man_open will be via the file system
1231 	 * device node /dev/man, a pseudo clone device.
1232 	 */
1233 
1234 	qprocson(rq);
1235 
1236 	if (sflag == CLONEOPEN && man_config_state != MAN_CONFIGURED) {
1237 		/*
1238 		 * First open calls man_configure. Everyone qwaits until
1239 		 * we get it open. See man_open_ctl() comments for mutex
1240 		 * lock/synchronization info.
1241 		 */
1242 
1243 		mutex_enter(&man_lock);
1244 
1245 		if (man_config_state == MAN_UNCONFIGURED) {
1246 			man_config_state = MAN_CONFIGURING;
1247 			mutex_exit(&man_lock);
1248 			status = man_configure(rq);
1249 			if (status != 0)
1250 				goto exit;
1251 		} else {
1252 			while (man_config_state == MAN_CONFIGURING) {
1253 
1254 				mutex_exit(&man_lock);
1255 				status = qwait_sig(rq);
1256 
1257 				if (status == 0) {
1258 					status = EINTR;
1259 					goto exit;
1260 				}
1261 
1262 				mutex_enter(&man_lock);
1263 			}
1264 			mutex_exit(&man_lock);
1265 
1266 			if (man_config_error) {
1267 				status = man_config_error;
1268 				goto exit;
1269 			}
1270 		}
1271 	}
1272 
1273 	/*
1274 	 * Determine minor device number. man_open serialized by
1275 	 * D_MTPERMOD.
1276 	 */
1277 	prevmsp = &man_strup;
1278 	if (sflag == CLONEOPEN) {
1279 
1280 		minordev = 0;
1281 		for (; (tsp = *prevmsp) != NULL; prevmsp = &tsp->ms_next) {
1282 			if (minordev < tsp->ms_minor)
1283 				break;
1284 			minordev++;
1285 		}
1286 		*devp = makedevice(getmajor(*devp), minordev);
1287 
1288 	} else {
1289 		/*
1290 		 * Should only get here from man_plumbctl().
1291 		 */
1292 		/*LINTED E_ASSIGN_UINT_TO_SIGNED_INT*/
1293 		minordev = getminor(*devp);
1294 
1295 		/*
1296 		 * No need to protect this here as all opens are
1297 		 * qwaiting, and the bgthread (who is doing this open)
1298 		 * is the only one who mucks with this variable.
1299 		 */
1300 		man_ctl_wq = WR(rq);
1301 
1302 		ASSERT(minordev == 0);	/* TBD delete this */
1303 	}
1304 
1305 	msp->ms_meta_maj = getmajor(*devp);
1306 	msp->ms_minor = minordev;
1307 	if (minordev == 0)
1308 		msp->ms_flags = MAN_SFLAG_CONTROL;
1309 
1310 	/*
1311 	 * Link new entry into global list of active entries.
1312 	 */
1313 	msp->ms_next = *prevmsp;
1314 	*prevmsp = msp;
1315 
1316 
1317 	/*
1318 	 * Disable automatic enabling of our write service procedure.
1319 	 * We control this explicitly.
1320 	 */
1321 	noenable(WR(rq));
1322 
1323 exit:
1324 	MAN_DBG(MAN_OCLOSE, ("man_open: exit rq(0x%p) minor %d errno %d\n",
1325 		(void *)rq, minordev, status));
1326 
1327 	/*
1328 	 * Clean up on error.
1329 	 */
1330 	if (status) {
1331 		qprocsoff(rq);
1332 		rq->q_ptr = WR(rq)->q_ptr = NULL;
1333 		man_kfree((char *)msp, sizeof (manstr_t));
1334 	} else
1335 		(void) qassociate(rq, -1);
1336 
1337 	return (status);
1338 }
1339 
1340 /*
1341  * Get the driver configured.  Called from first man_open with exclusive
1342  * inner perimeter.
1343  */
1344 static int
1345 man_configure(queue_t *rq)
1346 {
1347 	man_work_t	*wp;
1348 	int		status = 0;
1349 
1350 	MAN_DBG(MAN_CONFIG, ("man_configure:"));
1351 
1352 	/*
1353 	 * Initialize NDD parameters.
1354 	 */
1355 	if (!man_ndlist &&
1356 	    !man_param_register(man_param_arr, A_CNT(man_param_arr))) {
1357 		cmn_err(CE_WARN, "man_configure: man_param_register failed!");
1358 		man_config_error = ENOMEM;
1359 		goto exit;
1360 	}
1361 
1362 	mutex_enter(&man_lock);
1363 
1364 	/*
1365 	 * Start up background thread.
1366 	 */
1367 	if (man_bwork_id == NULL)
1368 		man_bwork_id = thread_create(NULL, 2 * DEFAULTSTKSZ,
1369 		    man_bwork, NULL, 0, &p0, TS_RUN, minclsyspri);
1370 
1371 	/*
1372 	 * Submit work to get control stream opened. Qwait until its
1373 	 * done. See man_open_ctl for mutex lock/synchronization info.
1374 	 */
1375 
1376 	if (man_ctl_lh == NULL) {
1377 		wp = man_work_alloc(MAN_WORK_OPEN_CTL, KM_SLEEP);
1378 		wp->mw_flags |= MAN_WFLAGS_QWAITER;
1379 		wp->mw_q = WR(rq);
1380 
1381 		/*
1382 		 * Submit work and wait. When man_open_ctl exits
1383 		 * man_open, it will cause qwait below to return.
1384 		 */
1385 		man_work_add(man_bwork_q, wp);
1386 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
1387 			mutex_exit(&man_lock);
1388 			qwait(rq);
1389 			mutex_enter(&man_lock);
1390 		}
1391 		status = wp->mw_status;
1392 		man_work_free(wp);
1393 
1394 	}
1395 	mutex_exit(&man_lock);
1396 
1397 	/*
1398 	 * If on domain, setup IOSRAM and build the pathgroups
1399 	 * automatically.
1400 	 */
1401 	if ((status == 0) && man_is_on_domain)
1402 		status = man_domain_configure();
1403 
1404 exit:
1405 	mutex_enter(&man_lock);
1406 
1407 	man_config_error = status;
1408 	if (status != 0)
1409 		man_config_state = MAN_UNCONFIGURED;
1410 	else
1411 		man_config_state = MAN_CONFIGURED;
1412 
1413 	mutex_exit(&man_lock);
1414 
1415 	MAN_DBG(MAN_CONFIG, ("man_configure: returns %d\n", status));
1416 
1417 	return (status);
1418 }
1419 
1420 /*
1421  * man_close - close the device
1422  *
1423  *	rq - upper read queue of the stream
1424  *
1425  *	returns	- success - 0
1426  *		- failure - errno value for failure
1427  */
1428 static int
1429 man_close(queue_t *rq)
1430 {
1431 	manstr_t		*close_msp;
1432 	manstr_t		*msp;
1433 
1434 	MAN_DBG(MAN_OCLOSE, ("man_close: rq(0x%p)\n", (void *)rq));
1435 
1436 	qprocsoff(rq);
1437 	close_msp = (manstr_t *)rq->q_ptr;
1438 
1439 	/*
1440 	 * Unlink the per-Stream entry from the active list and free it.
1441 	 */
1442 	if (close_msp == man_strup)
1443 		man_strup = close_msp->ms_next;
1444 	else {
1445 		for (msp = man_strup; msp && msp->ms_next != close_msp; )
1446 			msp = msp->ms_next;
1447 
1448 		if (msp == NULL) {
1449 			cmn_err(CE_WARN, "man_close: no stream!");
1450 			return (ENODEV);
1451 		}
1452 
1453 		msp->ms_next = close_msp->ms_next;
1454 	}
1455 
1456 	if (close_msp->ms_dests != NULL) {
1457 		/*
1458 		 * Still DL_ATTACHED
1459 		 */
1460 		man_work_t *wp;
1461 
1462 		wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_SLEEP);
1463 		man_dodetach(close_msp, wp);
1464 	}
1465 
1466 	if (close_msp->ms_flags & MAN_SFLAG_CONTROL) {
1467 		/*
1468 		 * Driver about to unload.
1469 		 */
1470 		man_ctl_wq = NULL;
1471 	}
1472 
1473 	rq->q_ptr = WR(rq)->q_ptr = NULL;
1474 	man_kfree((char *)close_msp, sizeof (manstr_t));
1475 	(void) qassociate(rq, -1);
1476 
1477 	MAN_DBG(MAN_OCLOSE, ("man_close: exit\n"));
1478 
1479 	return (0);
1480 }
1481 
1482 /*
1483  * Ask bgthread to tear down lower stream and qwait
1484  * until its done.
1485  */
1486 static void
1487 man_dodetach(manstr_t *msp, man_work_t *wp)
1488 {
1489 	man_dest_t	*mdp;
1490 	int		i;
1491 	mblk_t		*mp;
1492 
1493 	mdp = msp->ms_dests;
1494 	msp->ms_dests = NULL;
1495 	msp->ms_destp = NULL;
1496 
1497 	/*
1498 	 * Excise lower dests array, set it closing and hand it to
1499 	 * background thread to dispose of.
1500 	 */
1501 	for (i = 0; i < MAN_MAX_DESTS; i++) {
1502 
1503 		mdp[i].md_state |= MAN_DSTATE_CLOSING;
1504 		mdp[i].md_msp = NULL;
1505 		mdp[i].md_rq = NULL;
1506 
1507 		if (mdp[i].md_lc_timer_id != 0) {
1508 			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
1509 			mdp[i].md_lc_timer_id = 0;
1510 		}
1511 		if (mdp[i].md_bc_id != 0) {
1512 			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
1513 			mdp[i].md_bc_id = 0;
1514 		}
1515 
1516 		mutex_enter(&mdp[i].md_lock);
1517 		while ((mp = mdp[i].md_dmp_head) != NULL) {
1518 			mdp[i].md_dmp_head = mp->b_next;
1519 			mp->b_next = NULL;
1520 			freemsg(mp);
1521 		}
1522 		mdp[i].md_dmp_count = 0;
1523 		mdp[i].md_dmp_tail = NULL;
1524 		mutex_exit(&mdp[i].md_lock);
1525 	}
1526 
1527 	/*
1528 	 * Dump any DL type messages previously caught.
1529 	 */
1530 	man_dl_clean(&msp->ms_dl_mp);
1531 	man_dl_clean(&msp->ms_dlioc_mp);
1532 
1533 	/*
1534 	 * We need to clear fast path flag when dlioc messages are cleaned.
1535 	 */
1536 	msp->ms_flags &= ~MAN_SFLAG_FAST;
1537 
1538 	/*
1539 	 * MAN_WORK_CLOSE_STREAM work request preallocated by caller.
1540 	 */
1541 	ASSERT(wp->mw_type == MAN_WORK_CLOSE_STREAM);
1542 	ASSERT(mdp != NULL);
1543 	wp->mw_arg.a_mdp = mdp;
1544 	wp->mw_arg.a_ndests = MAN_MAX_DESTS;
1545 	wp->mw_arg.a_pg_id = -1;	/* Don't care */
1546 
1547 	mutex_enter(&man_lock);
1548 	man_work_add(man_bwork_q, wp);
1549 	msp->ms_manp->man_refcnt--;
1550 	mutex_exit(&man_lock);
1551 
1552 	msp->ms_manp = NULL;
1553 
1554 }
1555 
1556 
1557 /*
1558  * man_uwput - handle DLPI messages issued from upstream, the write
1559  * side of the upper half of multiplexor. Called with shared access to
1560  * the inner perimeter.
1561  *
1562  *	wq - upper write queue of mxx
1563  *	mp - mblk ptr to DLPI request
1564  */
1565 static int
1566 man_uwput(register queue_t *wq, register mblk_t *mp)
1567 {
1568 	register manstr_t	*msp;		/* per stream data */
1569 	register man_t		*manp;		/* per instance data */
1570 
1571 	msp = (manstr_t *)wq->q_ptr;
1572 
1573 	MAN_DBG(MAN_UWPUT, ("man_uwput: wq(0x%p) mp(0x%p) db_type(0x%x)"
1574 		" msp(0x%p)\n",
1575 		(void *)wq, (void *)mp, DB_TYPE(mp), (void *)msp));
1576 #if DEBUG
1577 	if (man_debug & MAN_UWPUT) {
1578 		if (DB_TYPE(mp) == M_IOCTL) {
1579 			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1580 			MAN_DBG(MAN_UWPUT,
1581 			    ("man_uwput: M_IOCTL ioc_cmd(0x%x)\n",
1582 			    iocp->ioc_cmd));
1583 		} else if (DB_TYPE(mp) == M_CTL) {
1584 			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1585 			MAN_DBG(MAN_UWPUT,
1586 			    ("man_uwput: M_CTL ioc_cmd(0x%x)\n",
1587 			    iocp->ioc_cmd));
1588 		}
1589 	}
1590 #endif	/* DEBUG */
1591 
1592 
1593 	switch (DB_TYPE(mp)) {
1594 	case M_DATA:
1595 		manp = msp->ms_manp;
1596 
1597 		if (((msp->ms_flags & (MAN_SFLAG_FAST | MAN_SFLAG_RAW)) == 0) ||
1598 			(msp->ms_dlpistate != DL_IDLE) ||
1599 			(manp == NULL)) {
1600 
1601 			merror(wq, mp, EPROTO);
1602 			break;
1603 		}
1604 
1605 		if (wq->q_first) {
1606 			(void) putq(wq, mp);
1607 			qenable(wq);
1608 		} else {
1609 			ehdr_t	*ep = (ehdr_t *)mp->b_rptr;
1610 
1611 			(void) man_start(wq, mp, &ep->ether_dhost);
1612 		}
1613 		break;
1614 
1615 	case M_PROTO:
1616 	case M_PCPROTO:
1617 		if ((DL_PRIM(mp) == DL_UNITDATA_IND) && !wq->q_first) {
1618 			(void) man_udreq(wq, mp);
1619 		} else {
1620 			(void) putq(wq, mp);
1621 			qenable(wq);
1622 		}
1623 		break;
1624 
1625 	case M_IOCTL:
1626 	case M_IOCDATA:
1627 		qwriter(wq, mp, man_ioctl, PERIM_INNER);
1628 		break;
1629 
1630 	case M_CTL:
1631 		freemsg(mp);
1632 		break;
1633 
1634 	case M_FLUSH:
1635 		MAN_DBG(MAN_UWPUT, ("man_wput: M_FLUSH\n"));
1636 		if (*mp->b_rptr & FLUSHW)
1637 			flushq(wq, FLUSHDATA);
1638 		if (*mp->b_rptr & FLUSHR) {
1639 			flushq(RD(wq), FLUSHDATA);
1640 			*mp->b_rptr &= ~FLUSHW;
1641 			qreply(wq, mp);
1642 		} else {
1643 			freemsg(mp);
1644 		}
1645 		break;
1646 
1647 	default:
1648 		MAN_DBG(MAN_WARN,
1649 		    ("man_uwput: illegal mblk(0x%p) type(0x%x)\n",
1650 		    (void *)mp, DB_TYPE(mp)));
1651 		freemsg(mp);
1652 		break;
1653 	} /* End switch */
1654 
1655 	MAN_DBG(MAN_UWPUT, ("man_uwput: exit wq(0x%p) mp(0x%p)\n",
1656 	    (void *)wq, (void *)mp));
1657 
1658 	return (0);
1659 }
1660 
1661 /*
1662  * man_start - handle data messages issued from upstream.  Send down
1663  * to particular man_dest based on ether_addr, otherwise send out to all
1664  * valid man_dests.
1665  *
1666  *	wq - upper write queue of mxx
1667  *	mp - mblk ptr to DLPI request
1668  * 	caller - Caller ID for decision making on canput failure
1669  *
1670  * Returns:
1671  *	0	- Data xmitted or No flow control situation detected.
1672  *	1	- Flow control situation detected.
1673  *
1674  * STREAMS Flow Control: can be used if there is only one destination
1675  * for a stream (1 to 1 multiplexor). In this case, we will use the upper
1676  * write queue to store mblks when in flow control. If there are multiple
1677  * destinations, we cannot use the STREAMs based flow control (1 to many
1678  * multiplexor). In this case, we will use the lower write queue to store
1679  * mblks when in flow control. Since destinations come and go, we may
1680  * transition between 1-to-1 and 1-to-m. So it may be the case that we have
1681  * some mblks stored on the upper queue, and some on the lower queue. However,
1682  * we will never send mblks out of order. See man_uwput and man_start_lower().
1683  *
1684  * A simple flow control mechanism is implemented for the deferred mblk list,
1685  * as this list is expected to be used temporarily for a very short
1686  * period required for switching paths. This flow control mechanism is
1687  * used only as a defensive approach to avoid infinite growth of this list.
1688  */
1689 static int
1690 man_start(register queue_t *wq, register mblk_t *mp, eaddr_t *eap)
1691 {
1692 	register manstr_t	*msp;		/* per stream data */
1693 	register man_dest_t	*mdp = NULL;	/* destination */
1694 	mblk_t			*tmp;
1695 	int			i;
1696 	int			status = 0;
1697 
1698 	msp = (manstr_t *)wq->q_ptr;
1699 
1700 	MAN_DBG(MAN_DATA, ("man_start: msp(0x%p) ether_addr(%s)\n",
1701 		(void *)msp, ether_sprintf(eap)));
1702 
1703 	if (msp->ms_dests == NULL) {
1704 		cmn_err(CE_WARN, "man_start: no destinations");
1705 		freemsg(mp);
1706 		return (0);
1707 	}
1708 
1709 	/*
1710 	 * Optimization if only one valid destination.
1711 	 */
1712 	mdp = msp->ms_destp;
1713 
1714 	if (IS_UNICAST(eap)) {
1715 		queue_t			*flow_wq = NULL;
1716 
1717 		if (mdp == NULL) {
1718 			/*
1719 			 * TDB - This needs to be optimized (some bits in
1720 			 * ehp->dhost will act as an index.
1721 			 */
1722 			for (i = 0; i < MAN_MAX_DESTS; i++) {
1723 
1724 				mdp = &msp->ms_dests[i];
1725 
1726 				if ((mdp->md_state == MAN_DSTATE_READY) &&
1727 				    (ether_cmp(eap, &mdp->md_dst_eaddr) == 0))
1728 					break;
1729 				mdp = NULL;
1730 			}
1731 		} else {
1732 			/*
1733 			 * 1 to 1 multiplexing, use upper wq for flow control.
1734 			 */
1735 			flow_wq = wq;
1736 		}
1737 
1738 		if (mdp != NULL) {
1739 			/*
1740 			 * Its going somewhere specific
1741 			 */
1742 			status =  man_start_lower(mdp, mp, flow_wq, MAN_UPPER);
1743 
1744 		} else {
1745 			MAN_DBG(MAN_DATA, ("man_start: no destination"
1746 				" for eaddr %s\n", ether_sprintf(eap)));
1747 			freemsg(mp);
1748 		}
1749 	} else {
1750 		/*
1751 		 * Broadcast or multicast - send everone a copy.
1752 		 */
1753 		if (mdp == NULL) {
1754 			for (i = 0; i < MAN_MAX_DESTS; i++) {
1755 				mdp = &msp->ms_dests[i];
1756 
1757 				if (mdp->md_state != MAN_DSTATE_READY)
1758 					continue;
1759 
1760 				if ((tmp = copymsg(mp)) != NULL) {
1761 					(void) man_start_lower(mdp, tmp,
1762 						NULL, MAN_UPPER);
1763 				} else {
1764 					MAN_DBG(MAN_DATA, ("man_start: copymsg"
1765 						" failed!"));
1766 				}
1767 			}
1768 			freemsg(mp);
1769 		} else {
1770 			if (mdp->md_state == MAN_DSTATE_READY)
1771 				status =  man_start_lower(mdp, mp, wq,
1772 								MAN_UPPER);
1773 			else
1774 				freemsg(mp);
1775 		}
1776 	}
1777 	return (status);
1778 }
1779 
1780 /*
1781  * Send a DL_UNITDATA or M_DATA fastpath data mblk to a particular
1782  * destination. Others mblk types sent down via * man_dlpi_senddown().
1783  *
1784  * Returns:
1785  *	0	- Data xmitted
1786  *	1	- Data not xmitted due to flow control.
1787  */
1788 static int
1789 man_start_lower(man_dest_t *mdp, mblk_t *mp, queue_t *flow_wq, int caller)
1790 {
1791 	queue_t		*wq = mdp->md_wq;
1792 	int		status = 0;
1793 
1794 	/*
1795 	 * Lower stream ready for data transmit.
1796 	 */
1797 	if (mdp->md_state == MAN_DSTATE_READY &&
1798 	    mdp->md_dlpistate == DL_IDLE) {
1799 
1800 		ASSERT(mdp->md_wq != NULL);
1801 
1802 		if (caller == MAN_UPPER) {
1803 			/*
1804 			 * Check for flow control conditions for lower
1805 			 * stream.
1806 			 */
1807 			if (mdp->md_dmp_head == NULL &&
1808 				wq->q_first == NULL && canputnext(wq)) {
1809 
1810 				(void) putnext(wq, mp);
1811 
1812 			} else {
1813 				mutex_enter(&mdp->md_lock);
1814 				if (mdp->md_dmp_head != NULL) {
1815 					/*
1816 					 * A simple flow control mechanism.
1817 					 */
1818 					if (mdp->md_dmp_count >= MAN_HIWAT) {
1819 						freemsg(mp);
1820 					} else {
1821 						/*
1822 						 * Add 'mp' to the deferred
1823 						 * msg list.
1824 						 */
1825 						mdp->md_dmp_tail->b_next = mp;
1826 						mdp->md_dmp_tail = mp;
1827 						mdp->md_dmp_count +=
1828 								msgsize(mp);
1829 					}
1830 					mutex_exit(&mdp->md_lock);
1831 					/*
1832 					 * Inform flow control situation
1833 					 * to the caller.
1834 					 */
1835 					status = 1;
1836 					qenable(wq);
1837 					goto exit;
1838 				}
1839 				mutex_exit(&mdp->md_lock);
1840 				/*
1841 				 * If 1 to 1 mux, use upper write queue for
1842 				 * flow control.
1843 				 */
1844 				if (flow_wq != NULL) {
1845 					/*
1846 					 * putbq() message and indicate
1847 					 * flow control situation to the
1848 					 * caller.
1849 					 */
1850 					putbq(flow_wq, mp);
1851 					qenable(flow_wq);
1852 					status = 1;
1853 					goto exit;
1854 				}
1855 				/*
1856 				 * 1 to many mux, use lower write queue for
1857 				 * flow control. Be mindful not to overflow
1858 				 * the lower MAN STREAM q.
1859 				 */
1860 				if (canput(wq)) {
1861 					(void) putq(wq, mp);
1862 					qenable(wq);
1863 				} else {
1864 					MAN_DBG(MAN_DATA, ("man_start_lower:"
1865 						" lower q flow controlled -"
1866 						" discarding packet"));
1867 					freemsg(mp);
1868 					goto exit;
1869 				}
1870 			}
1871 
1872 		} else {
1873 			/*
1874 			 * man_lwsrv  is draining flow controlled mblks.
1875 			 */
1876 			if (canputnext(wq))
1877 				(void) putnext(wq, mp);
1878 			else
1879 				status = 1;
1880 		}
1881 		goto exit;
1882 	}
1883 
1884 	/*
1885 	 * Lower stream in transition, do flow control.
1886 	 */
1887 	status = 1;
1888 
1889 	if (mdp->md_state == MAN_DSTATE_NOTPRESENT) {
1890 nodest:
1891 		cmn_err(CE_WARN,
1892 			"man_start_lower: no dest for mdp(0x%p), caller(%d)!",
1893 			(void *)mdp, caller);
1894 		if (caller == MAN_UPPER)
1895 			freemsg(mp);
1896 		goto exit;
1897 	}
1898 
1899 	if (mdp->md_state & MAN_DSTATE_CLOSING) {
1900 		MAN_DBG(MAN_DATA, ("man_start_lower: mdp(0x%p) closing",
1901 			(void *)mdp));
1902 		if (caller == MAN_UPPER)
1903 			freemsg(mp);
1904 		goto exit;
1905 	}
1906 
1907 	if ((mdp->md_state & MAN_DSTATE_PLUMBING) ||
1908 	    (mdp->md_state == MAN_DSTATE_INITIALIZING) ||
1909 	    (mdp->md_dlpistate != DL_IDLE)) {
1910 		/*
1911 		 * Defer until PLUMBED and DL_IDLE. See man_lwsrv().
1912 		 */
1913 		if (caller == MAN_UPPER) {
1914 			/*
1915 			 * Upper stream sending data down, add to defered mblk
1916 			 * list for stream.
1917 			 */
1918 			mutex_enter(&mdp->md_lock);
1919 			if (mdp->md_dmp_count >= MAN_HIWAT) {
1920 				freemsg(mp);
1921 			} else {
1922 				if (mdp->md_dmp_head == NULL) {
1923 					ASSERT(mdp->md_dmp_tail == NULL);
1924 					mdp->md_dmp_head = mp;
1925 					mdp->md_dmp_tail = mp;
1926 				} else {
1927 					mdp->md_dmp_tail->b_next = mp;
1928 					mdp->md_dmp_tail = mp;
1929 				}
1930 				mdp->md_dmp_count += msgsize(mp);
1931 			}
1932 			mutex_exit(&mdp->md_lock);
1933 		}
1934 
1935 		goto exit;
1936 	}
1937 
1938 exit:
1939 	return (status);
1940 }
1941 
1942 /*
1943  * man_ioctl - handle ioctl requests for this driver (I_PLINK/I_PUNLINK)
1944  * or pass thru to the physical driver below.  Note that most M_IOCTLs we
1945  * care about come down the control msp, but the IOC ones come down the IP.
1946  * Called with exclusive inner perimeter.
1947  *
1948  *	wq - upper write queue of mxx
1949  *	mp - mblk ptr to DLPI ioctl request
1950  */
1951 static void
1952 man_ioctl(register queue_t *wq, register mblk_t *mp)
1953 {
1954 	manstr_t		*msp;
1955 	struct iocblk		*iocp;
1956 
1957 	iocp = (struct iocblk *)mp->b_rptr;
1958 	msp = (manstr_t *)wq->q_ptr;
1959 
1960 #ifdef DEBUG
1961 	{
1962 		char			ioc_cmd[30];
1963 
1964 		sprintf(ioc_cmd, "not handled IOCTL 0x%x", iocp->ioc_cmd);
1965 		MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI),
1966 			("man_ioctl: wq(0x%p) mp(0x%p) cmd(%s)\n",
1967 			(void *)wq, (void *)mp,
1968 			(iocp->ioc_cmd == I_PLINK) ? "I_PLINK" :
1969 			(iocp->ioc_cmd == I_PUNLINK) ? "I_PUNLINK" :
1970 			(iocp->ioc_cmd == MAN_SETPATH) ? "MAN_SETPATH" :
1971 			(iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
1972 			(iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
1973 	}
1974 #endif /* DEBUG */
1975 
1976 
1977 	/*
1978 	 *  Handle the requests...
1979 	 */
1980 	switch ((unsigned int)iocp->ioc_cmd) {
1981 
1982 	case I_PLINK:
1983 		man_plink(wq, mp);
1984 		break;
1985 
1986 	case I_PUNLINK:
1987 		man_unplink(wq, mp);
1988 		break;
1989 
1990 	case MAN_SETPATH:
1991 		man_setpath(wq, mp);
1992 		break;
1993 
1994 	case MAN_GETEADDR:
1995 		man_geteaddr(wq, mp);
1996 		break;
1997 
1998 	case MAN_SET_LINKCHECK_TIME:
1999 		man_set_linkcheck_time(wq, mp);
2000 		break;
2001 
2002 	case MAN_SET_SC_IPADDRS:
2003 		man_set_sc_ipaddrs(wq, mp);
2004 		break;
2005 
2006 	case MAN_SET_SC_IP6ADDRS:
2007 		man_set_sc_ip6addrs(wq, mp);
2008 		break;
2009 
2010 	case DLIOCRAW:
2011 		if (man_dlioc(msp, mp))
2012 			miocnak(wq, mp, 0, ENOMEM);
2013 		else {
2014 			msp->ms_flags |= MAN_SFLAG_RAW;
2015 			miocack(wq, mp, 0, 0);
2016 		}
2017 		break;
2018 
2019 	case DL_IOC_HDR_INFO:
2020 		man_dl_ioc_hdr_info(wq, mp);
2021 		break;
2022 
2023 	case MAN_ND_GET:
2024 	case MAN_ND_SET:
2025 		man_nd_getset(wq, mp);
2026 		break;
2027 
2028 	default:
2029 		MAN_DBG(MAN_DDI, ("man_ioctl: unknown ioc_cmd %d\n",
2030 			(unsigned int)iocp->ioc_cmd));
2031 		miocnak(wq, mp, 0, EINVAL);
2032 		break;
2033 	}
2034 exit:
2035 	MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI), ("man_ioctl: exit\n"));
2036 
2037 }
2038 
2039 /*
2040  * man_plink: handle I_PLINK requests on the control stream
2041  */
2042 void
2043 man_plink(queue_t *wq, mblk_t *mp)
2044 {
2045 	struct linkblk	*linkp;
2046 	man_linkrec_t	*lrp;
2047 	int		status = 0;
2048 
2049 	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2050 
2051 	/*
2052 	 * Create a record to hold lower stream info. man_plumb will
2053 	 * retrieve it after calling ldi_ioctl(I_PLINK)
2054 	 */
2055 	lrp = man_kzalloc(sizeof (man_linkrec_t), KM_NOSLEEP);
2056 	if (lrp == NULL) {
2057 		status = ENOMEM;
2058 		goto exit;
2059 	}
2060 
2061 	lrp->l_muxid = linkp->l_index;
2062 	lrp->l_wq = linkp->l_qbot;
2063 	lrp->l_rq = RD(linkp->l_qbot);
2064 
2065 	man_linkrec_insert(lrp);
2066 
2067 exit:
2068 	if (status)
2069 		miocnak(wq, mp, 0, status);
2070 	else
2071 		miocack(wq, mp, 0, 0);
2072 
2073 }
2074 
2075 /*
2076  * man_unplink - handle I_PUNLINK requests on the control stream
2077  */
2078 void
2079 man_unplink(queue_t *wq, mblk_t *mp)
2080 {
2081 	struct linkblk	*linkp;
2082 
2083 	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2084 	RD(linkp->l_qbot)->q_ptr = NULL;
2085 	WR(linkp->l_qbot)->q_ptr = NULL;
2086 	miocack(wq, mp, 0, 0);
2087 }
2088 
2089 void
2090 man_linkrec_insert(man_linkrec_t *lrp)
2091 {
2092 	mutex_enter(&man_lock);
2093 
2094 	lrp->l_next = man_linkrec_head;
2095 	man_linkrec_head = lrp;
2096 
2097 	mutex_exit(&man_lock);
2098 
2099 }
2100 
2101 static queue_t *
2102 man_linkrec_find(int muxid)
2103 {
2104 	man_linkrec_t	*lpp;
2105 	man_linkrec_t	*lp;
2106 	queue_t		*wq = NULL;
2107 
2108 	mutex_enter(&man_lock);
2109 
2110 	if (man_linkrec_head == NULL)
2111 		goto exit;
2112 
2113 	lp = lpp = man_linkrec_head;
2114 	if (lpp->l_muxid == muxid) {
2115 		man_linkrec_head = lpp->l_next;
2116 	} else {
2117 		for (lp = lpp->l_next; lp; lp = lp->l_next) {
2118 			if (lp->l_muxid == muxid)
2119 				break;
2120 			lpp = lp;
2121 		}
2122 	}
2123 
2124 	if (lp == NULL)
2125 		goto exit;
2126 
2127 	wq = lp->l_wq;
2128 	ASSERT(wq != NULL);
2129 
2130 	lpp->l_next = lp->l_next;
2131 	man_kfree(lp, sizeof (man_linkrec_t));
2132 
2133 exit:
2134 	mutex_exit(&man_lock);
2135 
2136 	return (wq);
2137 }
2138 
2139 /*
2140  * Set instance linkcheck timer value.
2141  */
2142 static void
2143 man_set_linkcheck_time(queue_t *wq, mblk_t *mp)
2144 {
2145 	mi_time_t	*mtp;
2146 	int		error;
2147 	man_t		*manp;
2148 
2149 	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: enter"));
2150 
2151 	error = miocpullup(mp, sizeof (mi_time_t));
2152 	if (error != 0)
2153 		goto exit;
2154 
2155 	mtp = (mi_time_t *)mp->b_cont->b_rptr;
2156 
2157 	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: mtp"));
2158 	MAN_DBGCALL(MAN_LINK, man_print_mtp(mtp));
2159 
2160 	manp = ddi_get_soft_state(man_softstate, mtp->mtp_man_ppa);
2161 	if (manp == NULL) {
2162 		error = ENODEV;
2163 		goto exit;
2164 	}
2165 
2166 	manp->man_linkcheck_time = mtp->mtp_time;
2167 exit:
2168 	if (error)
2169 		miocnak(wq, mp, 0, error);
2170 	else
2171 		miocack(wq, mp, sizeof (mi_time_t), 0);
2172 }
2173 
2174 /*
2175  * Man path ioctl processing. Should only happen on the SSC. Called
2176  * with exclusive inner perimeter.
2177  */
2178 static void
2179 man_setpath(queue_t *wq, mblk_t *mp)
2180 {
2181 	mi_path_t		*mip;
2182 	int			error;
2183 
2184 	error = miocpullup(mp, sizeof (mi_path_t));
2185 	if (error != 0)
2186 		goto exit;
2187 
2188 	mip = (mi_path_t *)mp->b_cont->b_rptr;
2189 	mutex_enter(&man_lock);
2190 	error = man_pg_cmd(mip, NULL);
2191 	mutex_exit(&man_lock);
2192 
2193 exit:
2194 	if (error)
2195 		miocnak(wq, mp, 0, error);
2196 	else
2197 		miocack(wq, mp, sizeof (mi_path_t), 0);
2198 }
2199 
2200 /*
2201  * Get the local ethernet address of this machine.
2202  */
2203 static void
2204 man_geteaddr(queue_t *wq, mblk_t *mp)
2205 {
2206 	eaddr_t			*eap;
2207 	int			error;
2208 
2209 	error = miocpullup(mp, sizeof (eaddr_t));
2210 	if (error != 0) {
2211 		miocnak(wq, mp, 0, error);
2212 		return;
2213 	}
2214 
2215 	eap = (eaddr_t *)mp->b_cont->b_rptr;
2216 	(void) localetheraddr(NULL, eap);
2217 	miocack(wq, mp, sizeof (eaddr_t), 0);
2218 }
2219 
2220 /*
2221  * Set my SC and other SC IPv4 addresses for use in man_pinger routine.
2222  */
2223 static void
2224 man_set_sc_ipaddrs(queue_t *wq, mblk_t *mp)
2225 {
2226 	int			error;
2227 
2228 	error = miocpullup(mp, sizeof (man_sc_ipaddrs_t));
2229 	if (error != 0)
2230 		goto exit;
2231 
2232 	man_sc_ipaddrs = *(man_sc_ipaddrs_t *)mp->b_cont->b_rptr;
2233 
2234 #ifdef DEBUG
2235 	{
2236 		char	buf[INET_ADDRSTRLEN];
2237 
2238 		(void) inet_ntop(AF_INET,
2239 				(void *) &man_sc_ipaddrs.ip_other_sc_ipaddr,
2240 				buf, INET_ADDRSTRLEN);
2241 		MAN_DBG(MAN_CONFIG, ("ip_other_sc_ipaddr = %s", buf));
2242 		(void) inet_ntop(AF_INET,
2243 				(void *) &man_sc_ipaddrs.ip_my_sc_ipaddr,
2244 				buf, INET_ADDRSTRLEN);
2245 		MAN_DBG(MAN_CONFIG, ("ip_my_sc_ipaddr = %s", buf));
2246 	}
2247 #endif /* DEBUG */
2248 exit:
2249 	if (error)
2250 		miocnak(wq, mp, 0, error);
2251 	else
2252 		miocack(wq, mp, sizeof (man_sc_ipaddrs_t), 0);
2253 }
2254 
2255 /*
2256  * Set my SC and other SC IPv6 addresses for use in man_pinger routine.
2257  */
2258 static void
2259 man_set_sc_ip6addrs(queue_t *wq, mblk_t *mp)
2260 {
2261 	int			error;
2262 
2263 	error = miocpullup(mp, sizeof (man_sc_ip6addrs_t));
2264 	if (error != 0)
2265 		goto exit;
2266 
2267 	man_sc_ip6addrs = *(man_sc_ip6addrs_t *)mp->b_cont->b_rptr;
2268 
2269 #ifdef DEBUG
2270 	{
2271 		char	buf[INET6_ADDRSTRLEN];
2272 
2273 		(void) inet_ntop(AF_INET6,
2274 				(void *) &man_sc_ip6addrs.ip6_other_sc_ipaddr,
2275 				buf, INET6_ADDRSTRLEN);
2276 		MAN_DBG(MAN_CONFIG, ("ip6_other_sc_ipaddr = %s", buf));
2277 		(void) inet_ntop(AF_INET6,
2278 				(void *) &man_sc_ip6addrs.ip6_my_sc_ipaddr,
2279 				buf, INET6_ADDRSTRLEN);
2280 		MAN_DBG(MAN_CONFIG, ("ip6_my_sc_ipaddr = %s", buf));
2281 	}
2282 #endif /* DEBUG */
2283 exit:
2284 	if (error)
2285 		miocnak(wq, mp, 0, error);
2286 	else
2287 		miocack(wq, mp, sizeof (man_sc_ip6addrs_t), 0);
2288 }
2289 
2290 /*
2291  * M_DATA fastpath info request.
2292  */
2293 static void
2294 man_dl_ioc_hdr_info(queue_t *wq, mblk_t *mp)
2295 {
2296 	manstr_t		*msp;
2297 	man_t			*manp;
2298 	mblk_t			*nmp;
2299 	man_dladdr_t		*dlap;
2300 	dl_unitdata_req_t	*dludp;
2301 	struct	ether_header	*headerp;
2302 	t_uscalar_t		off, len;
2303 	int			status = 0;
2304 
2305 	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: enter"));
2306 
2307 	msp = (manstr_t *)wq->q_ptr;
2308 	manp = msp->ms_manp;
2309 	if (manp == NULL) {
2310 		status = EINVAL;
2311 		goto exit;
2312 	}
2313 
2314 	status = miocpullup(mp, sizeof (dl_unitdata_req_t) + MAN_ADDRL);
2315 	if (status != 0)
2316 		goto exit;
2317 
2318 	/*
2319 	 * Sanity check the DL_UNITDATA_REQ destination address
2320 	 * offset and length values.
2321 	 */
2322 	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
2323 	off = dludp->dl_dest_addr_offset;
2324 	len = dludp->dl_dest_addr_length;
2325 	if (dludp->dl_primitive != DL_UNITDATA_REQ ||
2326 	    !MBLKIN(mp->b_cont, off, len) || len != MAN_ADDRL) {
2327 		status = EINVAL;
2328 		goto exit;
2329 	}
2330 
2331 	dlap = (man_dladdr_t  *)(mp->b_cont->b_rptr + off);
2332 
2333 	/*
2334 	 * Allocate a new mblk to hold the ether header.
2335 	 */
2336 	if ((nmp = allocb(ETHERHEADER_SIZE, BPRI_MED)) == NULL) {
2337 		status = ENOMEM;
2338 		goto exit;
2339 	}
2340 
2341 	/* We only need one dl_ioc_hdr mblk for replay */
2342 	if (!(msp->ms_flags & MAN_SFLAG_FAST))
2343 		status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2344 
2345 	/* Forward the packet to all lower destinations. */
2346 	if ((status != 0) || ((status = man_dlpi_senddown(msp, mp)) != 0)) {
2347 		freemsg(nmp);
2348 		goto exit;
2349 	}
2350 
2351 	nmp->b_wptr += ETHERHEADER_SIZE;
2352 
2353 	/*
2354 	 * Fill in the ether header.
2355 	 */
2356 	headerp = (struct ether_header *)nmp->b_rptr;
2357 	ether_copy(&dlap->dl_phys, &headerp->ether_dhost);
2358 	ether_copy(&manp->man_eaddr, &headerp->ether_shost);
2359 	put_ether_type(headerp, dlap->dl_sap);
2360 
2361 	/*
2362 	 * Link new mblk in after the "request" mblks.
2363 	 */
2364 	linkb(mp, nmp);
2365 
2366 exit:
2367 	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: returns, status = %d",
2368 		status));
2369 
2370 	if (status) {
2371 		miocnak(wq, mp, 0, status);
2372 	} else {
2373 		msp = (manstr_t *)wq->q_ptr;
2374 		msp->ms_flags |= MAN_SFLAG_FAST;
2375 		miocack(wq, mp, msgsize(mp->b_cont), 0);
2376 	}
2377 
2378 }
2379 
2380 /*
2381  * man_uwsrv - Upper write queue service routine to handle deferred
2382  * DLPI messages issued from upstream, the write side of the upper half
2383  * of multiplexor. It is also used by man_bwork to switch the lower
2384  * multiplexor.
2385  *
2386  *	wq - upper write queue of mxx
2387  */
2388 static int
2389 man_uwsrv(queue_t *wq)
2390 {
2391 	register mblk_t		*mp;
2392 	manstr_t		*msp;		/* per stream data */
2393 	man_t			*manp;		/* per instance data */
2394 	ehdr_t			*ep;
2395 	int			status;
2396 
2397 	msp = (manstr_t *)wq->q_ptr;
2398 
2399 	MAN_DBG(MAN_UWSRV, ("man_uwsrv: wq(0x%p) msp", (void *)wq));
2400 	MAN_DBGCALL(MAN_UWSRV, man_print_msp(msp));
2401 
2402 	if (msp == NULL)
2403 		goto done;
2404 
2405 	manp = msp->ms_manp;
2406 
2407 	while (mp = getq(wq)) {
2408 
2409 		switch (DB_TYPE(mp)) {
2410 		/*
2411 		 * Can probably remove this as I never put data messages
2412 		 * here.
2413 		 */
2414 		case M_DATA:
2415 			if (manp) {
2416 				ep = (ehdr_t *)mp->b_rptr;
2417 				status = man_start(wq, mp, &ep->ether_dhost);
2418 				if (status) {
2419 					/*
2420 					 * man_start() indicated flow control
2421 					 * situation, stop processing now.
2422 					 */
2423 					goto break_loop;
2424 				}
2425 			} else
2426 				freemsg(mp);
2427 			break;
2428 
2429 		case M_PROTO:
2430 		case M_PCPROTO:
2431 			status = man_proto(wq, mp);
2432 			if (status) {
2433 				/*
2434 				 * man_proto() indicated flow control
2435 				 * situation detected by man_start(),
2436 				 * stop processing now.
2437 				 */
2438 				goto break_loop;
2439 			}
2440 			break;
2441 
2442 		default:
2443 			MAN_DBG(MAN_UWSRV, ("man_uwsrv: discarding mp(0x%p)",
2444 				(void *)mp));
2445 			freemsg(mp);
2446 			break;
2447 		}
2448 	}
2449 
2450 break_loop:
2451 	/*
2452 	 * Check to see if bgthread wants us to do something inside the
2453 	 * perimeter.
2454 	 */
2455 	if ((msp->ms_flags & MAN_SFLAG_CONTROL) &&
2456 		man_iwork_q->q_work != NULL) {
2457 
2458 		man_iwork();
2459 	}
2460 
2461 done:
2462 
2463 	MAN_DBG(MAN_UWSRV, ("man_uwsrv: returns"));
2464 
2465 	return (0);
2466 }
2467 
2468 
2469 /*
2470  * man_proto - handle DLPI protocol requests issued from upstream.
2471  * Called by man_uwsrv().  We disassociate upper and lower multiplexor
2472  * DLPI state transitions. The upper stream here (manstr_t) transitions
2473  * appropriately, saves the DLPI requests via man_dlpi(), and then
2474  * arranges for the DLPI request to be sent down via man_dlpi_senddown() if
2475  * appropriate.
2476  *
2477  *	wq - upper write queue of mxx
2478  *	mp - mbl ptr to protocol request
2479  */
2480 static int
2481 man_proto(queue_t *wq, mblk_t *mp)
2482 {
2483 	union DL_primitives	*dlp;
2484 	int			flow_status = 0;
2485 
2486 	dlp = (union DL_primitives *)mp->b_rptr;
2487 
2488 	MAN_DBG((MAN_UWSRV | MAN_DLPI),
2489 		("man_proto: mp(0x%p) prim(%s)\n", (void *)mp,
2490 		dps[dlp->dl_primitive]));
2491 
2492 	switch (dlp->dl_primitive) {
2493 	case DL_UNITDATA_REQ:
2494 		flow_status = man_udreq(wq, mp);
2495 		break;
2496 
2497 	case DL_ATTACH_REQ:
2498 		man_areq(wq, mp);
2499 		break;
2500 
2501 	case DL_DETACH_REQ:
2502 		man_dreq(wq, mp);
2503 		break;
2504 
2505 	case DL_BIND_REQ:
2506 		man_breq(wq, mp);
2507 		break;
2508 
2509 	case DL_UNBIND_REQ:
2510 		man_ubreq(wq, mp);
2511 		break;
2512 
2513 	case DL_INFO_REQ:
2514 		man_ireq(wq, mp);
2515 		break;
2516 
2517 	case DL_PROMISCON_REQ:
2518 		man_ponreq(wq, mp);
2519 		break;
2520 
2521 	case DL_PROMISCOFF_REQ:
2522 		man_poffreq(wq, mp);
2523 		break;
2524 
2525 	case DL_ENABMULTI_REQ:
2526 		man_emreq(wq, mp);
2527 		break;
2528 
2529 	case DL_DISABMULTI_REQ:
2530 		man_dmreq(wq, mp);
2531 		break;
2532 
2533 	case DL_PHYS_ADDR_REQ:
2534 		man_pareq(wq, mp);
2535 		break;
2536 
2537 	case DL_SET_PHYS_ADDR_REQ:
2538 		man_spareq(wq, mp);
2539 		break;
2540 
2541 	default:
2542 		MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: prim(%d)\n",
2543 			dlp->dl_primitive));
2544 		dlerrorack(wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
2545 		break;
2546 
2547 	} /* End switch */
2548 
2549 	MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: exit\n"));
2550 	return (flow_status);
2551 
2552 }
2553 
2554 static int
2555 man_udreq(queue_t *wq, mblk_t *mp)
2556 {
2557 	manstr_t		*msp;
2558 	dl_unitdata_req_t	*dludp;
2559 	mblk_t	*nmp;
2560 	man_dladdr_t		*dlap;
2561 	t_uscalar_t 		off, len;
2562 	int 			flow_status = 0;
2563 
2564 	msp = (manstr_t *)wq->q_ptr;
2565 
2566 
2567 	if (msp->ms_dlpistate != DL_IDLE) {
2568 		dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
2569 		return (flow_status);
2570 	}
2571 	dludp = (dl_unitdata_req_t *)mp->b_rptr;
2572 	off = dludp->dl_dest_addr_offset;
2573 	len = dludp->dl_dest_addr_length;
2574 
2575 	/*
2576 	 * Validate destination address format.
2577 	 */
2578 	if (!MBLKIN(mp, off, len) || (len != MAN_ADDRL)) {
2579 		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADADDR, 0);
2580 		return (flow_status);
2581 	}
2582 
2583 	/*
2584 	 * Error if no M_DATA follows.
2585 	 */
2586 	nmp = mp->b_cont;
2587 	if (nmp == NULL) {
2588 		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADDATA, 0);
2589 		return (flow_status);
2590 	}
2591 
2592 	dlap = (man_dladdr_t *)(mp->b_rptr + off);
2593 
2594 	flow_status = man_start(wq, mp, &dlap->dl_phys);
2595 	return (flow_status);
2596 }
2597 
2598 /*
2599  * Handle DL_ATTACH_REQ.
2600  */
2601 static void
2602 man_areq(queue_t *wq, mblk_t *mp)
2603 {
2604 	man_t			*manp;	/* per instance data */
2605 	manstr_t		*msp;	/* per stream data */
2606 	short			ppa;
2607 	union DL_primitives	*dlp;
2608 	mblk_t			*preq = NULL;
2609 	int			did_refcnt = FALSE;
2610 	int			dlerror = 0;
2611 	int			status = 0;
2612 
2613 	msp = (manstr_t *)wq->q_ptr;
2614 	dlp = (union DL_primitives *)mp->b_rptr;
2615 
2616 	/*
2617 	 * Attach us to MAN PPA (device instance).
2618 	 */
2619 	if (MBLKL(mp) < DL_ATTACH_REQ_SIZE) {
2620 		dlerror = DL_BADPRIM;
2621 		goto exit;
2622 	}
2623 
2624 	if (msp->ms_dlpistate != DL_UNATTACHED) {
2625 		dlerror = DL_OUTSTATE;
2626 		goto exit;
2627 	}
2628 
2629 	ppa = dlp->attach_req.dl_ppa;
2630 	if (ppa == -1 || qassociate(wq, ppa) != 0) {
2631 		dlerror = DL_BADPPA;
2632 		MAN_DBG(MAN_WARN, ("man_areq: bad PPA %d", ppa));
2633 		goto exit;
2634 	}
2635 
2636 	mutex_enter(&man_lock);
2637 	manp = ddi_get_soft_state(man_softstate, ppa);
2638 	ASSERT(manp != NULL);	/* qassociate() succeeded */
2639 
2640 	manp->man_refcnt++;
2641 	did_refcnt = TRUE;
2642 	mutex_exit(&man_lock);
2643 
2644 	/*
2645 	 * Create a DL replay list for the lower stream. These wont
2646 	 * actually be sent down until the lower streams are made active
2647 	 * (sometime after the call to man_init_dests below).
2648 	 */
2649 	preq = man_alloc_physreq_mp(&manp->man_eaddr);
2650 	if (preq == NULL) {
2651 		dlerror = DL_SYSERR;
2652 		status = ENOMEM;
2653 		goto exit;
2654 	}
2655 
2656 	/*
2657 	 * Make copy for dlpi resync of upper and lower streams.
2658 	 */
2659 	if (man_dlpi(msp, mp)) {
2660 		dlerror = DL_SYSERR;
2661 		status = ENOMEM;
2662 		goto exit;
2663 	}
2664 
2665 	/* TBD - need to clean off ATTACH req on failure here. */
2666 	if (man_dlpi(msp, preq)) {
2667 		dlerror = DL_SYSERR;
2668 		status = ENOMEM;
2669 		goto exit;
2670 	}
2671 
2672 	/*
2673 	 * man_init_dests/man_start_dest needs these set before call.
2674 	 */
2675 	msp->ms_manp = manp;
2676 	msp->ms_meta_ppa = ppa;
2677 
2678 	/*
2679 	 *  Allocate and init lower destination structures.
2680 	 */
2681 	ASSERT(msp->ms_dests == NULL);
2682 	if (man_init_dests(manp, msp)) {
2683 		mblk_t	 *tmp;
2684 
2685 		/*
2686 		 * If we cant get the lower streams ready, then
2687 		 * remove the messages from the DL replay list and
2688 		 * fail attach.
2689 		 */
2690 		while ((tmp = msp->ms_dl_mp) != NULL) {
2691 			msp->ms_dl_mp = msp->ms_dl_mp->b_next;
2692 			tmp->b_next = tmp->b_prev = NULL;
2693 			freemsg(tmp);
2694 		}
2695 
2696 		msp->ms_manp = NULL;
2697 		msp->ms_meta_ppa = -1;
2698 
2699 		dlerror = DL_SYSERR;
2700 		status = ENOMEM;
2701 		goto exit;
2702 	}
2703 
2704 	MAN_DBG(MAN_DLPI, ("man_areq: ppa 0x%x man_refcnt: %d\n",
2705 		ppa, manp->man_refcnt));
2706 
2707 	SETSTATE(msp, DL_UNBOUND);
2708 
2709 exit:
2710 	if (dlerror == 0) {
2711 		dlokack(wq, mp, DL_ATTACH_REQ);
2712 	} else {
2713 		if (did_refcnt) {
2714 			mutex_enter(&man_lock);
2715 			manp->man_refcnt--;
2716 			mutex_exit(&man_lock);
2717 		}
2718 		dlerrorack(wq, mp, DL_ATTACH_REQ, dlerror, status);
2719 		(void) qassociate(wq, -1);
2720 	}
2721 	if (preq != NULL)
2722 		freemsg(preq);
2723 
2724 }
2725 
2726 /*
2727  * Called at DL_ATTACH time.
2728  * Man_lock is held to protect pathgroup list(man_pg).
2729  */
2730 static int
2731 man_init_dests(man_t *manp, manstr_t *msp)
2732 {
2733 	man_dest_t	*mdp;
2734 	man_pg_t	*mpg;
2735 	int		i;
2736 
2737 	mdp = man_kzalloc(MAN_DEST_ARRAY_SIZE, KM_NOSLEEP);
2738 	if (mdp == NULL)
2739 		return (ENOMEM);
2740 
2741 	msp->ms_dests = mdp;
2742 
2743 	mutex_enter(&man_lock);
2744 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2745 
2746 		mdp[i].md_muxid = -1;	/* muxid 0 is valid */
2747 		mutex_init(&mdp->md_lock, NULL, MUTEX_DRIVER, NULL);
2748 
2749 		mpg = man_find_pg_by_id(manp->man_pg, i);
2750 
2751 		if (mpg && man_find_active_path(mpg->mpg_pathp))
2752 			man_start_dest(&mdp[i], msp, mpg);
2753 	}
2754 	mutex_exit(&man_lock);
2755 
2756 	return (0);
2757 }
2758 
2759 /*
2760  * Get a destination ready for use.
2761  */
2762 static void
2763 man_start_dest(man_dest_t *mdp, manstr_t *msp, man_pg_t *mpg)
2764 {
2765 	man_path_t	*ap;
2766 
2767 	mdp->md_muxid = -1;
2768 	mdp->md_dlpistate = DL_UNATTACHED;
2769 	mdp->md_msp = msp;
2770 	mdp->md_rq = msp->ms_rq;
2771 	mdp->md_pg_id = mpg->mpg_pg_id;
2772 
2773 	ASSERT(msp->ms_manp);
2774 
2775 	ether_copy(&msp->ms_manp->man_eaddr, &mdp->md_src_eaddr);
2776 	ether_copy(&mpg->mpg_dst_eaddr, &mdp->md_dst_eaddr);
2777 
2778 	ap = man_find_active_path(mpg->mpg_pathp);
2779 	ASSERT(ap);
2780 	mdp->md_device = ap->mp_device;
2781 
2782 	/*
2783 	 * Set up linktimers so that first time through, we will do
2784 	 * a failover.
2785 	 */
2786 	mdp->md_linkstate = MAN_LINKFAIL;
2787 	mdp->md_state = MAN_DSTATE_INITIALIZING;
2788 	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
2789 		(void *)mdp, man_gettimer(MAN_TIMER_INIT, mdp));
2790 
2791 	/*
2792 	 * As an optimization, if there is only one destination,
2793 	 * remember the destination pointer. Used by man_start().
2794 	 */
2795 	man_set_optimized_dest(msp);
2796 
2797 	MAN_DBG(MAN_DEST, ("man_start_dest: mdp"));
2798 	MAN_DBGCALL(MAN_DEST, man_print_mdp(mdp));
2799 }
2800 
2801 static void
2802 man_set_optimized_dest(manstr_t *msp)
2803 {
2804 	int		count = 0;
2805 	int		i;
2806 	man_dest_t	*mdp = NULL;
2807 
2808 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2809 		if (msp->ms_dests[i].md_msp != NULL) {
2810 			count++;
2811 			mdp = &msp->ms_dests[i];
2812 		}
2813 	}
2814 
2815 	if (count == 1)
2816 		msp->ms_destp = mdp;
2817 	else
2818 		msp->ms_destp = NULL;
2819 
2820 }
2821 
2822 /*
2823  * Catch dlpi message for replaying, and arrange to send it down
2824  * to any destinations not PLUMBING. See man_dlpi_replay().
2825  */
2826 static int
2827 man_dlpi(manstr_t *msp, mblk_t *mp)
2828 {
2829 	int	status;
2830 
2831 	status = man_dl_catch(&msp->ms_dl_mp, mp);
2832 	if (status == 0)
2833 		status = man_dlpi_senddown(msp, mp);
2834 
2835 	return (status);
2836 }
2837 
2838 /*
2839  * Catch IOCTL type DL_ messages.
2840  */
2841 static int
2842 man_dlioc(manstr_t *msp, mblk_t *mp)
2843 {
2844 	int status;
2845 
2846 	status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2847 	if (status == 0)
2848 		status = man_dlpi_senddown(msp, mp);
2849 
2850 	return (status);
2851 }
2852 
2853 /*
2854  * We catch all DLPI messages that we have to resend to a new AP'ed
2855  * device to put him in the right state.  We link these messages together
2856  * w/ their b_next fields and hang it off of msp->ms_dl_mp.  We
2857  * must be careful to restore b_next fields before doing dupmsg/freemsg!
2858  *
2859  *	msp - pointer of stream struct to process
2860  *	mblk - pointer to DLPI request to catch
2861  */
2862 static int
2863 man_dl_catch(mblk_t **mplist, mblk_t *mp)
2864 {
2865 	mblk_t			*dupmp;
2866 	mblk_t			*tmp;
2867 	unsigned		prim;
2868 	int			status = 0;
2869 
2870 	dupmp = copymsg(mp);
2871 	if (dupmp == NULL) {
2872 		status = ENOMEM;
2873 		goto exit;
2874 	}
2875 
2876 
2877 	if (*mplist == NULL)
2878 		*mplist = dupmp;
2879 	else {
2880 		for (tmp = *mplist; tmp->b_next; )
2881 			tmp = tmp->b_next;
2882 
2883 		tmp->b_next = dupmp;
2884 	}
2885 
2886 	prim = DL_PRIM(mp);
2887 	MAN_DBG(MAN_DLPI,
2888 		("man_dl_catch: adding %s\n",
2889 		(prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
2890 		(prim == DLIOCRAW) ? "DLIOCRAW" :
2891 		(prim == DL_PROMISCON_REQ) ? promisc[DL_PROMISCON_TYPE(mp)] :
2892 		dps[prim]));
2893 
2894 exit:
2895 
2896 	return (status);
2897 }
2898 
2899 /*
2900  * Send down a single DLPI M_[PC]PROTO to all currently valid dests.
2901  *
2902  *	msp - ptr to NDM stream structure DL_ messages was received on.
2903  *	mp - ptr to mblk containing DL_ request.
2904  */
2905 static int
2906 man_dlpi_senddown(manstr_t *msp, mblk_t *mp)
2907 {
2908 	man_dest_t	*mdp;
2909 	int		i;
2910 	mblk_t		*rmp[MAN_MAX_DESTS];	/* Copy to replay */
2911 	int		dstate[MAN_MAX_DESTS];
2912 	int		no_dests = TRUE;
2913 	int		status = 0;
2914 
2915 	if (msp->ms_dests == NULL)
2916 		goto exit;
2917 
2918 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2919 		mdp = &msp->ms_dests[i];
2920 		if (mdp->md_state == MAN_DSTATE_READY) {
2921 			dstate[i] = TRUE;
2922 			no_dests = FALSE;
2923 		} else {
2924 			dstate[i] = FALSE;
2925 		}
2926 		rmp[i] = NULL;
2927 	}
2928 
2929 	if (no_dests)
2930 		goto exit;
2931 
2932 	/*
2933 	 * Build replay and duplicate list for all possible destinations.
2934 	 */
2935 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2936 		if (dstate[i]) {
2937 			rmp[i] = copymsg(mp);
2938 			if (rmp[i] == NULL) {
2939 				status = ENOMEM;
2940 				break;
2941 			}
2942 		}
2943 	}
2944 
2945 	if (status == 0) {
2946 		for (i = 0; i < MAN_MAX_DESTS; i++)
2947 			if (dstate[i]) {
2948 				mdp = &msp->ms_dests[i];
2949 
2950 				ASSERT(mdp->md_wq != NULL);
2951 				ASSERT(mp->b_next == NULL);
2952 				ASSERT(mp->b_prev == NULL);
2953 
2954 				man_dlpi_replay(mdp, rmp[i]);
2955 			}
2956 	} else {
2957 		for (; i >= 0; i--)
2958 			if (dstate[i] && rmp[i])
2959 				freemsg(rmp[i]);
2960 	}
2961 
2962 exit:
2963 	return (status);
2964 }
2965 
2966 /*
2967  * man_dlpi_replay - traverse the list of DLPI requests and reapply them to
2968  * get the upper and lower streams into the same state. Called holding inner
2969  * perimeter lock exclusive. Note thet we defer M_IOCTL type dlpi messages
2970  * until we get an OK_ACK to our ATTACH (see man_lrsrv and
2971  * man_dlioc_replay).
2972  *
2973  * 	mdp - pointer to lower queue (destination)
2974  *	rmp - list of mblks to send down stream.
2975  */
2976 static void
2977 man_dlpi_replay(man_dest_t *mdp, mblk_t *rmp)
2978 {
2979 	mblk_t			*mp;
2980 	union DL_primitives	*dlp = NULL;
2981 
2982 	MAN_DBG(MAN_DLPI, ("man_dlpi_replay: mdp(0x%p)", (void *)mdp));
2983 
2984 	while (rmp) {
2985 		mp = rmp;
2986 		rmp = rmp->b_next;
2987 		mp->b_prev = mp->b_next = NULL;
2988 
2989 		dlp = (union DL_primitives *)mp->b_rptr;
2990 		MAN_DBG(MAN_DLPI,
2991 			("man_dlpi_replay: mdp(0x%p) sending %s\n",
2992 			(void *)mdp,
2993 			(dlp->dl_primitive == DL_IOC_HDR_INFO) ?
2994 			"DL_IOC_HDR_INFO" : (dlp->dl_primitive == DLIOCRAW) ?
2995 			"DLIOCRAW" : dps[(unsigned)(dlp->dl_primitive)]));
2996 
2997 		if (dlp->dl_primitive == DL_ATTACH_REQ) {
2998 			/*
2999 			 * insert the lower devices ppa.
3000 			 */
3001 			dlp->attach_req.dl_ppa = mdp->md_device.mdev_ppa;
3002 		}
3003 
3004 		(void) putnext(mdp->md_wq, mp);
3005 	}
3006 
3007 }
3008 
3009 static void
3010 man_dreq(queue_t *wq, mblk_t *mp)
3011 {
3012 	manstr_t	*msp;	/* per stream data */
3013 	man_work_t	*wp;
3014 
3015 	msp = (manstr_t *)wq->q_ptr;
3016 
3017 	if (MBLKL(mp) < DL_DETACH_REQ_SIZE) {
3018 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_BADPRIM, 0);
3019 		return;
3020 	}
3021 
3022 	if (msp->ms_dlpistate != DL_UNBOUND) {
3023 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_OUTSTATE, 0);
3024 		return;
3025 	}
3026 
3027 	ASSERT(msp->ms_dests != NULL);
3028 
3029 	wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_NOSLEEP);
3030 	if (wp == NULL) {
3031 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_SYSERR, ENOMEM);
3032 		return;
3033 	}
3034 	man_dodetach(msp, wp);
3035 	(void) qassociate(wq, -1);
3036 
3037 	SETSTATE(msp, DL_UNATTACHED);
3038 
3039 	dlokack(wq, mp, DL_DETACH_REQ);
3040 }
3041 
3042 static void
3043 man_dl_clean(mblk_t **mplist)
3044 {
3045 	mblk_t	*tmp;
3046 
3047 	/*
3048 	 * Toss everything.
3049 	 */
3050 	while ((tmp = *mplist) != NULL) {
3051 		*mplist = (*mplist)->b_next;
3052 		tmp->b_next = tmp->b_prev = NULL;
3053 		freemsg(tmp);
3054 	}
3055 
3056 }
3057 
3058 /*
3059  * man_dl_release - Remove the corresponding DLPI request from the
3060  * catch list. Walk thru the catch list looking for the other half of
3061  * the pair and delete it.  If we are detaching, delete the entire list.
3062  *
3063  *	msp - pointer of stream struct to process
3064  *	mp  - pointer to mblk to first half of pair.  We will delete other
3065  * 		half of pair based on this.
3066  */
3067 static void
3068 man_dl_release(mblk_t **mplist, mblk_t *mp)
3069 {
3070 	uchar_t			match_dbtype;
3071 	mblk_t			*tmp;
3072 	mblk_t			*tmpp;
3073 	int			matched = FALSE;
3074 
3075 	if (*mplist == NULL)
3076 		goto exit;
3077 
3078 	match_dbtype = DB_TYPE(mp);
3079 
3080 	/*
3081 	 * Currently we only clean DL_ PROTO type messages. There is
3082 	 * no way to turn off M_CTL or DL_IOC stuff other than sending
3083 	 * down a DL_DETACH, which resets everything.
3084 	 */
3085 	if (match_dbtype != M_PROTO && match_dbtype != M_PCPROTO) {
3086 		goto exit;
3087 	}
3088 
3089 	/*
3090 	 * Selectively find a caught mblk that matches this one and
3091 	 * remove it from the list
3092 	 */
3093 	tmp = tmpp = *mplist;
3094 	matched = man_match_proto(mp, tmp);
3095 	if (matched) {
3096 		*mplist = tmp->b_next;
3097 		tmp->b_next = tmp->b_prev = NULL;
3098 	} else {
3099 		for (tmp = tmp->b_next; tmp != NULL; tmp = tmp->b_next) {
3100 			if (matched = man_match_proto(mp, tmp))
3101 				break;
3102 			tmpp = tmp;
3103 		}
3104 
3105 		if (matched) {
3106 			tmpp->b_next = tmp->b_next;
3107 			tmp->b_next = tmp->b_prev = NULL;
3108 		}
3109 	}
3110 
3111 exit:
3112 	if (matched) {
3113 
3114 		MAN_DBG(MAN_DLPI, ("man_dl_release: release %s",
3115 			(DL_PRIM(mp) == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3116 			(DL_PRIM(mp) == DLIOCRAW) ? "DLIOCRAW" :
3117 			dps[(int)DL_PRIM(mp)]));
3118 
3119 		freemsg(tmp);
3120 	}
3121 	MAN_DBG(MAN_DLPI, ("man_dl_release: returns"));
3122 
3123 }
3124 
3125 /*
3126  * Compare two DL_ messages. If they are complimentary (e.g. DL_UNBIND
3127  * compliments DL_BIND), return true.
3128  */
3129 static int
3130 man_match_proto(mblk_t *mp1, mblk_t *mp2)
3131 {
3132 	t_uscalar_t	prim1;
3133 	t_uscalar_t	prim2;
3134 	int		matched = FALSE;
3135 
3136 	/*
3137 	 * Primitive to clean off list.
3138 	 */
3139 	prim1 = DL_PRIM(mp1);
3140 	prim2 = DL_PRIM(mp2);
3141 
3142 	switch (prim1) {
3143 	case DL_UNBIND_REQ:
3144 		if (prim2 == DL_BIND_REQ)
3145 			matched = TRUE;
3146 		break;
3147 
3148 	case DL_PROMISCOFF_REQ:
3149 		if (prim2 == DL_PROMISCON_REQ) {
3150 			dl_promiscoff_req_t	*poff1;
3151 			dl_promiscoff_req_t	*poff2;
3152 
3153 			poff1 = (dl_promiscoff_req_t *)mp1->b_rptr;
3154 			poff2 = (dl_promiscoff_req_t *)mp2->b_rptr;
3155 
3156 			if (poff1->dl_level == poff2->dl_level)
3157 				matched = TRUE;
3158 		}
3159 		break;
3160 
3161 	case DL_DISABMULTI_REQ:
3162 		if (prim2 == DL_ENABMULTI_REQ) {
3163 			union DL_primitives	*dlp;
3164 			t_uscalar_t		off;
3165 			eaddr_t			*addrp1;
3166 			eaddr_t			*addrp2;
3167 
3168 			dlp = (union DL_primitives *)mp1->b_rptr;
3169 			off = dlp->disabmulti_req.dl_addr_offset;
3170 			addrp1 = (eaddr_t *)(mp1->b_rptr + off);
3171 
3172 			dlp = (union DL_primitives *)mp2->b_rptr;
3173 			off = dlp->disabmulti_req.dl_addr_offset;
3174 			addrp2 = (eaddr_t *)(mp2->b_rptr + off);
3175 
3176 			if (ether_cmp(addrp1, addrp2) == 0)
3177 				matched = 1;
3178 		}
3179 		break;
3180 
3181 	default:
3182 		break;
3183 	}
3184 
3185 	MAN_DBG(MAN_DLPI, ("man_match_proto returns %d", matched));
3186 
3187 	return (matched);
3188 }
3189 
3190 /*
3191  * Bind upper stream to a particular SAP. Called with exclusive innerperim
3192  * QPAIR, shared outerperim.
3193  */
3194 static void
3195 man_breq(queue_t *wq, mblk_t *mp)
3196 {
3197 	man_t			*manp;	/* per instance data */
3198 	manstr_t		*msp;	/* per stream data */
3199 	union DL_primitives	*dlp;
3200 	man_dladdr_t		man_addr;
3201 	t_uscalar_t		sap;
3202 	t_uscalar_t		xidtest;
3203 
3204 	msp = (manstr_t *)wq->q_ptr;
3205 
3206 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
3207 		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADPRIM, 0);
3208 		return;
3209 	}
3210 
3211 	if (msp->ms_dlpistate != DL_UNBOUND) {
3212 		dlerrorack(wq, mp, DL_BIND_REQ, DL_OUTSTATE, 0);
3213 		return;
3214 	}
3215 
3216 	dlp = (union DL_primitives *)mp->b_rptr;
3217 	manp = msp->ms_manp;			/* valid after attach */
3218 	sap = dlp->bind_req.dl_sap;
3219 	xidtest = dlp->bind_req.dl_xidtest_flg;
3220 
3221 	ASSERT(manp);
3222 
3223 	if (xidtest) {
3224 		dlerrorack(wq, mp, DL_BIND_REQ, DL_NOAUTO, 0);
3225 		return;
3226 	}
3227 
3228 	if (sap > ETHERTYPE_MAX) {
3229 		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADSAP, 0);
3230 		return;
3231 	}
3232 
3233 	if (man_dlpi(msp, mp)) {
3234 		dlerrorack(wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
3235 		return;
3236 	}
3237 
3238 	msp->ms_sap = sap;
3239 
3240 	SETSTATE(msp, DL_IDLE);
3241 
3242 	man_addr.dl_sap = msp->ms_sap;
3243 	ether_copy(&msp->ms_manp->man_eaddr, &man_addr.dl_phys);
3244 
3245 	dlbindack(wq, mp, msp->ms_sap, &man_addr, MAN_ADDRL, 0, 0);
3246 
3247 }
3248 
3249 static void
3250 man_ubreq(queue_t *wq, mblk_t *mp)
3251 {
3252 	manstr_t		*msp;	/* per stream data */
3253 
3254 	msp = (manstr_t *)wq->q_ptr;
3255 
3256 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
3257 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
3258 		return;
3259 	}
3260 
3261 	if (msp->ms_dlpistate != DL_IDLE) {
3262 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
3263 		return;
3264 	}
3265 
3266 	if (man_dlpi_senddown(msp, mp)) {
3267 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
3268 		return;
3269 	}
3270 
3271 	man_dl_release(&msp->ms_dl_mp, mp);
3272 
3273 	SETSTATE(msp, DL_UNBOUND);
3274 
3275 	dlokack(wq, mp, DL_UNBIND_REQ);
3276 
3277 }
3278 
3279 static void
3280 man_ireq(queue_t *wq, mblk_t *mp)
3281 {
3282 	manstr_t	*msp;
3283 	dl_info_ack_t	*dlip;
3284 	man_dladdr_t	*dlap;
3285 	eaddr_t		*ep;
3286 	size_t	size;
3287 
3288 	msp = (manstr_t *)wq->q_ptr;
3289 
3290 	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
3291 		dlerrorack(wq, mp, DL_INFO_REQ, DL_BADPRIM, 0);
3292 		return;
3293 	}
3294 
3295 	/* Exchange current msg for a DL_INFO_ACK. */
3296 	size = sizeof (dl_info_ack_t) + MAN_ADDRL + ETHERADDRL;
3297 	mp = mexchange(wq, mp, size, M_PCPROTO, DL_INFO_ACK);
3298 	if (mp == NULL) {
3299 		MAN_DBG(MAN_DLPI, ("man_ireq: man_ireq: mp == NULL."));
3300 		return;
3301 	}
3302 
3303 	/* Fill in the DL_INFO_ACK fields and reply. */
3304 	dlip = (dl_info_ack_t *)mp->b_rptr;
3305 	*dlip = man_infoack;
3306 	dlip->dl_current_state = msp->ms_dlpistate;
3307 	dlap = (man_dladdr_t *)(mp->b_rptr + dlip->dl_addr_offset);
3308 	dlap->dl_sap = msp->ms_sap;
3309 
3310 	/*
3311 	 * If attached, return physical address.
3312 	 */
3313 	if (msp->ms_manp != NULL) {
3314 		ether_copy(&msp->ms_manp->man_eaddr, &dlap->dl_phys);
3315 	} else {
3316 		bzero((caddr_t)&dlap->dl_phys, ETHERADDRL);
3317 	}
3318 
3319 	ep = (struct ether_addr *)(mp->b_rptr + dlip->dl_brdcst_addr_offset);
3320 	ether_copy(&etherbroadcast, ep);
3321 
3322 	qreply(wq, mp);
3323 
3324 }
3325 
3326 
3327 static void
3328 man_ponreq(queue_t *wq, mblk_t *mp)
3329 {
3330 	manstr_t	*msp;
3331 	int		flag;
3332 
3333 	msp = (manstr_t *)wq->q_ptr;
3334 
3335 	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
3336 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
3337 		return;
3338 	}
3339 
3340 	switch (((dl_promiscon_req_t *)mp->b_rptr)->dl_level) {
3341 	case DL_PROMISC_PHYS:
3342 		flag = MAN_SFLAG_ALLPHYS;
3343 		break;
3344 
3345 	case DL_PROMISC_SAP:
3346 		flag = MAN_SFLAG_ALLSAP;
3347 		break;
3348 
3349 	case DL_PROMISC_MULTI:
3350 		flag = MAN_SFLAG_ALLMULTI;
3351 		break;
3352 
3353 	default:
3354 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_NOTSUPPORTED, 0);
3355 		return;
3356 	}
3357 
3358 	/*
3359 	 * Catch request for replay, and forward down to any lower
3360 	 * lower stream.
3361 	 */
3362 	if (man_dlpi(msp, mp)) {
3363 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_SYSERR, ENOMEM);
3364 		return;
3365 	}
3366 
3367 	msp->ms_flags |= flag;
3368 
3369 	dlokack(wq, mp, DL_PROMISCON_REQ);
3370 
3371 }
3372 
3373 static void
3374 man_poffreq(queue_t *wq, mblk_t *mp)
3375 {
3376 	manstr_t		*msp;
3377 	int			flag;
3378 
3379 	msp = (manstr_t *)wq->q_ptr;
3380 
3381 	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
3382 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
3383 		return;
3384 	}
3385 
3386 	switch (((dl_promiscoff_req_t *)mp->b_rptr)->dl_level) {
3387 	case DL_PROMISC_PHYS:
3388 		flag = MAN_SFLAG_ALLPHYS;
3389 		break;
3390 
3391 	case DL_PROMISC_SAP:
3392 		flag = MAN_SFLAG_ALLSAP;
3393 		break;
3394 
3395 	case DL_PROMISC_MULTI:
3396 		flag = MAN_SFLAG_ALLMULTI;
3397 		break;
3398 
3399 	default:
3400 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTSUPPORTED, 0);
3401 		return;
3402 	}
3403 
3404 	if ((msp->ms_flags & flag) == 0) {
3405 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
3406 		return;
3407 	}
3408 
3409 	if (man_dlpi_senddown(msp, mp)) {
3410 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_SYSERR, ENOMEM);
3411 		return;
3412 	}
3413 
3414 	man_dl_release(&msp->ms_dl_mp, mp);
3415 
3416 	msp->ms_flags &= ~flag;
3417 
3418 	dlokack(wq, mp, DL_PROMISCOFF_REQ);
3419 
3420 }
3421 
3422 /*
3423  * Enable multicast requests. We might need to track addresses instead of
3424  * just passing things through (see eri_dmreq) - TBD.
3425  */
3426 static void
3427 man_emreq(queue_t *wq, mblk_t *mp)
3428 {
3429 	manstr_t		*msp;
3430 	union DL_primitives	*dlp;
3431 	eaddr_t			*addrp;
3432 	t_uscalar_t		off;
3433 	t_uscalar_t		len;
3434 
3435 	msp = (manstr_t *)wq->q_ptr;
3436 
3437 	if (MBLKL(mp) < DL_ENABMULTI_REQ_SIZE) {
3438 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADPRIM, 0);
3439 		return;
3440 	}
3441 
3442 	if (msp->ms_dlpistate == DL_UNATTACHED) {
3443 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3444 		return;
3445 	}
3446 
3447 	dlp = (union DL_primitives *)mp->b_rptr;
3448 	len = dlp->enabmulti_req.dl_addr_length;
3449 	off = dlp->enabmulti_req.dl_addr_offset;
3450 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3451 
3452 	if ((len != ETHERADDRL) ||
3453 		!MBLKIN(mp, off, len) ||
3454 		((addrp->ether_addr_octet[0] & 01) == 0)) {
3455 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3456 		return;
3457 	}
3458 
3459 	/*
3460 	 * Catch request for replay, and forward down to any lower
3461 	 * lower stream.
3462 	 */
3463 	if (man_dlpi(msp, mp)) {
3464 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3465 		return;
3466 	}
3467 
3468 	dlokack(wq, mp, DL_ENABMULTI_REQ);
3469 
3470 }
3471 
3472 static void
3473 man_dmreq(queue_t *wq, mblk_t *mp)
3474 {
3475 	manstr_t		*msp;
3476 	union DL_primitives	*dlp;
3477 	eaddr_t			*addrp;
3478 	t_uscalar_t		off;
3479 	t_uscalar_t		len;
3480 
3481 	msp = (manstr_t *)wq->q_ptr;
3482 
3483 	if (MBLKL(mp) < DL_DISABMULTI_REQ_SIZE) {
3484 		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADPRIM, 0);
3485 		return;
3486 	}
3487 
3488 	if (msp->ms_dlpistate == DL_UNATTACHED) {
3489 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3490 		return;
3491 	}
3492 
3493 	dlp = (union DL_primitives *)mp->b_rptr;
3494 	len = dlp->enabmulti_req.dl_addr_length;
3495 	off = dlp->enabmulti_req.dl_addr_offset;
3496 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3497 
3498 	if ((len != ETHERADDRL) ||
3499 		!MBLKIN(mp, off, len) ||
3500 		((addrp->ether_addr_octet[0] & 01) == 0)) {
3501 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3502 		return;
3503 	}
3504 
3505 	if (man_dlpi_senddown(msp, mp)) {
3506 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3507 		return;
3508 	}
3509 
3510 	man_dl_release(&msp->ms_dl_mp, mp);
3511 
3512 	dlokack(wq, mp, DL_DISABMULTI_REQ);
3513 
3514 }
3515 
3516 static void
3517 man_pareq(queue_t *wq, mblk_t *mp)
3518 {
3519 	manstr_t		*msp;
3520 	union	DL_primitives	*dlp;
3521 	uint32_t		type;
3522 	struct	ether_addr	addr;
3523 
3524 	msp = (manstr_t *)wq->q_ptr;
3525 
3526 	if (MBLKL(mp) < DL_PHYS_ADDR_REQ_SIZE) {
3527 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3528 		return;
3529 	}
3530 
3531 	dlp = (union DL_primitives *)mp->b_rptr;
3532 	type = dlp->physaddr_req.dl_addr_type;
3533 	if (msp->ms_manp == NULL) {
3534 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3535 		return;
3536 	}
3537 
3538 	switch (type) {
3539 	case	DL_FACT_PHYS_ADDR:
3540 		(void) localetheraddr((struct ether_addr *)NULL, &addr);
3541 		break;
3542 
3543 	case	DL_CURR_PHYS_ADDR:
3544 		ether_bcopy(&msp->ms_manp->man_eaddr, &addr);
3545 		break;
3546 
3547 	default:
3548 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_NOTSUPPORTED, 0);
3549 		return;
3550 	}
3551 
3552 	dlphysaddrack(wq, mp, &addr, ETHERADDRL);
3553 }
3554 
3555 /*
3556  * TBD - this routine probably should be protected w/ an ndd
3557  * tuneable, or a man.conf parameter.
3558  */
3559 static void
3560 man_spareq(queue_t *wq, mblk_t *mp)
3561 {
3562 	manstr_t		*msp;
3563 	union DL_primitives	*dlp;
3564 	t_uscalar_t		off;
3565 	t_uscalar_t		len;
3566 	eaddr_t			*addrp;
3567 
3568 	msp = (manstr_t *)wq->q_ptr;
3569 
3570 	if (MBLKL(mp) < DL_SET_PHYS_ADDR_REQ_SIZE) {
3571 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3572 		return;
3573 	}
3574 
3575 	dlp = (union DL_primitives *)mp->b_rptr;
3576 	len = dlp->set_physaddr_req.dl_addr_length;
3577 	off = dlp->set_physaddr_req.dl_addr_offset;
3578 
3579 	if (!MBLKIN(mp, off, len)) {
3580 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3581 		return;
3582 	}
3583 
3584 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3585 
3586 	/*
3587 	 * Error if length of address isn't right or the address
3588 	 * specified is a multicast or broadcast address.
3589 	 */
3590 	if ((len != ETHERADDRL) ||
3591 		((addrp->ether_addr_octet[0] & 01) == 1) ||
3592 		(ether_cmp(addrp, &etherbroadcast) == 0)) {
3593 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADADDR, 0);
3594 		return;
3595 	}
3596 	/*
3597 	 * Error if this stream is not attached to a device.
3598 	 */
3599 	if (msp->ms_manp == NULL) {
3600 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3601 		return;
3602 	}
3603 
3604 	/*
3605 	 * We will also resend DL_SET_PHYS_ADDR_REQ for each dest
3606 	 * when it is linked under us.
3607 	 */
3608 	if (man_dlpi_senddown(msp, mp)) {
3609 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_SYSERR, ENOMEM);
3610 		return;
3611 	}
3612 
3613 	ether_copy(addrp, msp->ms_manp->man_eaddr.ether_addr_octet);
3614 
3615 	MAN_DBG(MAN_DLPI, ("man_sareq: snagged %s\n",
3616 		ether_sprintf(&msp->ms_manp->man_eaddr)));
3617 
3618 	dlokack(wq, mp, DL_SET_PHYS_ADDR_REQ);
3619 
3620 }
3621 
3622 /*
3623  * These routines make up the lower part of the MAN streams framework.
3624  */
3625 
3626 /*
3627  * man_lwsrv - Deferred mblks for down stream. We end up here when
3628  * the destination is not DL_IDLE when traffic comes downstream.
3629  *
3630  *	wq - lower write queue of mxx
3631  */
3632 static int
3633 man_lwsrv(queue_t *wq)
3634 {
3635 	mblk_t		*mp;
3636 	mblk_t		*mlistp;
3637 	man_dest_t	*mdp;
3638 	size_t		count;
3639 
3640 	mdp = (man_dest_t *)wq->q_ptr;
3641 
3642 	MAN_DBG(MAN_LWSRV, ("man_lwsrv: wq(0x%p) mdp(0x%p)"
3643 		" md_rq(0x%p)\n", (void *)wq, (void *)mdp,
3644 		mdp ? (void *)mdp->md_rq : NULL));
3645 
3646 	if (mdp == NULL)
3647 		goto exit;
3648 
3649 	if (mdp->md_state & MAN_DSTATE_CLOSING) {
3650 			flushq(wq, FLUSHDATA);
3651 			flushq(RD(wq), FLUSHDATA);
3652 			goto exit;
3653 	}
3654 
3655 	/*
3656 	 * Arrange to send deferred mp's first, then mblks on the
3657 	 * service queue. Since we are exclusive in the inner perimeter,
3658 	 * we dont have to worry about md_lock, like the put procedures,
3659 	 * which are MTPUTSHARED.
3660 	 */
3661 	mutex_enter(&mdp->md_lock);
3662 	mlistp = mdp->md_dmp_head;
3663 	mdp->md_dmp_head = NULL;
3664 	count = mdp->md_dmp_count;
3665 	mdp->md_dmp_count = 0;
3666 	mutex_exit(&mdp->md_lock);
3667 
3668 	while (mlistp != NULL) {
3669 		mp = mlistp;
3670 		mlistp = mp->b_next;
3671 		mp->b_next = NULL;
3672 		count -= msgsize(mp);
3673 		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3674 
3675 			mutex_enter(&mdp->md_lock);
3676 			mdp->md_dmp_count += count + msgsize(mp);
3677 			mp->b_next = mlistp;
3678 			mdp->md_dmp_head = mp;
3679 			mutex_exit(&mdp->md_lock);
3680 			goto exit;
3681 		}
3682 	}
3683 	mdp->md_dmp_tail = NULL;
3684 
3685 	while (mp = getq(wq)) {
3686 		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3687 			/*
3688 			 * Put it back on queue, making sure to avoid
3689 			 * infinite loop mentioned in putbq(9F)
3690 			 */
3691 			noenable(wq);
3692 			putbq(wq, mp);
3693 			enableok(wq);
3694 
3695 			break;
3696 		}
3697 	}
3698 
3699 exit:
3700 
3701 	return (0);
3702 }
3703 
3704 /*
3705  * man_lrput - handle DLPI messages issued from downstream.
3706  *
3707  *	rq - lower read queue of mxx
3708  *	mp - mblk ptr to DLPI request
3709  *
3710  *	returns 0
3711  */
3712 static int
3713 man_lrput(queue_t *rq, mblk_t *mp)
3714 {
3715 	man_dest_t	*mdp;
3716 	manstr_t	*msp;
3717 
3718 #if defined(DEBUG)
3719 	union DL_primitives	*dlp;
3720 	t_uscalar_t		prim = MAN_DLPI_MAX_PRIM + 1;
3721 	char			*prim_str;
3722 #endif  /* DEBUG */
3723 
3724 	mdp = (man_dest_t *)rq->q_ptr;
3725 
3726 #if defined(DEBUG)
3727 	if (DB_TYPE(mp) == M_PROTO) {
3728 		dlp = (union DL_primitives *)mp->b_rptr;
3729 		prim = dlp->dl_primitive;
3730 	}
3731 
3732 	prim_str = (prim > MAN_DLPI_MAX_PRIM) ? "NON DLPI" :
3733 		    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3734 		    (prim == DLIOCRAW) ? "DLIOCRAW" :
3735 		    dps[(unsigned int)prim];
3736 	MAN_DBG(MAN_LRPUT, ("man_lrput: rq(0x%p) mp(0x%p) mdp(0x%p)"
3737 		" db_type(0x%x) dl_prim %s", (void *)rq,
3738 		(void *)mp, (void *)mdp, DB_TYPE(mp), prim_str));
3739 	MAN_DBGCALL(MAN_LRPUT2, man_print_mdp(mdp));
3740 #endif  /* DEBUG */
3741 
3742 	if (DB_TYPE(mp) == M_FLUSH) {
3743 		/* Turn around */
3744 		if (*mp->b_rptr & FLUSHW) {
3745 			*mp->b_rptr &= ~FLUSHR;
3746 			qreply(rq, mp);
3747 		} else
3748 			freemsg(mp);
3749 		return (0);
3750 	}
3751 
3752 	if (mdp == NULL || mdp->md_state != MAN_DSTATE_READY) {
3753 
3754 		MAN_DBG(MAN_LRPUT, ("man_lrput: not ready mdp(0x%p),"
3755 			" state(%d)", (void *)mdp, mdp ? mdp->md_state : -1));
3756 		freemsg(mp);
3757 		return (0);
3758 	}
3759 
3760 	/*
3761 	 * If we have a destination in the right state, forward on datagrams.
3762 	 */
3763 	if (MAN_IS_DATA(mp)) {
3764 		if (mdp->md_dlpistate == DL_IDLE && canputnext(mdp->md_rq)) {
3765 
3766 			msp = mdp->md_msp;
3767 			if (!(msp->ms_flags & MAN_SFLAG_PROMISC))
3768 				mdp->md_rcvcnt++; /* Count for failover */
3769 			/*
3770 			 * go put mblk_t directly up to next queue.
3771 			 */
3772 			MAN_DBG(MAN_LRPUT, ("man_lrput: putnext to rq(0x%p)",
3773 				(void *)mdp->md_rq));
3774 			(void) putnext(mdp->md_rq, mp);
3775 		} else {
3776 			freemsg(mp);
3777 		}
3778 	} else {
3779 		/*
3780 		 * Handle in man_lrsrv with exclusive inner perimeter lock.
3781 		 */
3782 		putq(rq, mp);
3783 	}
3784 
3785 	return (0);
3786 }
3787 
3788 /*
3789  * Either this is a response from our attempt to sync the upper and lower
3790  * stream states, or its data. If its not data. Do DL_* response processing
3791  * and transition md_dlpistate accordingly. If its data, toss it.
3792  */
3793 static int
3794 man_lrsrv(queue_t *rq)
3795 {
3796 	man_dest_t		*mdp;
3797 	mblk_t			*mp;
3798 	union DL_primitives	*dlp;
3799 	ulong_t			prim;
3800 	ulong_t			cprim;
3801 	int			need_dl_reset = FALSE;
3802 
3803 #if defined(DEBUG)
3804 		struct iocblk	*iocp;
3805 		char		ioc_cmd[256];
3806 #endif  /* DEBUG */
3807 
3808 	MAN_DBG(MAN_LRSRV, ("man_lrsrv: rq(0x%p)", (void *)rq));
3809 
3810 	mdp = (man_dest_t *)rq->q_ptr;
3811 
3812 	if ((mdp == NULL) || (mdp->md_state & MAN_DSTATE_CLOSING)) {
3813 			flushq(rq, FLUSHDATA);
3814 			flushq(WR(rq), FLUSHDATA);
3815 			goto exit;
3816 	}
3817 
3818 	while (mp = getq(rq)) {
3819 
3820 
3821 	/*
3822 	 * If we're not connected, or its a datagram, toss it.
3823 	 */
3824 	if (MAN_IS_DATA(mp) || mdp->md_state != MAN_DSTATE_READY) {
3825 
3826 		MAN_DBG(MAN_LRSRV, ("man_lrsrv: dropping mblk mdp(0x%p)"
3827 			" is_data(%d)", (void *)mdp, MAN_IS_DATA(mp)));
3828 		freemsg(mp);
3829 		continue;
3830 	}
3831 
3832 	/*
3833 	 * Should be response to man_dlpi_replay. Discard unless there
3834 	 * is a failure we care about.
3835 	 */
3836 
3837 	switch (DB_TYPE(mp)) {
3838 	case M_PROTO:
3839 	case M_PCPROTO:
3840 		/* Do proto processing below. */
3841 		break;
3842 
3843 	case M_IOCNAK:
3844 		/*
3845 		 * DL_IOC* failed for some reason.
3846 		 */
3847 		need_dl_reset = TRUE;
3848 
3849 #if defined(DEBUG)
3850 		iocp = (struct iocblk *)mp->b_rptr;
3851 
3852 		sprintf(ioc_cmd, "0x%x", iocp->ioc_cmd);
3853 		MAN_DBG(MAN_LRSRV, ("man_lrsrv: M_IOCNAK err %d for cmd(%s)\n",
3854 			iocp->ioc_error,
3855 			(iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3856 			(iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
3857 #endif  /* DEBUG */
3858 
3859 		/* FALLTHRU */
3860 
3861 	case M_IOCACK:
3862 	case M_CTL:
3863 		/*
3864 		 * OK response from DL_IOC*, ignore.
3865 		 */
3866 		goto dl_reset;
3867 	}
3868 
3869 	dlp = (union DL_primitives *)mp->b_rptr;
3870 	prim = dlp->dl_primitive;
3871 
3872 	MAN_DBG(MAN_LRSRV, ("man_lrsrv: prim %s", dps[(int)prim]));
3873 
3874 	/*
3875 	 * DLPI state processing big theory: We do not rigorously check
3876 	 * DLPI states (e.g. PENDING stuff). Simple rules:
3877 	 *
3878 	 * 	1) If we see an OK_ACK to an ATTACH_REQ, dlpistate = DL_UNBOUND.
3879 	 *	2) If we see an BIND_ACK to a BIND_REQ, dlpistate = DL_IDLE.
3880 	 *	3) If we see a OK_ACK response to an UNBIND_REQ
3881 	 *	   dlpistate = DL_UNBOUND.
3882 	 *	4) If we see a OK_ACK response to a DETACH_REQ,
3883 	 *	   dlpistate = DL_UNATTACHED.
3884 	 *
3885 	 * Everything that isn't handle by 1-4 above is handled by 5)
3886 	 *
3887 	 *	5) A NAK to any DL_* messages we care about causes
3888 	 *	   dlpistate = DL_UNATTACHED and man_reset_dlpi to run
3889 	 *
3890 	 * TBD - need a reset counter so we can try a switch if it gets
3891 	 * too high.
3892 	 */
3893 
3894 	switch (prim) {
3895 	case DL_OK_ACK:
3896 		cprim = dlp->ok_ack.dl_correct_primitive;
3897 
3898 		switch (cprim) {
3899 		case DL_ATTACH_REQ:
3900 			if (man_dlioc_replay(mdp)) {
3901 				D_SETSTATE(mdp, DL_UNBOUND);
3902 			} else {
3903 				need_dl_reset = TRUE;
3904 				break;
3905 			}
3906 			break;
3907 
3908 		case DL_DETACH_REQ:
3909 			D_SETSTATE(mdp, DL_UNATTACHED);
3910 			break;
3911 
3912 		case DL_UNBIND_REQ:
3913 			/*
3914 			 * Cancel timer and set md_dlpistate.
3915 			 */
3916 			D_SETSTATE(mdp, DL_UNBOUND);
3917 
3918 			ASSERT(mdp->md_bc_id == 0);
3919 			if (mdp->md_lc_timer_id != 0) {
3920 				(void) quntimeout(man_ctl_wq,
3921 					mdp->md_lc_timer_id);
3922 				mdp->md_lc_timer_id = 0;
3923 			}
3924 		}
3925 		MAN_DBG(MAN_DLPI,
3926 			("		cprim %s", dps[(int)cprim]));
3927 		break;
3928 
3929 	case DL_BIND_ACK:
3930 		/*
3931 		 * We're ready for data. Get man_lwsrv to run to
3932 		 * process any defered data and start linkcheck timer.
3933 		 */
3934 		D_SETSTATE(mdp, DL_IDLE);
3935 		qenable(mdp->md_wq);
3936 		mdp->md_linkstate = MAN_LINKGOOD;
3937 		if (man_needs_linkcheck(mdp)) {
3938 			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
3939 				man_linkcheck_timer, (void *)mdp,
3940 				man_gettimer(MAN_TIMER_LINKCHECK, mdp));
3941 		}
3942 
3943 		break;
3944 
3945 	case DL_ERROR_ACK:
3946 		cprim = dlp->error_ack.dl_error_primitive;
3947 		switch (cprim) {
3948 		case DL_ATTACH_REQ:
3949 		case DL_BIND_REQ:
3950 		case DL_DISABMULTI_REQ:
3951 		case DL_ENABMULTI_REQ:
3952 		case DL_PROMISCON_REQ:
3953 		case DL_PROMISCOFF_REQ:
3954 		case DL_SET_PHYS_ADDR_REQ:
3955 			need_dl_reset = TRUE;
3956 			break;
3957 
3958 		/*
3959 		 * ignore error TBD (better comment)
3960 		 */
3961 		case DL_UNBIND_REQ:
3962 		case DL_DETACH_REQ:
3963 			break;
3964 		}
3965 
3966 		MAN_DBG(MAN_DLPI,
3967 			("\tdl_errno %d dl_unix_errno %d cprim %s",
3968 			dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno,
3969 			dps[(int)cprim]));
3970 		break;
3971 
3972 	case DL_UDERROR_IND:
3973 		MAN_DBG(MAN_DLPI,
3974 			("\tdl_errno %d unix_errno %d",
3975 			dlp->uderror_ind.dl_errno,
3976 			dlp->uderror_ind.dl_unix_errno));
3977 		break;
3978 
3979 	case DL_INFO_ACK:
3980 		break;
3981 
3982 	default:
3983 		/*
3984 		 * We should not get here.
3985 		 */
3986 		cmn_err(CE_WARN, "man_lrsrv: unexpected DL prim 0x%lx!",
3987 			prim);
3988 		need_dl_reset = TRUE;
3989 		break;
3990 	}
3991 
3992 dl_reset:
3993 	freemsg(mp);
3994 
3995 	if (need_dl_reset) {
3996 		man_pg_t	*mpg;
3997 		man_path_t	*mp;
3998 
3999 		if (qsize(rq)) {	/* Dump all messages. */
4000 			flushq(rq, FLUSHDATA);
4001 			flushq(WR(rq), FLUSHDATA);
4002 		}
4003 
4004 		mdp->md_dlpierrors++;
4005 		D_SETSTATE(mdp, DL_UNATTACHED);
4006 		if (mdp->md_lc_timer_id != 0) {
4007 			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4008 			mdp->md_lc_timer_id = 0;
4009 		}
4010 
4011 		mutex_enter(&man_lock);
4012 		ASSERT(mdp->md_msp != NULL);
4013 		ASSERT(mdp->md_msp->ms_manp != NULL);
4014 		mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg,
4015 							mdp->md_pg_id);
4016 		ASSERT(mpg != NULL);
4017 		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4018 					mdp->md_device.mdev_ppa);
4019 		ASSERT(mp != NULL);
4020 		mp->mp_device.mdev_state |= MDEV_FAILED;
4021 		if ((mdp->md_dlpierrors >= MAN_MAX_DLPIERRORS) &&
4022 			(man_is_on_domain ||
4023 			mdp->md_msp->ms_manp->man_meta_ppa == 1)) {
4024 			/*
4025 			 * Autoswitching is disabled for instance 0
4026 			 * on the SC as we expect the domain to
4027 			 * initiate the path switching.
4028 			 */
4029 			(void) man_do_autoswitch((man_dest_t *)mdp);
4030 			MAN_DBG(MAN_WARN, ("man_lrsrv: dlpi failure(%d,%d),"
4031 				" switching path", mdp->md_device.mdev_major,
4032 				mdp->md_device.mdev_ppa));
4033 		} else {
4034 			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
4035 					man_reset_dlpi, (void *)mdp,
4036 					man_gettimer(MAN_TIMER_DLPIRESET, mdp));
4037 		}
4038 		mutex_exit(&man_lock);
4039 	}
4040 
4041 
4042 	} /* End while (getq()) */
4043 
4044 exit:
4045 	MAN_DBG(MAN_DLPI, ("man_lrsrv: returns"));
4046 
4047 	return (0);
4048 }
4049 
4050 static int
4051 man_needs_linkcheck(man_dest_t *mdp)
4052 {
4053 	/*
4054 	 * Not ready for linkcheck.
4055 	 */
4056 	if (mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
4057 		return (0);
4058 
4059 	/*
4060 	 * Linkchecking needs to be done on IP streams. For domain, all
4061 	 * driver instances need checking, for SC only instance 1 needs it.
4062 	 */
4063 	if ((man_is_on_domain || mdp->md_msp->ms_manp->man_meta_ppa == 1) &&
4064 	    (mdp->md_msp->ms_sap == ETHERTYPE_IP ||
4065 	    mdp->md_msp->ms_sap == ETHERTYPE_IPV6))
4066 
4067 		return (1);
4068 
4069 	/*
4070 	 * Linkcheck not need on this link.
4071 	 */
4072 	return (0);
4073 }
4074 
/*
 * The following routines process work requests posted to man_iwork_q
 * from the non-STREAMS half of the driver (see man_bwork.c). The work
 * requires access to the inner perimeter lock of the driver. This
 * lock is acquired by man_uwsrv, which calls man_iwork to process
 * man_iwork_q.
 */
4082 
4083 /*
4084  * The man_bwork has posted some work for us to do inside the
4085  * perimeter. This mainly involves updating lower multiplexor data
4086  * structures (non-blocking type stuff). So, we can hold the man_lock
4087  * until we are done processing all work items. Note that some of these
4088  * routines in turn submit work back to the bgthread, which they can do
4089  * since we hold the man_lock.
4090  */
4091 static void
4092 man_iwork()
4093 {
4094 	man_work_t	*wp;
4095 	int		wp_finished;
4096 
4097 	MAN_DBG(MAN_SWITCH, ("man_iwork: q_work(0x%p)",
4098 		(void *)man_iwork_q->q_work));
4099 
4100 	mutex_enter(&man_lock);
4101 
4102 	while (man_iwork_q->q_work) {
4103 
4104 		wp = man_iwork_q->q_work;
4105 		man_iwork_q->q_work = wp->mw_next;
4106 		wp->mw_next = NULL;
4107 
4108 		mutex_exit(&man_lock);
4109 
4110 		MAN_DBG(MAN_SWITCH, ("man_iwork: type %s",
4111 			_mw_type[wp->mw_type]));
4112 
4113 		wp_finished = TRUE;
4114 
4115 		switch (wp->mw_type) {
4116 		case MAN_WORK_DRATTACH:
4117 			(void) man_do_dr_attach(wp);
4118 			break;
4119 
4120 		case MAN_WORK_DRSWITCH:
4121 			/*
4122 			 * Return status to man_dr_detach immediately. If
4123 			 * no error submitting SWITCH request, man_iswitch
4124 			 * or man_bclose will cv_signal man_dr_detach on
4125 			 * completion of SWITCH work request.
4126 			 */
4127 			if (man_do_dr_switch(wp) == 0)
4128 				wp_finished = FALSE;
4129 			break;
4130 
4131 		case MAN_WORK_DRDETACH:
4132 			man_do_dr_detach(wp);
4133 			break;
4134 
4135 		case MAN_WORK_SWITCH:
4136 			if (man_iswitch(wp))
4137 				wp_finished = FALSE;
4138 			break;
4139 
4140 		case MAN_WORK_KSTAT_UPDATE:
4141 			man_do_kstats(wp);
4142 			break;
4143 
4144 		default:
4145 			cmn_err(CE_WARN, "man_iwork: "
4146 			    "illegal work type(%d)", wp->mw_type);
4147 			break;
4148 		}
4149 
4150 		mutex_enter(&man_lock);
4151 
4152 		/*
4153 		 * If we've completed the work request, delete, or
4154 		 * cv_signal waiter.
4155 		 */
4156 		if (wp_finished) {
4157 			wp->mw_flags |= MAN_WFLAGS_DONE;
4158 
4159 			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
4160 				cv_signal(&wp->mw_cv);
4161 			else
4162 				man_work_free(wp);
4163 		}
4164 	}
4165 
4166 	mutex_exit(&man_lock);
4167 }
4168 
4169 /*
4170  * man_dr_detach has submitted a request to DRSWITCH a path.
4171  * He is in cv_wait_sig(wp->mw_cv). We forward the work request on to
4172  * man_bwork as a switch request. It should end up back at
4173  * man_iwork, who will cv_signal(wp->mw_cv) man_dr_detach.
4174  *
4175  * Called holding inner perimeter lock.
4176  * man_lock is held to synchronize access to pathgroup list(man_pg).
4177  */
4178 static int
4179 man_do_dr_switch(man_work_t *wp)
4180 {
4181 	man_t		*manp;
4182 	man_pg_t	*mpg;
4183 	man_path_t	*mp;
4184 	man_path_t	*ap;
4185 	man_adest_t	*adp;
4186 	mi_path_t	mpath;
4187 	int		status = 0;
4188 
4189 	adp = &wp->mw_arg;
4190 
4191 	MAN_DBG(MAN_SWITCH, ("man_do_dr_switch: pg_id %d work:", adp->a_pg_id));
4192 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4193 
4194 	mutex_enter(&man_lock);
4195 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4196 	if (manp == NULL || manp->man_pg == NULL) {
4197 		status = ENODEV;
4198 		goto exit;
4199 	}
4200 
4201 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4202 	if (mpg == NULL) {
4203 		status = ENODEV;
4204 		goto exit;
4205 	}
4206 
4207 	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4208 		status = EAGAIN;
4209 		goto exit;
4210 	}
4211 
4212 	/*
4213 	 * Check to see if detaching device is active. If so, activate
4214 	 * an alternate.
4215 	 */
4216 	mp = man_find_active_path(mpg->mpg_pathp);
4217 	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4218 
4219 		ap = man_find_alternate_path(mpg->mpg_pathp);
4220 		if (ap == NULL) {
4221 			status = EBUSY;
4222 			goto exit;
4223 		}
4224 
4225 		bzero((char *)&mpath, sizeof (mi_path_t));
4226 
4227 		mpath.mip_cmd = MI_PATH_ACTIVATE;
4228 		mpath.mip_man_ppa = 0;
4229 		mpath.mip_pg_id = 0;
4230 		mpath.mip_devs[0] = ap->mp_device;
4231 		mpath.mip_ndevs = 1;
4232 		ether_copy(&manp->man_eaddr, &mpath.mip_eaddr);
4233 
4234 		/*
4235 		 * DR thread is sleeping on wp->mw_cv. We change the work
4236 		 * request from DRSWITCH to SWITCH and submit it to
4237 		 * for processing by man_bwork (via man_pg_cmd). At
4238 		 * completion the SWITCH work request is processed by
4239 		 * man_iswitch() or man_bclose and the DR thread will
4240 		 * be cv_signal'd.
4241 		 */
4242 		wp->mw_type = MAN_WORK_SWITCH;
4243 		if (status = man_pg_cmd(&mpath, wp))
4244 			goto exit;
4245 
4246 	} else {
4247 		/*
4248 		 * Tell man_dr_detach that detaching device is not currently
4249 		 * in use.
4250 		 */
4251 		status = ENODEV;
4252 	}
4253 
4254 exit:
4255 	if (status) {
4256 		/*
4257 		 * ENODEV is a noop, not really an error.
4258 		 */
4259 		if (status != ENODEV)
4260 			wp->mw_status = status;
4261 	}
4262 	mutex_exit(&man_lock);
4263 
4264 	return (status);
4265 }
4266 
4267 /*
4268  * man_dr_attach has submitted a request to DRATTACH a path,
4269  * add that path to the path list.
4270  *
4271  * Called holding perimeter lock.
4272  */
4273 static int
4274 man_do_dr_attach(man_work_t *wp)
4275 {
4276 	man_t		*manp;
4277 	man_adest_t	*adp;
4278 	mi_path_t	mpath;
4279 	manc_t		manc;
4280 	int		status = 0;
4281 
4282 	adp = &wp->mw_arg;
4283 
4284 	MAN_DBG(MAN_SWITCH, ("man_do_dr_attach: pg_id %d work:", adp->a_pg_id));
4285 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4286 
4287 	mutex_enter(&man_lock);
4288 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4289 	if (manp == NULL || manp->man_pg == NULL) {
4290 		status = ENODEV;
4291 		goto exit;
4292 	}
4293 
4294 	if (status = man_get_iosram(&manc)) {
4295 		goto exit;
4296 	}
4297 	/*
4298 	 * Extract SC ethernet address from IOSRAM.
4299 	 */
4300 	ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4301 
4302 	mpath.mip_pg_id = adp->a_pg_id;
4303 	mpath.mip_man_ppa = adp->a_man_ppa;
4304 	/*
4305 	 * man_dr_attach passes the new device info in a_sf_dev.
4306 	 */
4307 	MAN_DBG(MAN_DR, ("man_do_dr_attach: "));
4308 	MAN_DBGCALL(MAN_DR, man_print_dev(&adp->a_sf_dev));
4309 	mpath.mip_devs[0] = adp->a_sf_dev;
4310 	mpath.mip_ndevs = 1;
4311 	mpath.mip_cmd = MI_PATH_ADD;
4312 	status = man_pg_cmd(&mpath, NULL);
4313 
4314 exit:
4315 	mutex_exit(&man_lock);
4316 	return (status);
4317 }
4318 
4319 /*
4320  * man_dr_detach has submitted a request to DRDETACH a path.
4321  * He is in cv_wait_sig(wp->mw_cv). We remove the path and
4322  * cv_signal(wp->mw_cv) man_dr_detach.
4323  *
4324  * Called holding perimeter lock.
4325  */
4326 static void
4327 man_do_dr_detach(man_work_t *wp)
4328 {
4329 	man_t		*manp;
4330 	man_pg_t	*mpg;
4331 	man_path_t	*mp;
4332 	man_adest_t	*adp;
4333 	manc_t		manc;
4334 	mi_path_t	mpath;
4335 	int		i;
4336 	int		found;
4337 	int		status = 0;
4338 
4339 	adp = &wp->mw_arg;
4340 
4341 	MAN_DBG(MAN_SWITCH, ("man_do_dr_detach: pg_id %d work:", adp->a_pg_id));
4342 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4343 
4344 	mutex_enter(&man_lock);
4345 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4346 	if (manp == NULL || manp->man_pg == NULL) {
4347 		status = ENODEV;
4348 		goto exit;
4349 	}
4350 
4351 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4352 	if (mpg == NULL) {
4353 		status = ENODEV;
4354 		goto exit;
4355 	}
4356 
4357 	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4358 		status = EAGAIN;
4359 		goto exit;
4360 	}
4361 
4362 	/*
4363 	 * We should have switched detaching path if it was active.
4364 	 */
4365 	mp = man_find_active_path(mpg->mpg_pathp);
4366 	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4367 		status = EAGAIN;
4368 		goto exit;
4369 	}
4370 
4371 	/*
4372 	 * Submit an ASSIGN command, minus the detaching device.
4373 	 */
4374 	bzero((char *)&mpath, sizeof (mi_path_t));
4375 
4376 	if (status = man_get_iosram(&manc)) {
4377 		goto exit;
4378 	}
4379 
4380 	mpath.mip_cmd = MI_PATH_ASSIGN;
4381 	mpath.mip_man_ppa = 0;
4382 	mpath.mip_pg_id = 0;
4383 
4384 	mp = mpg->mpg_pathp;
4385 	i = 0;
4386 	found = FALSE;
4387 	while (mp != NULL) {
4388 		if (mp->mp_device.mdev_ppa != adp->a_sf_dev.mdev_ppa) {
4389 			mpath.mip_devs[i] = mp->mp_device;
4390 			i++;
4391 		} else {
4392 			found = TRUE;
4393 		}
4394 		mp = mp->mp_next;
4395 	}
4396 
4397 	if (found) {
4398 		/*
4399 		 * Need to include SCs ethernet address in command.
4400 		 */
4401 		mpath.mip_ndevs = i;
4402 		ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4403 
4404 		status = man_pg_cmd(&mpath, NULL);
4405 	}
4406 
4407 	/*
4408 	 * Hand back status to man_dr_detach request.
4409 	 */
4410 exit:
4411 	if (status != ENODEV)
4412 		wp->mw_status = status;
4413 
4414 	mutex_exit(&man_lock);
4415 
4416 }
4417 
4418 
4419 /*
4420  * The background thread has configured new lower multiplexor streams for
4421  * the given destinations. Update the appropriate destination data structures
4422  * inside the inner perimeter. We must take care to deal with destinations
4423  * whose upper stream has closed or detached from lower streams.
4424  *
4425  * Returns
4426  *	0		Done with work request.
4427  *	1		Reused work request.
4428  */
4429 static int
4430 man_iswitch(man_work_t *wp)
4431 {
4432 	man_adest_t	*adp;
4433 	man_t		*manp;
4434 	man_pg_t	*mpg;
4435 	man_path_t	*mp = NULL;
4436 	man_dest_t	*mdp;
4437 	man_dest_t	*tdp;
4438 	int		i;
4439 	int		switch_ok = TRUE;
4440 
4441 	adp = &wp->mw_arg;
4442 
4443 	if (wp->mw_status != 0) {
4444 		switch_ok = FALSE;	/* Never got things opened */
4445 	}
4446 
4447 	/*
4448 	 * Update destination structures as appropriate.
4449 	 */
4450 	for (i = 0; i < adp->a_ndests; i++) {
4451 		man_dest_t	tmp;
4452 
4453 		/*
4454 		 * Check to see if lower stream we just switch is still
4455 		 * around.
4456 		 */
4457 		tdp = &adp->a_mdp[i];
4458 		mdp = man_switch_match(tdp, adp->a_pg_id, tdp->md_switch_id);
4459 
4460 		if (mdp == NULL)
4461 			continue;
4462 
4463 		if (switch_ok == FALSE) {
4464 			/*
4465 			 * Switch failed for some reason.  Clear
4466 			 * PLUMBING flag and retry switch again later.
4467 			 */
4468 			man_ifail_dest(mdp);
4469 			continue;
4470 		}
4471 
4472 		/*
4473 		 * Swap new info, for old. We return the old info to
4474 		 * man_bwork to close things up below.
4475 		 */
4476 		bcopy((char *)mdp, (char *)&tmp, sizeof (man_dest_t));
4477 
4478 		ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4479 		ASSERT(mdp->md_state == tdp->md_state);
4480 
4481 		mdp->md_state = tdp->md_state;
4482 
4483 		/*
4484 		 * save the wq from the destination passed(tdp).
4485 		 */
4486 		mdp->md_wq = tdp->md_wq;
4487 		RD(mdp->md_wq)->q_ptr = (void *)(mdp);
4488 		WR(mdp->md_wq)->q_ptr = (void *)(mdp);
4489 
4490 		mdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4491 		mdp->md_state |= MAN_DSTATE_READY;
4492 
4493 		ASSERT(mdp->md_device.mdev_major == adp->a_sf_dev.mdev_major);
4494 
4495 		ASSERT(tdp->md_device.mdev_ppa == adp->a_st_dev.mdev_ppa);
4496 		ASSERT(tdp->md_device.mdev_major == adp->a_st_dev.mdev_major);
4497 
4498 		mdp->md_device = tdp->md_device;
4499 		mdp->md_muxid = tdp->md_muxid;
4500 		mdp->md_linkstate = MAN_LINKUNKNOWN;
4501 		(void) drv_getparm(TIME, &mdp->md_lastswitch);
4502 		mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4503 		mdp->md_switch_id = 0;
4504 		mdp->md_switches++;
4505 		mdp->md_dlpierrors = 0;
4506 		D_SETSTATE(mdp, DL_UNATTACHED);
4507 
4508 		/*
4509 		 * Resync lower w/ upper dlpi state. This will start link
4510 		 * timer if/when lower stream goes to DL_IDLE (see man_lrsrv).
4511 		 */
4512 		man_reset_dlpi((void *)mdp);
4513 
4514 		bcopy((char *)&tmp, (char *)tdp, sizeof (man_dest_t));
4515 	}
4516 
4517 	if (switch_ok) {
4518 		for (i = 0; i < adp->a_ndests; i++) {
4519 			tdp = &adp->a_mdp[i];
4520 
4521 			tdp->md_state &= ~MAN_DSTATE_PLUMBING;
4522 			tdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4523 			tdp->md_state |= MAN_DSTATE_READY;
4524 		}
4525 	} else {
4526 		/*
4527 		 * Never got switch-to destinations open, free them.
4528 		 */
4529 		man_kfree(adp->a_mdp,
4530 			sizeof (man_dest_t) * adp->a_ndests);
4531 	}
4532 
4533 	/*
4534 	 * Clear pathgroup switching flag and update path flags.
4535 	 */
4536 	mutex_enter(&man_lock);
4537 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4538 
4539 	ASSERT(manp != NULL);
4540 	ASSERT(manp->man_pg != NULL);
4541 
4542 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4543 	ASSERT(mpg != NULL);
4544 	ASSERT(mpg->mpg_flags & MAN_PG_SWITCHING);
4545 	mpg->mpg_flags &= ~MAN_PG_SWITCHING;
4546 
4547 	/*
4548 	 * Switch succeeded, mark path we switched from as failed, and
4549 	 * device we switch to as active and clear its failed flag (if set).
4550 	 * Sync up kstats.
4551 	 */
4552 	if (switch_ok) {
4553 		mp = man_find_active_path(mpg->mpg_pathp);
4554 		if (mp != NULL) {
4555 
4556 			ASSERT(adp->a_sf_dev.mdev_major != 0);
4557 
4558 			MAN_DBG(MAN_SWITCH, ("man_iswitch: switch from dev:"));
4559 			MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_sf_dev));
4560 
4561 			mp->mp_device.mdev_state &= ~MDEV_ACTIVE;
4562 		} else
4563 			ASSERT(adp->a_sf_dev.mdev_major == 0);
4564 
4565 		MAN_DBG(MAN_SWITCH, ("man_iswitch: switch to dev:"));
4566 		MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_st_dev));
4567 
4568 		ASSERT(adp->a_st_dev.mdev_major != 0);
4569 
4570 		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4571 			adp->a_st_dev.mdev_ppa);
4572 
4573 		ASSERT(mp != NULL);
4574 
4575 		mp->mp_device.mdev_state |= MDEV_ACTIVE;
4576 	}
4577 
4578 	/*
4579 	 * Decrement manp reference count and hand back work request if
4580 	 * needed.
4581 	 */
4582 	manp->man_refcnt--;
4583 
4584 	if (switch_ok) {
4585 		wp->mw_type = MAN_WORK_CLOSE;
4586 		man_work_add(man_bwork_q, wp);
4587 	}
4588 
4589 	mutex_exit(&man_lock);
4590 
4591 	return (switch_ok);
4592 }
4593 
4594 /*
4595  * Find the destination in the upper stream that we just switched.
4596  */
4597 man_dest_t *
4598 man_switch_match(man_dest_t *sdp, int pg_id, void *sid)
4599 {
4600 	man_dest_t	*mdp = NULL;
4601 	manstr_t	*msp;
4602 
4603 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4604 		/*
4605 		 * Check if upper stream closed, or detached.
4606 		 */
4607 		if (msp != sdp->md_msp)
4608 			continue;
4609 
4610 		if (msp->ms_dests == NULL)
4611 			break;
4612 
4613 		mdp = &msp->ms_dests[pg_id];
4614 
4615 		/*
4616 		 * Upper stream detached and reattached while we were
4617 		 * switching.
4618 		 */
4619 		if (mdp->md_switch_id != sid) {
4620 			mdp = NULL;
4621 			break;
4622 		}
4623 	}
4624 
4625 	return (mdp);
4626 }
4627 
4628 /*
4629  * bg_thread cant complete the switch for some reason. (Re)start the
4630  * linkcheck timer again.
4631  */
4632 static void
4633 man_ifail_dest(man_dest_t *mdp)
4634 {
4635 	ASSERT(mdp->md_lc_timer_id == 0);
4636 	ASSERT(mdp->md_bc_id == 0);
4637 	ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4638 
4639 	MAN_DBG(MAN_SWITCH, ("man_ifail_dest"));
4640 	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
4641 
4642 	mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4643 	mdp->md_linkstate = MAN_LINKFAIL;
4644 
4645 	/*
4646 	 * If we have not yet initialized link, or the upper stream is
4647 	 * DL_IDLE, restart the linktimer.
4648 	 */
4649 	if ((mdp->md_state & MAN_DSTATE_INITIALIZING) ||
4650 		((mdp->md_msp->ms_sap == ETHERTYPE_IPV6 ||
4651 		mdp->md_msp->ms_sap == ETHERTYPE_IP) &&
4652 		mdp->md_msp->ms_dlpistate == DL_IDLE)) {
4653 
4654 		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
4655 		(void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4656 	}
4657 
4658 }
4659 
4660 /*
4661  * Arrange to replay all of ms_dl_mp on the new lower stream to get it
4662  * in sync with the upper stream. Note that this includes setting the
4663  * physical address.
4664  *
4665  * Called from qtimeout with inner perimeter lock.
4666  */
4667 static void
4668 man_reset_dlpi(void *argp)
4669 {
4670 	man_dest_t	*mdp = (man_dest_t *)argp;
4671 	manstr_t	*msp;
4672 	mblk_t		*mp;
4673 	mblk_t		*rmp = NULL;
4674 	mblk_t		*tmp;
4675 
4676 	mdp->md_lc_timer_id = 0;
4677 
4678 	if (mdp->md_state != MAN_DSTATE_READY) {
4679 		MAN_DBG(MAN_DLPI, ("man_reset_dlpi: not ready!"));
4680 		return;
4681 	}
4682 
4683 	msp = mdp->md_msp;
4684 
4685 	rmp = man_dup_mplist(msp->ms_dl_mp);
4686 	if (rmp == NULL)
4687 		goto fail;
4688 
4689 	/*
4690 	 * Send down an unbind and detach request, just to clean things
4691 	 * out, we ignore ERROR_ACKs for unbind and detach in man_lrsrv.
4692 	 */
4693 	tmp = man_alloc_ubreq_dreq();
4694 	if (tmp == NULL) {
4695 		goto fail;
4696 	}
4697 	mp = tmp;
4698 	while (mp->b_next != NULL)
4699 		mp = mp->b_next;
4700 	mp->b_next = rmp;
4701 	rmp = tmp;
4702 
4703 	man_dlpi_replay(mdp, rmp);
4704 
4705 	return;
4706 
4707 fail:
4708 
4709 	while (rmp) {
4710 		mp = rmp;
4711 		rmp = rmp->b_next;
4712 		mp->b_next = mp->b_prev = NULL;
4713 		freemsg(mp);
4714 	}
4715 
4716 	ASSERT(mdp->md_lc_timer_id == 0);
4717 	ASSERT(mdp->md_bc_id == 0);
4718 
4719 	/*
4720 	 * If low on memory, try again later. I Could use qbufcall, but that
4721 	 * could fail and I would have to try and recover from that w/
4722 	 * qtimeout anyway.
4723 	 */
4724 	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_reset_dlpi,
4725 		(void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4726 }
4727 
4728 /*
4729  * Once we receive acknowledgement that DL_ATTACH_REQ was successful,
4730  * we can send down the DL_* related IOCTLs (e.g. DL_IOC_HDR). If we
4731  * try and send them downsteam w/o waiting, the ioctl's get processed before
4732  * the ATTACH_REQ and they are rejected. TBD - could just do the lower
4733  * dlpi state change in lock step. TBD
4734  */
4735 static int
4736 man_dlioc_replay(man_dest_t *mdp)
4737 {
4738 	mblk_t		*rmp;
4739 	int		status = 1;
4740 
4741 	if (mdp->md_msp->ms_dlioc_mp == NULL)
4742 		goto exit;
4743 
4744 	rmp = man_dup_mplist(mdp->md_msp->ms_dlioc_mp);
4745 	if (rmp == NULL) {
4746 		status = 0;
4747 		goto exit;
4748 	}
4749 
4750 	man_dlpi_replay(mdp, rmp);
4751 exit:
4752 	return (status);
4753 }
4754 
4755 static mblk_t *
4756 man_alloc_ubreq_dreq()
4757 {
4758 	mblk_t			*dreq;
4759 	mblk_t			*ubreq = NULL;
4760 	union DL_primitives	*dlp;
4761 
4762 	dreq = allocb(DL_DETACH_REQ_SIZE, BPRI_MED);
4763 	if (dreq == NULL)
4764 		goto exit;
4765 
4766 	dreq->b_datap->db_type = M_PROTO;
4767 	dlp = (union DL_primitives *)dreq->b_rptr;
4768 	dlp->dl_primitive = DL_DETACH_REQ;
4769 	dreq->b_wptr += DL_DETACH_REQ_SIZE;
4770 
4771 	ubreq = allocb(DL_UNBIND_REQ_SIZE, BPRI_MED);
4772 	if (ubreq == NULL) {
4773 		freemsg(dreq);
4774 		goto exit;
4775 	}
4776 
4777 	ubreq->b_datap->db_type = M_PROTO;
4778 	dlp = (union DL_primitives *)ubreq->b_rptr;
4779 	dlp->dl_primitive = DL_UNBIND_REQ;
4780 	ubreq->b_wptr += DL_UNBIND_REQ_SIZE;
4781 
4782 	ubreq->b_next = dreq;
4783 
4784 exit:
4785 
4786 	return (ubreq);
4787 }
4788 
4789 static mblk_t *
4790 man_dup_mplist(mblk_t *mp)
4791 {
4792 	mblk_t	*listp = NULL;
4793 	mblk_t	*tailp = NULL;
4794 
4795 	for (; mp != NULL; mp = mp->b_next) {
4796 
4797 		mblk_t	*nmp;
4798 		mblk_t	*prev;
4799 		mblk_t	*next;
4800 
4801 		prev = mp->b_prev;
4802 		next = mp->b_next;
4803 		mp->b_prev = mp->b_next = NULL;
4804 
4805 		nmp = copymsg(mp);
4806 
4807 		mp->b_prev = prev;
4808 		mp->b_next = next;
4809 
4810 		if (nmp == NULL)
4811 			goto nomem;
4812 
4813 		if (listp == NULL) {
4814 			listp = tailp = nmp;
4815 		} else {
4816 			tailp->b_next = nmp;
4817 			tailp = nmp;
4818 		}
4819 	}
4820 
4821 	return (listp);
4822 nomem:
4823 
4824 	while (listp) {
4825 		mp = listp;
4826 		listp = mp->b_next;
4827 		mp->b_next = mp->b_prev = NULL;
4828 		freemsg(mp);
4829 	}
4830 
4831 	return (NULL);
4832 
4833 }
4834 
4835 static mblk_t *
4836 man_alloc_physreq_mp(eaddr_t *man_eap)
4837 {
4838 
4839 	mblk_t			*mp;
4840 	union DL_primitives	*dlp;
4841 	t_uscalar_t		off;
4842 	eaddr_t			*eap;
4843 
4844 	mp = allocb(DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL, BPRI_MED);
4845 	if (mp == NULL)
4846 		goto exit;
4847 
4848 	mp->b_datap->db_type = M_PROTO;
4849 	dlp = (union DL_primitives *)mp->b_wptr;
4850 	dlp->set_physaddr_req.dl_primitive = DL_SET_PHYS_ADDR_REQ;
4851 	dlp->set_physaddr_req.dl_addr_length = ETHERADDRL;
4852 	off = DL_SET_PHYS_ADDR_REQ_SIZE;
4853 	dlp->set_physaddr_req.dl_addr_offset =  off;
4854 	mp->b_wptr += DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL;
4855 
4856 	eap = (eaddr_t *)(mp->b_rptr + off);
4857 	ether_copy(man_eap, eap);
4858 
4859 exit:
4860 	MAN_DBG(MAN_DLPI, ("man_alloc_physreq: physaddr %s\n",
4861 		    ether_sprintf(eap)));
4862 
4863 	return (mp);
4864 }
4865 
4866 /*
4867  * A new path in a pathgroup has become active for the first time. Setup
4868  * the lower destinations in prepartion for man_pg_activate to call
4869  * man_autoswitch.
4870  */
4871 static void
4872 man_add_dests(man_pg_t *mpg)
4873 {
4874 	manstr_t	*msp;
4875 	man_dest_t	*mdp;
4876 
4877 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4878 
4879 		if (!man_str_uses_pg(msp, mpg))
4880 			continue;
4881 
4882 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4883 
4884 /*
4885  * TBD - Take out
4886  *		ASSERT(mdp->md_device.mdev_state == MDEV_UNASSIGNED);
4887  *		ASSERT(mdp->md_state == MAN_DSTATE_NOTPRESENT);
4888  */
4889 		if (mdp->md_device.mdev_state != MDEV_UNASSIGNED) {
4890 			cmn_err(CE_NOTE, "man_add_dests mdev !unassigned");
4891 			MAN_DBGCALL(MAN_PATH, man_print_mdp(mdp));
4892 		}
4893 
4894 		man_start_dest(mdp, msp, mpg);
4895 	}
4896 
4897 }
4898 
4899 static int
4900 man_remove_dests(man_pg_t *mpg)
4901 {
4902 	manstr_t	*msp;
4903 	int		close_cnt = 0;
4904 	man_dest_t	*cdp;
4905 	man_dest_t	*mdp;
4906 	man_dest_t	*tdp;
4907 	man_work_t	*wp;
4908 	mblk_t		*mp;
4909 	int		status = 0;
4910 
4911 	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
4912 	if (wp == NULL) {
4913 		status = ENOMEM;
4914 		goto exit;
4915 	}
4916 
4917 	/*
4918 	 * Count up number of destinations we need to close.
4919 	 */
4920 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4921 		if (!man_str_uses_pg(msp, mpg))
4922 			continue;
4923 
4924 		close_cnt++;
4925 	}
4926 
4927 	if (close_cnt == 0)
4928 		goto exit;
4929 
4930 	cdp = man_kzalloc(sizeof (man_dest_t) * close_cnt, KM_NOSLEEP);
4931 	if (cdp == NULL) {
4932 		status = ENOMEM;
4933 		man_work_free(wp);
4934 		goto exit;
4935 	}
4936 
4937 	tdp = cdp;
4938 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4939 		if (!man_str_uses_pg(msp, mpg))
4940 			continue;
4941 
4942 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4943 
4944 		mdp->md_state |= MAN_DSTATE_CLOSING;
4945 		mdp->md_device.mdev_state = MDEV_UNASSIGNED;
4946 		mdp->md_msp = NULL;
4947 		mdp->md_rq = NULL;
4948 
4949 		/*
4950 		 * Clean up optimized destination pointer if we are
4951 		 * closing it.
4952 		 */
4953 		man_set_optimized_dest(msp);
4954 
4955 		if (mdp->md_lc_timer_id != 0) {
4956 			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4957 			mdp->md_lc_timer_id = 0;
4958 		}
4959 		if (mdp->md_bc_id != 0) {
4960 			qunbufcall(man_ctl_wq, mdp->md_bc_id);
4961 			mdp->md_bc_id = 0;
4962 		}
4963 
4964 		mutex_enter(&mdp->md_lock);
4965 		while ((mp = mdp->md_dmp_head) != NULL) {
4966 			mdp->md_dmp_head = mp->b_next;
4967 			mp->b_next = NULL;
4968 			freemsg(mp);
4969 		}
4970 		mdp->md_dmp_count = 0;
4971 		mdp->md_dmp_tail = NULL;
4972 		mutex_exit(&mdp->md_lock);
4973 
4974 		*tdp++ = *mdp;
4975 
4976 		mdp->md_state = MAN_DSTATE_NOTPRESENT;
4977 		mdp->md_muxid = -1;
4978 	}
4979 
4980 	wp->mw_arg.a_mdp = cdp;
4981 	wp->mw_arg.a_ndests = close_cnt;
4982 	man_work_add(man_bwork_q, wp);
4983 
4984 exit:
4985 	return (status);
4986 
4987 }
4988 
4989 /*
4990  * Returns TRUE if stream uses pathgroup, FALSE otherwise.
4991  */
4992 static int
4993 man_str_uses_pg(manstr_t *msp, man_pg_t *mpg)
4994 {
4995 	int	status;
4996 
4997 	status = ((msp->ms_flags & MAN_SFLAG_CONTROL)	||
4998 		    (msp->ms_dests == NULL)		||
4999 		    (msp->ms_manp == NULL)		||
5000 		    (msp->ms_manp->man_meta_ppa != mpg->mpg_man_ppa));
5001 
5002 	return (!status);
5003 }
5004 
5005 static int
5006 man_gettimer(int timer, man_dest_t *mdp)
5007 {
5008 
5009 	int attached = TRUE;
5010 	int time = 0;
5011 
5012 	if (mdp == NULL || mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
5013 		attached = FALSE;
5014 
5015 	switch (timer) {
5016 	case MAN_TIMER_INIT:
5017 		if (attached)
5018 			time = mdp->md_msp->ms_manp->man_init_time;
5019 		else
5020 			time = MAN_INIT_TIME;
5021 		break;
5022 
5023 	case MAN_TIMER_LINKCHECK:
5024 		if (attached) {
5025 			if (mdp->md_linkstate == MAN_LINKSTALE)
5026 				time = mdp->md_msp->ms_manp->man_linkstale_time;
5027 			else
5028 				time = mdp->md_msp->ms_manp->man_linkcheck_time;
5029 		} else
5030 			time = MAN_LINKCHECK_TIME;
5031 		break;
5032 
5033 	case MAN_TIMER_DLPIRESET:
5034 		if (attached)
5035 			time = mdp->md_msp->ms_manp->man_dlpireset_time;
5036 		else
5037 			time = MAN_DLPIRESET_TIME;
5038 		break;
5039 
5040 	default:
5041 		MAN_DBG(MAN_LINK, ("man_gettimer: unknown timer %d", timer));
5042 		time = MAN_LINKCHECK_TIME;
5043 		break;
5044 	}
5045 
5046 	return (drv_usectohz(time));
5047 }
5048 
5049 /*
5050  * Check the links for each active destination. Called inside inner
5051  * perimeter via qtimeout. This timer only runs on the domain side of the
5052  * driver. It should never run on the SC side.
5053  *
5054  * On a MAN_LINKGOOD link, we check/probe the link health every
5055  * MAN_LINKCHECK_TIME seconds. If the link goes MAN_LINKSTALE, the we probe
5056  * the link every MAN_LINKSTALE_TIME seconds, and fail the link after probing
5057  * the link MAN_LINKSTALE_RETRIES times.
5058  * The man_lock is held to synchronize access pathgroup list(man_pg).
5059  */
5060 void
5061 man_linkcheck_timer(void *argp)
5062 {
5063 	man_dest_t		*mdp = (man_dest_t *)argp;
5064 	int			restart_timer = TRUE;
5065 	int			send_ping = TRUE;
5066 	int			newstate;
5067 	int			oldstate;
5068 	man_pg_t		*mpg;
5069 	man_path_t		*mp;
5070 
5071 	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: mdp"));
5072 	MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5073 
5074 	/*
5075 	 * Clear timeout id and check if someones waiting on us to
5076 	 * complete a close.
5077 	 */
5078 	mdp->md_lc_timer_id = 0;
5079 
5080 	if (mdp->md_state == MAN_DSTATE_NOTPRESENT ||
5081 	    mdp->md_state & MAN_DSTATE_BUSY) {
5082 
5083 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: not ready mdp"));
5084 		MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5085 		goto exit;
5086 	}
5087 
5088 	mutex_enter(&man_lock);
5089 	/*
5090 	 * If the lower stream needs initializing, just go straight to
5091 	 * switch code. As the linkcheck timer is started for all
5092 	 * SAPs, do not send ping packets during the initialization.
5093 	 */
5094 	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5095 		send_ping = FALSE;
5096 		goto do_switch;
5097 	}
5098 
5099 	newstate = oldstate = mdp->md_linkstate;
5100 
5101 	if (!man_needs_linkcheck(mdp)) {
5102 		cmn_err(CE_NOTE,
5103 			"man_linkcheck_timer: unneeded linkcheck on mdp(0x%p)",
5104 			(void *)mdp);
5105 		mutex_exit(&man_lock);
5106 		return;
5107 	}
5108 
5109 	/*
5110 	 * The above call to  man_needs_linkcheck() validates
5111 	 * mdp->md_msp and mdp->md_msp->ms_manp pointers.
5112 	 */
5113 	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5114 	ASSERT(mpg != NULL);
5115 	mp = man_find_path_by_ppa(mpg->mpg_pathp, mdp->md_device.mdev_ppa);
5116 	ASSERT(mp != NULL);
5117 
5118 	/*
5119 	 * This is the most common case, when traffic is flowing.
5120 	 */
5121 	if (mdp->md_rcvcnt != mdp->md_lastrcvcnt) {
5122 
5123 		newstate = MAN_LINKGOOD;
5124 		mdp->md_lastrcvcnt = mdp->md_rcvcnt;
5125 		send_ping = FALSE;
5126 
5127 		/*
5128 		 * Clear the FAILED flag and update lru.
5129 		 */
5130 		mp->mp_device.mdev_state &= ~MDEV_FAILED;
5131 		(void) drv_getparm(TIME, &mp->mp_lru);
5132 
5133 		if (mdp->md_link_updown_msg == MAN_LINK_DOWN_MSG) {
5134 			man_t *manp = mdp->md_msp->ms_manp;
5135 
5136 			cmn_err(CE_NOTE, "%s%d Link up",
5137 			    ddi_major_to_name(manp->man_meta_major),
5138 			    manp->man_meta_ppa);
5139 
5140 			mdp->md_link_updown_msg = MAN_LINK_UP_MSG;
5141 		}
5142 
5143 		goto done;
5144 	}
5145 
5146 	/*
5147 	 * If we're here, it means we have not seen any traffic
5148 	 */
5149 	switch (oldstate) {
5150 	case MAN_LINKINIT:
5151 	case MAN_LINKGOOD:
5152 		newstate = MAN_LINKSTALE;
5153 		mdp->md_linkstales++;
5154 		mdp->md_linkstale_retries =
5155 			mdp->md_msp->ms_manp->man_linkstale_retries;
5156 		break;
5157 
5158 	case MAN_LINKSTALE:
5159 	case MAN_LINKFAIL:
5160 		mdp->md_linkstales++;
5161 		mdp->md_linkstale_retries--;
5162 		if (mdp->md_linkstale_retries < 0) {
5163 			newstate = MAN_LINKFAIL;
5164 			mdp->md_linkfails++;
5165 			mdp->md_linkstale_retries =
5166 				mdp->md_msp->ms_manp->man_linkstale_retries;
5167 			/*
5168 			 * Mark the destination as FAILED and
5169 			 * update lru.
5170 			 */
5171 			if (oldstate != MAN_LINKFAIL) {
5172 				mp->mp_device.mdev_state |= MDEV_FAILED;
5173 				(void) drv_getparm(TIME, &mp->mp_lru);
5174 			}
5175 		}
5176 		break;
5177 
5178 	default:
5179 		cmn_err(CE_WARN, "man_linkcheck_timer: illegal link"
5180 			" state %d", oldstate);
5181 		break;
5182 	}
5183 done:
5184 
5185 	if (oldstate != newstate) {
5186 
5187 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer"
5188 			" link state %s -> %s", lss[oldstate],
5189 			lss[newstate]));
5190 
5191 		mdp->md_linkstate = newstate;
5192 	}
5193 
5194 	/*
5195 	 * Do any work required from state transitions above.
5196 	 */
5197 	if (newstate == MAN_LINKFAIL) {
5198 do_switch:
5199 		if (!man_do_autoswitch(mdp)) {
5200 			/*
5201 			 * Stop linkcheck timer until switch completes.
5202 			 */
5203 			restart_timer = FALSE;
5204 			send_ping = FALSE;
5205 		}
5206 	}
5207 
5208 	mutex_exit(&man_lock);
5209 	if (send_ping)
5210 		man_do_icmp_bcast(mdp, mdp->md_msp->ms_sap);
5211 
5212 	if (restart_timer)
5213 		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
5214 			(void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
5215 
5216 exit:
5217 	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: returns"));
5218 
5219 }
5220 
5221 /*
5222  * Handle linkcheck initiated autoswitching.
5223  * Called with man_lock held.
5224  */
5225 static int
5226 man_do_autoswitch(man_dest_t *mdp)
5227 {
5228 	man_pg_t	*mpg;
5229 	man_path_t	*ap;
5230 	int		status = 0;
5231 
5232 	ASSERT(MUTEX_HELD(&man_lock));
5233 	/*
5234 	 * Set flags and refcnt. Cleared in man_iswitch when SWITCH completes.
5235 	 */
5236 	mdp->md_msp->ms_manp->man_refcnt++;
5237 
5238 	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5239 	ASSERT(mpg);
5240 
5241 	if (mpg->mpg_flags & MAN_PG_SWITCHING)
5242 		return (EBUSY);
5243 
5244 	mpg->mpg_flags |= MAN_PG_SWITCHING;
5245 
5246 	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5247 		/*
5248 		 * We're initializing, ask for a switch to our currently
5249 		 * active device.
5250 		 */
5251 		status = man_autoswitch(mpg, &mdp->md_device, NULL);
5252 	} else {
5253 
5254 		if (mdp->md_msp != NULL && mdp->md_msp->ms_manp != NULL &&
5255 		    mdp->md_link_updown_msg == MAN_LINK_UP_MSG) {
5256 
5257 			man_t *manp = mdp->md_msp->ms_manp;
5258 
5259 			cmn_err(CE_NOTE, "%s%d Link down",
5260 			    ddi_major_to_name(manp->man_meta_major),
5261 			    manp->man_meta_ppa);
5262 		}
5263 		mdp->md_link_updown_msg = MAN_LINK_DOWN_MSG;
5264 
5265 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: link failure on %s%d",
5266 			ddi_major_to_name(mdp->md_device.mdev_major),
5267 			mdp->md_device.mdev_ppa));
5268 
5269 		ap = man_find_alternate_path(mpg->mpg_pathp);
5270 
5271 		if (ap == NULL) {
5272 			status = ENODEV;
5273 			goto exit;
5274 		}
5275 		status = man_autoswitch(mpg, &ap->mp_device, NULL);
5276 	}
5277 exit:
5278 	if (status != 0) {
5279 		/*
5280 		 * man_iswitch not going to run, clean up.
5281 		 */
5282 		mpg->mpg_flags &= ~MAN_PG_SWITCHING;
5283 		mdp->md_msp->ms_manp->man_refcnt--;
5284 	}
5285 
5286 	return (status);
5287 }
5288 
5289 /*
5290  * Gather up all lower multiplexor streams that have this link open and
5291  * try to switch them. Called from inner perimeter and holding man_lock.
5292  *
5293  *	pg_id		- Pathgroup to do switch for.
5294  *	st_devp		- New device to switch to.
5295  *	wait_for_switch	- whether or not to qwait for completion.
5296  */
5297 static int
5298 man_autoswitch(man_pg_t *mpg, man_dev_t *st_devp, man_work_t *waiter_wp)
5299 {
5300 	man_work_t	*wp;
5301 	int		sdp_cnt = 0;
5302 	man_dest_t	*sdp;
5303 	int		status = 0;
5304 
5305 	ASSERT(MUTEX_HELD(&man_lock));
5306 	if (waiter_wp == NULL) {
5307 		wp = man_work_alloc(MAN_WORK_SWITCH, KM_NOSLEEP);
5308 		if (wp == NULL) {
5309 			status = ENOMEM;
5310 			goto exit;
5311 		}
5312 	} else {
5313 		ASSERT(waiter_wp->mw_type == MAN_WORK_SWITCH);
5314 		wp = waiter_wp;
5315 	}
5316 
5317 	/*
5318 	 * Set dests as PLUMBING, cancel timers and return array of dests
5319 	 * that need a switch.
5320 	 */
5321 	status = man_prep_dests_for_switch(mpg, &sdp, &sdp_cnt);
5322 	if (status) {
5323 		if (waiter_wp == NULL)
5324 			man_work_free(wp);
5325 		goto exit;
5326 	}
5327 
5328 	/*
5329 	 * If no streams are active, there are no streams to switch.
5330 	 * Return ENODEV (see man_pg_activate).
5331 	 */
5332 	if (sdp_cnt == 0) {
5333 		if (waiter_wp == NULL)
5334 			man_work_free(wp);
5335 		status = ENODEV;
5336 		goto exit;
5337 	}
5338 
5339 	/*
5340 	 * Ask the bgthread to switch. See man_bwork.
5341 	 */
5342 	wp->mw_arg.a_sf_dev = sdp->md_device;
5343 	wp->mw_arg.a_st_dev = *st_devp;
5344 	wp->mw_arg.a_pg_id = mpg->mpg_pg_id;
5345 	wp->mw_arg.a_man_ppa = mpg->mpg_man_ppa;
5346 
5347 	wp->mw_arg.a_mdp = sdp;
5348 	wp->mw_arg.a_ndests = sdp_cnt;
5349 	man_work_add(man_bwork_q, wp);
5350 
5351 exit:
5352 
5353 	return (status);
5354 }
5355 
5356 /*
5357  * If an alternate path exists for pathgroup, arrange for switch to
5358  * happen. Note that we need to switch each of msp->dests[pg_id], for
5359  * all on man_strup. We must:
5360  *
5361  *		Cancel any timers
5362  *		Mark dests as PLUMBING
5363  *		Submit switch request to man_bwork_q->
5364  */
5365 static int
5366 man_prep_dests_for_switch(man_pg_t *mpg, man_dest_t **mdpp, int *cntp)
5367 {
5368 	manstr_t	*msp;
5369 	man_dest_t	*mdp;
5370 	int		sdp_cnt = 0;
5371 	man_dest_t	*sdp = NULL;
5372 	man_dest_t	*tdp;
5373 	int		status = 0;
5374 
5375 	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: pg_id %d",
5376 		mpg->mpg_pg_id));
5377 
5378 	/*
5379 	 * Count up number of streams, there is one destination that needs
5380 	 * switching per stream.
5381 	 */
5382 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5383 		if (man_str_uses_pg(msp, mpg))
5384 			sdp_cnt++;
5385 	}
5386 
5387 	if (sdp_cnt == 0)
5388 		goto exit;
5389 
5390 	sdp = man_kzalloc(sizeof (man_dest_t) * sdp_cnt, KM_NOSLEEP);
5391 	if (sdp == NULL) {
5392 		status = ENOMEM;
5393 		goto exit;
5394 	}
5395 	tdp = sdp;
5396 	/*
5397 	 * Mark each destination as unusable.
5398 	 */
5399 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5400 		if (man_str_uses_pg(msp, mpg)) {
5401 
5402 			/*
5403 			 * Mark destination as plumbing and store the
5404 			 * address of sdp as a way to identify the
5405 			 * SWITCH request when it comes back (see man_iswitch).
5406 			 */
5407 			mdp = &msp->ms_dests[mpg->mpg_pg_id];
5408 			mdp->md_state |= MAN_DSTATE_PLUMBING;
5409 			mdp->md_switch_id = sdp;
5410 
5411 			/*
5412 			 * Copy destination info.
5413 			 */
5414 			bcopy(mdp, tdp, sizeof (man_dest_t));
5415 			tdp++;
5416 
5417 			/*
5418 			 * Cancel timers.
5419 			 */
5420 			if (mdp->md_lc_timer_id) {
5421 				(void) quntimeout(man_ctl_wq,
5422 					mdp->md_lc_timer_id);
5423 				mdp->md_lc_timer_id = 0;
5424 			}
5425 			if (mdp->md_bc_id) {
5426 				qunbufcall(man_ctl_wq, mdp->md_bc_id);
5427 				mdp->md_bc_id = 0;
5428 			}
5429 		}
5430 	}
5431 
5432 	*mdpp = sdp;
5433 	*cntp = sdp_cnt;
5434 	status = 0;
5435 exit:
5436 
5437 	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: returns %d"
5438 		" sdp(0x%p) sdp_cnt(%d)", status, (void *)sdp, sdp_cnt));
5439 
5440 	return (status);
5441 
5442 }
5443 
5444 /*
5445  * The code below generates an ICMP echo packet and sends it to the
5446  * broadcast address in the hopes that the other end will respond
5447  * and the man_linkcheck_timer logic will see the traffic.
5448  *
5449  * This assumes ethernet-like media.
5450  */
5451 /*
5452  * Generate an ICMP packet. Called exclusive inner perimeter.
5453  *
5454  *	mdp - destination to send packet to.
5455  *	sap - either ETHERTYPE_ARP or ETHERTYPE_IPV6
5456  */
5457 static void
5458 man_do_icmp_bcast(man_dest_t *mdp, t_uscalar_t sap)
5459 {
5460 	mblk_t			*mp = NULL;
5461 
5462 	/* TBD - merge pinger and this routine. */
5463 
5464 	ASSERT(sap == ETHERTYPE_IPV6 || sap == ETHERTYPE_IP);
5465 
5466 	if (sap == ETHERTYPE_IPV6) {
5467 		mdp->md_icmpv6probes++;
5468 	} else {
5469 		mdp->md_icmpv4probes++;
5470 	}
5471 	/*
5472 	 * Send the ICMP message
5473 	 */
5474 	mp = man_pinger(sap);
5475 
5476 	MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: sap=0x%x mp=0x%p",
5477 							sap, (void *)mp));
5478 	if (mp == NULL)
5479 		return;
5480 
5481 	/*
5482 	 * Send it out.
5483 	 */
5484 	if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
5485 
5486 		MAN_DBG(MAN_LINK, ("man_do_icmp_broadcast: xmit failed"));
5487 
5488 		freemsg(mp);
5489 	}
5490 
5491 }
5492 
5493 static mblk_t *
5494 man_pinger(t_uscalar_t sap)
5495 {
5496 	mblk_t		*mp = NULL;
5497 	man_dladdr_t	dlsap;
5498 	icmph_t		*icmph;
5499 	int		ipver;
5500 	ipha_t		*ipha;
5501 	ip6_t		*ip6h;
5502 	int		iph_hdr_len;
5503 	int		datalen = 64;
5504 	uchar_t		*datap;
5505 	uint16_t	size;
5506 	uchar_t		i;
5507 
5508 	dlsap.dl_sap = htons(sap);
5509 	bcopy(&etherbroadcast, &dlsap.dl_phys, sizeof (dlsap.dl_phys));
5510 
5511 	if (sap == ETHERTYPE_IPV6) {
5512 		ipver = IPV6_VERSION;
5513 		iph_hdr_len = sizeof (ip6_t);
5514 		size = ICMP6_MINLEN;
5515 	} else {
5516 		ipver = IPV4_VERSION;
5517 		iph_hdr_len = sizeof (ipha_t);
5518 		size = ICMPH_SIZE;
5519 	}
5520 	size += (uint16_t)iph_hdr_len;
5521 	size += datalen;
5522 
5523 	mp = man_alloc_udreq(size, &dlsap);
5524 	if (mp == NULL)
5525 		goto exit;
5526 
5527 	/*
5528 	 * fill out the ICMP echo packet headers
5529 	 */
5530 	mp->b_cont->b_wptr += iph_hdr_len;
5531 	if (ipver == IPV4_VERSION) {
5532 		ipha = (ipha_t *)mp->b_cont->b_rptr;
5533 		ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
5534 			| IP_SIMPLE_HDR_LENGTH_IN_WORDS;
5535 		ipha->ipha_type_of_service = 0;
5536 		ipha->ipha_length = size;
5537 		ipha->ipha_fragment_offset_and_flags = IPH_DF;
5538 		ipha->ipha_ttl = 1;
5539 		ipha->ipha_protocol = IPPROTO_ICMP;
5540 		if (man_is_on_domain) {
5541 			manc_t		manc;
5542 
5543 			if (man_get_iosram(&manc)) {
5544 				freemsg(mp);
5545 				mp = NULL;
5546 				goto exit;
5547 			}
5548 
5549 			/*
5550 			 * Domain generates ping packets for domain to
5551 			 * SC network (dman0 <--> scman0).
5552 			 */
5553 			ipha->ipha_dst = manc.manc_sc_ipaddr;
5554 			ipha->ipha_src = manc.manc_dom_ipaddr;
5555 		} else {
5556 			/*
5557 			 * Note that ping packets are only generated
5558 			 * by the SC across scman1 (SC to SC network).
5559 			 */
5560 			ipha->ipha_dst = man_sc_ipaddrs.ip_other_sc_ipaddr;
5561 			ipha->ipha_src = man_sc_ipaddrs.ip_my_sc_ipaddr;
5562 		}
5563 
5564 		ipha->ipha_ident = 0;
5565 
5566 		ipha->ipha_hdr_checksum = 0;
5567 		ipha->ipha_hdr_checksum = IP_CSUM(mp->b_cont, 0, 0);
5568 
5569 	} else {
5570 		ip6h = (ip6_t *)mp->b_cont->b_rptr;
5571 		/*
5572 		 * IP version = 6, priority = 0, flow = 0
5573 		 */
5574 		ip6h->ip6_flow = (IPV6_VERSION << 28);
5575 		ip6h->ip6_plen =
5576 			htons((short)(size - iph_hdr_len));
5577 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
5578 		ip6h->ip6_hlim = 1;	/* stay on link */
5579 
5580 		if (man_is_on_domain) {
5581 			manc_t		manc;
5582 
5583 			if (man_get_iosram(&manc)) {
5584 				freemsg(mp);
5585 				mp = NULL;
5586 				goto exit;
5587 			}
5588 
5589 			/*
5590 			 * Domain generates ping packets for domain to
5591 			 * SC network (dman0 <--> scman0).
5592 			 */
5593 			ip6h->ip6_src = manc.manc_dom_ipv6addr;
5594 			ip6h->ip6_dst = manc.manc_sc_ipv6addr;
5595 		} else {
5596 			/*
5597 			 * Note that ping packets are only generated
5598 			 * by the SC across scman1 (SC to SC network).
5599 			 */
5600 			ip6h->ip6_src = man_sc_ip6addrs.ip6_my_sc_ipaddr;
5601 			ip6h->ip6_dst = man_sc_ip6addrs.ip6_other_sc_ipaddr;
5602 		}
5603 	}
5604 
5605 	/*
5606 	 * IPv6 and IP are the same for ICMP as far as I'm concerned.
5607 	 */
5608 	icmph = (icmph_t *)mp->b_cont->b_wptr;
5609 	if (ipver == IPV4_VERSION) {
5610 		mp->b_cont->b_wptr += ICMPH_SIZE;
5611 		icmph->icmph_type = ICMP_ECHO_REQUEST;
5612 		icmph->icmph_code = 0;
5613 	} else {
5614 		mp->b_cont->b_wptr += ICMP6_MINLEN;
5615 		icmph->icmph_type = ICMP6_ECHO_REQUEST;
5616 		icmph->icmph_code = 0;
5617 	}
5618 
5619 	datap = mp->b_cont->b_wptr;
5620 	mp->b_cont->b_wptr += datalen;
5621 
5622 	for (i = 0; i < datalen; i++)
5623 		*datap++ = i;
5624 
5625 	if (ipver == IPV4_VERSION) {
5626 		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len, 0);
5627 	} else {
5628 		uint32_t	sum;
5629 
5630 		sum = htons(IPPROTO_ICMPV6) + ip6h->ip6_plen;
5631 		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len - 32,
5632 			(sum & 0xffff) + (sum >> 16));
5633 	}
5634 
5635 /*
5636  * TBD
5637  *	icp->icmp_time =  ???;
5638  */
5639 
5640 exit:
5641 	return (mp);
5642 }
5643 
5644 static mblk_t *
5645 man_alloc_udreq(int size, man_dladdr_t *dlsap)
5646 {
5647 	dl_unitdata_req_t	*udreq;
5648 	mblk_t			*bp;
5649 	mblk_t			*mp;
5650 
5651 	mp = allocb(sizeof (dl_unitdata_req_t) + sizeof (*dlsap), BPRI_MED);
5652 
5653 	if (mp == NULL) {
5654 		cmn_err(CE_NOTE, "man_preparepkt: allocb failed");
5655 		return (NULL);
5656 	}
5657 
5658 	if ((bp = allocb(size, BPRI_MED)) == NULL) {
5659 		freemsg(mp);
5660 		cmn_err(CE_NOTE, "man_preparepkts: allocb failed");
5661 		return (NULL);
5662 	}
5663 	bzero(bp->b_rptr, size);
5664 
5665 	mp->b_cont = bp;
5666 	mp->b_datap->db_type = M_PROTO;
5667 	udreq = (dl_unitdata_req_t *)mp->b_wptr;
5668 	mp->b_wptr += sizeof (dl_unitdata_req_t);
5669 
5670 	/*
5671 	 * phys addr first - TBD
5672 	 */
5673 	bcopy((char *)dlsap, mp->b_wptr, sizeof (*dlsap));
5674 	mp->b_wptr += sizeof (*dlsap);
5675 
5676 	udreq->dl_primitive = DL_UNITDATA_REQ;
5677 	udreq->dl_dest_addr_length = sizeof (*dlsap);
5678 	udreq->dl_dest_addr_offset = sizeof (*udreq);
5679 	udreq->dl_priority.dl_min = 0;
5680 	udreq->dl_priority.dl_max = 0;
5681 
5682 	return (mp);
5683 }
5684 
5685 
/*
 * The routines in this file are executed by the MAN background thread,
 * which executes outside of the STREAMS framework (see man_str.c). It is
 * allowed to do the things required to modify the STREAMS driver (things
 * that are normally done from a user process). These routines do things like
 * open and close drivers, PLINK and PUNLINK streams to/from the multiplexor,
 * etc.
 *
 * The mechanism of communication between the STREAMS portion of the driver
 * and the background thread portion is a pair of work queues, man_bwork_q
 * and man_iwork_q (background work q and streams work q).  Work
 * requests are placed on those queues when one half of the driver wants
 * the other half to do some work for it.
 *
 * The MAN background thread executes the man_bwork routine. Its sole
 * job is to process work requests placed on this work q. The MAN upper
 * write service routine is responsible for processing work requests posted
 * to man_iwork_q.
 *
 * Both work queues are protected by the global mutex man_lock. The
 * man_bwork thread is signaled via the condvar man_bwork_q->q_cv. The
 * man_uwsrv routine is signaled by calling qenable (forcing man_uwsrv to
 * run).
 */
5709 
5710 /*
5711  * man_bwork - Work thread for this device.  It is responsible for
5712  * performing operations which can't occur within the STREAMS framework.
5713  *
5714  * Locking:
5715  *	- Called holding no locks
5716  *	- Obtains the global mutex man_lock to remove work from
5717  *	  man_bwork_q, and post work to man_iwork_q->
5718  *	- Note that we do not want to hold any locks when making
5719  *	  any ldi_ calls.
5720  */
5721 void
5722 man_bwork()
5723 {
5724 	man_work_t	*wp;
5725 	int		done = 0;
5726 	callb_cpr_t	cprinfo;
5727 	int		wp_finished;
5728 
5729 	CALLB_CPR_INIT(&cprinfo, &man_lock, callb_generic_cpr,
5730 	    "mn_work_thrd");
5731 
5732 	MAN_DBG(MAN_CONFIG, ("man_bwork: enter"));
5733 
5734 	while (done == 0) {
5735 
5736 		mutex_enter(&man_lock);
5737 		/*
5738 		 * While there is nothing to do, sit in cv_wait.  If work
5739 		 * request is made, requester will signal.
5740 		 */
5741 		while (man_bwork_q->q_work == NULL) {
5742 
5743 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
5744 
5745 			cv_wait(&man_bwork_q->q_cv, &man_lock);
5746 
5747 			CALLB_CPR_SAFE_END(&cprinfo, &man_lock);
5748 		}
5749 
5750 		wp = man_bwork_q->q_work;
5751 		man_bwork_q->q_work = wp->mw_next;
5752 		wp->mw_next = NULL;
5753 		mutex_exit(&man_lock);
5754 
5755 		wp_finished = TRUE;
5756 
5757 		MAN_DBG(MAN_SWITCH, ("man_bwork: type %s",
5758 			_mw_type[wp->mw_type]));
5759 
5760 		switch (wp->mw_type) {
5761 		case MAN_WORK_OPEN_CTL:
5762 			wp->mw_status = man_open_ctl();
5763 			break;
5764 
5765 		case MAN_WORK_CLOSE_CTL:
5766 			man_close_ctl();
5767 			break;
5768 
5769 		case MAN_WORK_CLOSE:
5770 		case MAN_WORK_CLOSE_STREAM:
5771 			man_bclose(&wp->mw_arg);
5772 			break;
5773 
5774 		case MAN_WORK_SWITCH:
5775 			man_bswitch(&wp->mw_arg, wp);
5776 			wp_finished = FALSE;
5777 			break;
5778 
5779 		case MAN_WORK_STOP:		/* man_bwork_stop() */
5780 			done = 1;
5781 			mutex_enter(&man_lock);
5782 			CALLB_CPR_EXIT(&cprinfo); /* Unlocks man_lock */
5783 			break;
5784 
5785 		default:
5786 			cmn_err(CE_WARN, "man_bwork: "
5787 			    "illegal work type(%d)", wp->mw_type);
5788 			break;
5789 		}
5790 
5791 		mutex_enter(&man_lock);
5792 
5793 		if (wp_finished) {
5794 			wp->mw_flags |= MAN_WFLAGS_DONE;
5795 			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
5796 				cv_signal(&wp->mw_cv);
5797 			else if (wp->mw_flags & MAN_WFLAGS_QWAITER)
5798 				qenable(wp->mw_q);
5799 			else
5800 				man_work_free(wp);
5801 		}
5802 
5803 		mutex_exit(&man_lock);
5804 	}
5805 
5806 	MAN_DBG(MAN_CONFIG, ("man_bwork: thread_exit"));
5807 
5808 	mutex_enter(&man_lock);
5809 	man_bwork_id = NULL;
5810 	mutex_exit(&man_lock);
5811 
5812 	thread_exit();
5813 }
5814 
5815 /*
5816  * man_open_ctl - Open the control stream.
5817  *
5818  *	returns	- success - 0
5819  *		- failure - errno code
5820  *
5821  * Mutex Locking Notes:
5822  *	We need a way to keep the CLONE_OPEN qwaiters in man_open from
5823  *	checking the man_config variables after the ldi_open call below
5824  *	returns from man_open, leaving the inner perimeter. So, we use the
5825  *	man_lock to synchronize the threads in man_open_ctl and man_open.  We
5826  *	hold man_lock across this call into man_open, which in general is a
5827  *	no-no. But, the STREAMs portion of the driver (other than open)
5828  *	doesn't use it. So, if ldi_open gets hijacked to run any part of
5829  *	the MAN streams driver, it wont end up recursively trying to acquire
5830  *	man_lock. Note that the non-CLONE_OPEN portion of man_open doesnt
5831  *	acquire it either, so again no recursive mutex.
5832  */
5833 static int
5834 man_open_ctl()
5835 {
5836 	int		status = 0;
5837 	ldi_handle_t	ctl_lh = NULL;
5838 	ldi_ident_t	li = NULL;
5839 
5840 	MAN_DBG(MAN_CONFIG, ("man_open_ctl: plumbing control stream\n"));
5841 
5842 	/*
5843 	 * Get eri driver loaded and kstats initialized. Is there a better
5844 	 * way to do this? - TBD.
5845 	 */
5846 	status = ldi_ident_from_mod(&modlinkage, &li);
5847 	if (status) {
5848 		cmn_err(CE_WARN,
5849 			"man_open_ctl: ident alloc failed, error %d", status);
5850 		goto exit;
5851 	}
5852 
5853 	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
5854 	    kcred, &ctl_lh, li);
5855 	if (status) {
5856 		cmn_err(CE_WARN,
5857 			"man_open_ctl: eri open failed, error %d", status);
5858 		ctl_lh = NULL;
5859 		goto exit;
5860 	}
5861 	(void) ldi_close(ctl_lh, NULL, kcred);
5862 	ctl_lh = NULL;
5863 
5864 	mutex_enter(&man_lock);
5865 
5866 	if (man_ctl_lh != NULL) {
5867 		mutex_exit(&man_lock);
5868 		goto exit;
5869 	}
5870 
5871 	ASSERT(man_ctl_wq == NULL);
5872 	mutex_exit(&man_lock);
5873 
5874 	status = ldi_open_by_name(DMAN_INT_PATH, FREAD | FWRITE | FNOCTTY,
5875 	    kcred, &ctl_lh, li);
5876 	if (status) {
5877 		cmn_err(CE_WARN,
5878 			"man_open_ctl: man control dev open failed, "
5879 			"error %d", status);
5880 		goto exit;
5881 	}
5882 
5883 	/*
5884 	 * Update global config state. TBD - dont need lock here, since
5885 	 * everyone is stuck in open until we finish. Only other modifier
5886 	 * is man_deconfigure via _fini, which returns EBUSY if there is
5887 	 * any open streams (other than control). Do need to signal qwaiters
5888 	 * on error.
5889 	 */
5890 	mutex_enter(&man_lock);
5891 	ASSERT(man_config_state == MAN_CONFIGURING);
5892 	ASSERT(man_ctl_lh == NULL);
5893 	man_ctl_lh = ctl_lh;
5894 	mutex_exit(&man_lock);
5895 
5896 exit:
5897 	if (li)
5898 		ldi_ident_release(li);
5899 
5900 	MAN_DBG(MAN_CONFIG, ("man_open_ctl: man_ctl_lh(0x%p) errno = %d\n",
5901 		(void *)man_ctl_lh, status));
5902 
5903 	return (status);
5904 }
5905 
5906 /*
5907  * man_close_ctl - Close control stream, we are about to unload driver.
5908  *
5909  * Locking:
5910  *	- Called holding no locks.
5911  */
5912 static void
5913 man_close_ctl()
5914 {
5915 	ldi_handle_t tlh;
5916 
5917 	MAN_DBG(MAN_CONFIG, ("man_close_ctl: unplumbing control stream\n"));
5918 
5919 	mutex_enter(&man_lock);
5920 	if ((tlh = man_ctl_lh) != NULL)
5921 		man_ctl_lh = NULL;
5922 	mutex_exit(&man_lock);
5923 
5924 	if (tlh != NULL) {
5925 		(void) ldi_close(tlh, NULL, kcred);
5926 	}
5927 
5928 }
5929 
5930 /*
5931  * Close the lower streams. Get all the timers canceled, close the lower
5932  * stream and delete the dest array.
5933  *
5934  * Returns:
5935  *	0	Closed all streams.
5936  *	1	Couldn't close one or more streams, timers still running.
5937  *
5938  * Locking:
5939  *	- Called holding no locks.
5940  */
5941 static void
5942 man_bclose(man_adest_t *adp)
5943 {
5944 	int		i;
5945 	man_dest_t	*mdp;
5946 
5947 	man_cancel_timers(adp);
5948 
5949 	for (i = 0; i < adp->a_ndests; i++) {
5950 		mdp = &adp->a_mdp[i];
5951 
5952 		if (mdp->md_muxid != -1)
5953 			man_unplumb(mdp);
5954 	}
5955 
5956 	mutex_destroy(&mdp->md_lock);
5957 	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
5958 	adp->a_mdp = NULL;
5959 }
5960 
5961 /*
5962  * We want to close down all lower streams. Need to wait until all
5963  * timers and work related to these lower streams is quiesced.
5964  *
5965  * Returns 1 if lower streams are quiesced, 0 if we need to wait
5966  * a bit longer.
5967  */
5968 static void
5969 man_cancel_timers(man_adest_t *adp)
5970 {
5971 	man_dest_t	*mdp;
5972 	int		cnt;
5973 	int		i;
5974 
5975 	mdp = adp->a_mdp;
5976 	cnt = adp->a_ndests;
5977 
5978 	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: mdp(0x%p) cnt %d",
5979 		(void *)mdp, cnt));
5980 
5981 	for (i = 0; i < cnt; i++) {
5982 
5983 		if (mdp[i].md_lc_timer_id != 0) {
5984 			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
5985 			mdp[i].md_lc_timer_id = 0;
5986 		}
5987 
5988 		if (mdp[i].md_bc_id != 0) {
5989 			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
5990 			mdp[i].md_bc_id = 0;
5991 		}
5992 	}
5993 
5994 	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: returns"));
5995 }
5996 
5997 /*
5998  * A failover is started at start of day, when the driver detects a
5999  * link failure (see man_linkcheck_timer), or when DR detaches
6000  * the IO board containing the current active link between SC and
6001  * domain (see man_dr_detach, man_iwork, and man_do_dr_detach). A
6002  * MAN_WORK_SWITCH work request containing all the lower streams that
6003  * should be switched is posted on the man_bwork_q-> This work request is
6004  * processed here. Once all lower streams have been switched to an
6005  * alternate path, the MAN_WORK_SWITCH work request is passed back to
6006  * man_iwork_q where it is processed within the inner perimeter of the
6007  * STREAMS framework (see man_iswitch).
6008  *
6009  * Note that when the switch fails for whatever reason, we just hand
6010  * back the lower streams untouched and let another failover happen.
6011  * Hopefully we will sooner or later succeed at the failover.
6012  */
6013 static void
6014 man_bswitch(man_adest_t *adp, man_work_t *wp)
6015 {
6016 	man_dest_t	*tdp;
6017 	man_t		*manp;
6018 	int		i;
6019 	int		status = 0;
6020 
6021 	/*
6022 	 * Make a temporary copy of dest array, updating device to the
6023 	 * alternate and try to open all lower streams. bgthread can sleep.
6024 	 */
6025 
6026 	tdp = man_kzalloc(sizeof (man_dest_t) * adp->a_ndests,
6027 		KM_SLEEP);
6028 	bcopy(adp->a_mdp, tdp, sizeof (man_dest_t) * adp->a_ndests);
6029 
6030 	/*
6031 	 * Before we switch to the new path, lets sync the kstats.
6032 	 */
6033 	mutex_enter(&man_lock);
6034 
6035 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
6036 	if (manp != NULL) {
6037 		man_update_path_kstats(manp);
6038 	} else
6039 		status = ENODEV;
6040 
6041 	mutex_exit(&man_lock);
6042 
6043 	if (status != 0)
6044 		goto exit;
6045 
6046 	for (i = 0; i < adp->a_ndests; i++) {
6047 
6048 		tdp[i].md_device = adp->a_st_dev;
6049 		tdp[i].md_muxid = -1;
6050 
6051 		if (man_plumb(&tdp[i]))
6052 			break;
6053 	}
6054 
6055 	/*
6056 	 * Didn't plumb everyone, unplumb new lower stuff and return.
6057 	 */
6058 	if (i < adp->a_ndests) {
6059 		int	j;
6060 
6061 		for (j = 0; j <= i; j++)
6062 			man_unplumb(&tdp[j]);
6063 		status = EAGAIN;
6064 		goto exit;
6065 	}
6066 
6067 	if (man_is_on_domain && man_dossc_switch(adp->a_st_dev.mdev_exp_id)) {
6068 		/*
6069 		 * If we cant set new path on the SSC, then fail the
6070 		 * failover.
6071 		 */
6072 		for (i = 0; i < adp->a_ndests; i++)
6073 			man_unplumb(&tdp[i]);
6074 		status = EAGAIN;
6075 		goto exit;
6076 	}
6077 
6078 	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
6079 	adp->a_mdp = tdp;
6080 
6081 exit:
6082 	if (status)
6083 		man_kfree(tdp, sizeof (man_dest_t) * adp->a_ndests);
6084 
6085 
6086 	MAN_DBG(MAN_SWITCH, ("man_bswitch: returns %d", status));
6087 
6088 	/*
6089 	 * Hand processed switch request back to man_iwork for
6090 	 * processing in man_iswitch.
6091 	 */
6092 	wp->mw_status = status;
6093 
6094 	mutex_enter(&man_lock);
6095 	man_work_add(man_iwork_q, wp);
6096 	mutex_exit(&man_lock);
6097 
6098 }
6099 
6100 /*
6101  * man_plumb - Configure a lower stream for this destination.
6102  *
6103  * Locking:
6104  * 	- Called holding no locks.
6105  *
6106  * Returns:
6107  *	- success - 0
6108  *	- failure - error code of failure
6109  */
6110 static int
6111 man_plumb(man_dest_t *mdp)
6112 {
6113 	int		status;
6114 	int		muxid;
6115 	ldi_handle_t	lh;
6116 	ldi_ident_t	li = NULL;
6117 
6118 	MAN_DBG(MAN_SWITCH, ("man_plumb: mdp(0x%p) %s%d exp(%d)",
6119 		(void *)mdp, ddi_major_to_name(mdp->md_device.mdev_major),
6120 		mdp->md_device.mdev_ppa, mdp->md_device.mdev_exp_id));
6121 
6122 	/*
6123 	 * Control stream should already be open.
6124 	 */
6125 	if (man_ctl_lh == NULL) {
6126 		status = EAGAIN;
6127 		goto exit;
6128 	}
6129 
6130 	mutex_enter(&man_lock);
6131 	ASSERT(man_ctl_wq != NULL);
6132 	status = ldi_ident_from_stream(man_ctl_wq, &li);
6133 	if (status != 0) {
6134 		cmn_err(CE_WARN,
6135 		    "man_plumb: ident alloc failed, error %d", status);
6136 		goto exit;
6137 	}
6138 	mutex_exit(&man_lock);
6139 
6140 	/*
6141 	 * previously opens were done by a dev_t of makedev(clone_major,
6142 	 * mdev_major) which should always map to /devices/pseudo/clone@0:eri
6143 	 */
6144 	ASSERT(strcmp(ERI_IDNAME,
6145 		    ddi_major_to_name(mdp->md_device.mdev_major)) == 0);
6146 
6147 	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
6148 	    kcred, &lh, li);
6149 	if (status) {
6150 		cmn_err(CE_WARN,
6151 		    "man_plumb: eri open failed, error %d", status);
6152 		goto exit;
6153 	}
6154 
6155 	/*
6156 	 * Link netdev under MAN.
6157 	 */
6158 	ASSERT(mdp->md_muxid == -1);
6159 
6160 	status = ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lh,
6161 				FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
6162 	if (status) {
6163 		cmn_err(CE_WARN,
6164 		    "man_plumb: ldi_ioctl(I_PLINK) failed, error %d", status);
6165 		(void) ldi_close(lh, NULL, kcred);
6166 		goto exit;
6167 
6168 	}
6169 	mdp->md_muxid = muxid;
6170 	mdp->md_wq = man_linkrec_find(muxid);
6171 	/*
6172 	 * If we can't find the linkrec then return an
6173 	 * error. It will be automatically unplumbed on failure.
6174 	 */
6175 	if (mdp->md_wq == NULL)
6176 		status = EAGAIN;
6177 
6178 	(void) ldi_close(lh, NULL, kcred);
6179 exit:
6180 	if (li)
6181 		ldi_ident_release(li);
6182 
6183 	MAN_DBG(MAN_SWITCH, ("man_plumb: exit\n"));
6184 
6185 	return (status);
6186 }
6187 
6188 /*
6189  * man_unplumb - tear down the STREAMs framework for the lower multiplexor.
6190  *
6191  *	mdp - destination struct of interest
6192  *
6193  *	returns	- success - 0
6194  *		- failure - return error from ldi_ioctl
6195  */
6196 static void
6197 man_unplumb(man_dest_t *mdp)
6198 {
6199 	int	status, rval;
6200 
6201 	MAN_DBG(MAN_SWITCH, ("man_unplumb: mdp"));
6202 	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
6203 
6204 	if (mdp->md_muxid == -1)
6205 		return;
6206 
6207 	ASSERT(man_ctl_lh != NULL);
6208 
6209 	/*
6210 	 * I_PUNLINK causes the multiplexor resources to be freed.
6211 	 */
6212 	status = ldi_ioctl(man_ctl_lh, I_PUNLINK, (intptr_t)mdp->md_muxid,
6213 				FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &rval);
6214 	if (status) {
6215 		cmn_err(CE_WARN, "man_unplumb: ldi_ioctl(I_PUNLINK) failed"
6216 			" errno %d\n", status);
6217 	}
6218 	/*
6219 	 * Delete linkrec if it exists.
6220 	 */
6221 	(void) man_linkrec_find(mdp->md_muxid);
6222 	mdp->md_muxid = -1;
6223 
6224 }
6225 
6226 /*
6227  * The routines below deal with paths and pathgroups. These data structures
6228  * are used to track the physical devices connecting the domain and SSC.
6229  * These devices make up the lower streams of the MAN multiplexor. The
6230  * routines all expect the man_lock to be held.
6231  *
6232  * A pathgroup consists of all paths that connect a particular domain and the
6233  * SSC. The concept of a pathgroup id (pg_id) is used to uniquely identify
6234  * a pathgroup.  For Domains, there is just one pathgroup, that connecting
6235  * the domain to the SSC (pg_id == 0). On the SSC, there is one pathgroup per
6236  * domain. The pg_id field corresponds to the domain tags A-R. A pg_id of
6237  * 0 means domain tag A, a pg_id of 1 means domain B, etc.
6238  *
6239  * The path data structure identifies one path between the SSC and a domain.
6240  * It describes the information for the path: the major and minor number of
6241  * the physical device; kstat pointers; and ethernet address of the
6242  * other end of the path.
6243  *
 * The pathgroups are anchored at man_pg_head and are protected by the
 * inner perimeter. The routines are only called by the STREAMs
 * portion of the driver.
6247  */
6248 
6249 /*
6250  * Update man instance pathgroup info. Exclusive inner perimeter assures
6251  * this code is single threaded. man_refcnt assures man_t wont detach
6252  * while we are playing with man_pg stuff.
6253  *
6254  * Returns 0 on success, errno on failure.
6255  */
6256 int
6257 man_pg_cmd(mi_path_t *mip, man_work_t *waiter_wp)
6258 {
6259 	int		status = 0;
6260 	man_t		*manp;
6261 
6262 	if (mip->mip_ndevs < 0) {
6263 		status = EINVAL;
6264 		cmn_err(CE_WARN, "man_pg_cmd: EINVAL: mip_ndevs %d",
6265 			mip->mip_ndevs);
6266 		goto exit;
6267 	}
6268 
6269 	ASSERT(MUTEX_HELD(&man_lock));
6270 	manp = ddi_get_soft_state(man_softstate, mip->mip_man_ppa);
6271 	if (manp == NULL) {
6272 		status = ENODEV;
6273 		goto exit;
6274 	}
6275 
6276 	MAN_DBG(MAN_PATH, ("man_pg_cmd: mip"));
6277 	MAN_DBGCALL(MAN_PATH, man_print_mip(mip));
6278 
6279 	MAN_DBG(MAN_PATH, ("\tman_t"));
6280 	MAN_DBGCALL(MAN_PATH, man_print_man(manp));
6281 
6282 	switch (mip->mip_cmd) {
6283 	case MI_PATH_ASSIGN:
6284 		status = man_pg_assign(&manp->man_pg, mip, FALSE);
6285 		break;
6286 
6287 	case MI_PATH_ADD:
6288 		status = man_pg_assign(&manp->man_pg, mip, TRUE);
6289 		break;
6290 
6291 	case MI_PATH_UNASSIGN:
6292 		status = man_pg_unassign(&manp->man_pg, mip);
6293 		break;
6294 
6295 	case MI_PATH_ACTIVATE:
6296 		status = man_pg_activate(manp, mip, waiter_wp);
6297 		break;
6298 
6299 	case MI_PATH_READ:
6300 		status = man_pg_read(manp->man_pg, mip);
6301 		break;
6302 
6303 	default:
6304 		status = EINVAL;
6305 		cmn_err(CE_NOTE, "man_pg_cmd: invalid command");
6306 		break;
6307 	}
6308 
6309 exit:
6310 	MAN_DBG(MAN_PATH, ("man_pg_cmd: returns %d", status));
6311 
6312 	return (status);
6313 }
6314 
6315 /*
6316  * Assign paths to a pathgroup. If pathgroup doesnt exists, create it.
6317  * If path doesnt exist, create it. If ethernet address of existing
6318  * pathgroup different, change it. If an existing path is not in the new
6319  * list, remove it.  If anything changed, send PATH_UPDATE request to
6320  * man_iwork to update all man_dest_t's.
6321  *
6322  * 	mplpp	- man pathgroup list point to point.
6323  *	mip	- new/updated pathgroup info to assign.
6324  */
6325 static int
6326 man_pg_assign(man_pg_t **mplpp, mi_path_t *mip, int add_only)
6327 {
6328 	man_pg_t	*mpg;
6329 	man_path_t	*mp;
6330 	man_path_t	*add_paths = NULL;
6331 	int		cnt;
6332 	int		i;
6333 	int		first_pass = TRUE;
6334 	int		status = 0;
6335 
6336 	ASSERT(MUTEX_HELD(&man_lock));
6337 
6338 	cnt = mip->mip_ndevs;
6339 	if (cnt == 0) {
6340 		status = EINVAL;
6341 		cmn_err(CE_NOTE, "man_pg_assign: mip_ndevs == 0");
6342 		goto exit;
6343 	}
6344 
6345 	/*
6346 	 * Assure the devices to be assigned are not assigned to some other
6347 	 * pathgroup.
6348 	 */
6349 	for (i = 0; i < cnt; i++) {
6350 		mpg = man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL);
6351 
6352 		if (mpg == NULL)
6353 			continue;
6354 
6355 		if ((mpg->mpg_man_ppa != mip->mip_man_ppa) ||
6356 		    (mpg->mpg_pg_id != mip->mip_pg_id)) {
6357 			/*
6358 			 * Already assigned to some other man instance
6359 			 * or pathgroup.
6360 			 */
6361 			status = EEXIST;
6362 			goto exit;
6363 		}
6364 	}
6365 
6366 	/*
6367 	 * Find pathgroup, or allocate new one if it doesnt exist and
6368 	 * add it to list at mplpp. Result is that mpg points to
6369 	 * pathgroup to modify.
6370 	 */
6371 	mpg = man_find_pg_by_id(*mplpp, mip->mip_pg_id);
6372 	if (mpg == NULL) {
6373 
6374 		status = man_pg_create(mplpp, &mpg, mip);
6375 		if (status)
6376 			goto exit;
6377 
6378 	} else if (ether_cmp(&mip->mip_eaddr, &mpg->mpg_dst_eaddr) != 0) {
6379 
6380 		cmn_err(CE_WARN, "man_pg_assign: ethernet address mismatch");
6381 		cmn_err(CE_CONT, "existing %s",
6382 			ether_sprintf(&mpg->mpg_dst_eaddr));
6383 		cmn_err(CE_CONT, "new %s",
6384 			ether_sprintf(&mip->mip_eaddr));
6385 
6386 		status = EINVAL;
6387 		goto exit;
6388 	}
6389 
6390 	/*
6391 	 * Create list of new paths to add to pathgroup.
6392 	 */
6393 	for (i = 0; i < cnt; i++) {
6394 
6395 		if (man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL))
6396 			continue;	/* Already exists in this pathgroup */
6397 
6398 		mp = man_kzalloc(sizeof (man_path_t), KM_NOSLEEP);
6399 		if (mp == NULL) {
6400 			status = ENOMEM;
6401 			goto exit;
6402 		}
6403 
6404 		mp->mp_device = mip->mip_devs[i];
6405 		mp->mp_device.mdev_state = MDEV_ASSIGNED;
6406 
6407 		MAN_DBG(MAN_PATH, ("man_pg_assign: assigning mdp"));
6408 		MAN_DBGCALL(MAN_PATH, man_print_dev(&mp->mp_device));
6409 
6410 		status = man_path_kstat_init(mp);
6411 		if (status) {
6412 			man_kfree(mp, sizeof (man_path_t));
6413 			goto exit;
6414 		}
6415 
6416 		man_path_insert(&add_paths, mp);
6417 	}
6418 
6419 	/*
6420 	 * man_dr_attach passes only the path which is being DRd in.
6421 	 * So just add the path and don't worry about removing paths.
6422 	 */
6423 	if (add_only == TRUE)
6424 		goto exit;
6425 
6426 
6427 	/*
6428 	 * Check if any paths we want to remove are ACTIVE. If not,
6429 	 * do a second pass and remove them.
6430 	 */
6431 again:
6432 	mp = mpg->mpg_pathp;
6433 	while (mp != NULL) {
6434 		int		in_new_list;
6435 		man_path_t	*rp;
6436 
6437 		rp = NULL;
6438 		in_new_list = FALSE;
6439 
6440 		for (i = 0; i < cnt; i++) {
6441 			if (mp->mp_device.mdev_ppa ==
6442 			    mip->mip_devs[i].mdev_ppa) {
6443 
6444 				in_new_list = TRUE;
6445 				break;
6446 			}
6447 		}
6448 
6449 		if (!in_new_list) {
6450 			if (first_pass) {
6451 				if (mp->mp_device.mdev_state & MDEV_ACTIVE) {
6452 					status = EBUSY;
6453 					goto exit;
6454 				}
6455 			} else {
6456 				rp = mp;
6457 			}
6458 		}
6459 		mp = mp->mp_next;
6460 
6461 		if (rp != NULL)
6462 			man_path_remove(&mpg->mpg_pathp, rp);
6463 	}
6464 
6465 	if (first_pass == TRUE) {
6466 		first_pass = FALSE;
6467 		goto again;
6468 	}
6469 
6470 exit:
6471 	if (status == 0) {
6472 		if (add_paths)
6473 			man_path_merge(&mpg->mpg_pathp, add_paths);
6474 	} else {
6475 		while (add_paths != NULL) {
6476 			mp = add_paths;
6477 			add_paths = mp->mp_next;
6478 			mp->mp_next = NULL;
6479 
6480 			man_path_kstat_uninit(mp);
6481 			man_kfree(mp, sizeof (man_path_t));
6482 		}
6483 	}
6484 
6485 	return (status);
6486 }
6487 
6488 /*
6489  * Remove all paths from a pathgroup (domain shutdown). If there is an
6490  * active path in the group, shut down all destinations referencing it
6491  * first.
6492  */
6493 static int
6494 man_pg_unassign(man_pg_t **plpp, mi_path_t *mip)
6495 {
6496 	man_pg_t	*mpg;
6497 	man_pg_t	*tpg;
6498 	man_pg_t	*tppg;
6499 	man_path_t	*mp = NULL;
6500 	int		status = 0;
6501 
6502 	ASSERT(MUTEX_HELD(&man_lock));
6503 
6504 	/*
6505 	 * Check for existence of pathgroup.
6506 	 */
6507 	if ((mpg = man_find_pg_by_id(*plpp, mip->mip_pg_id)) == NULL)
6508 		goto exit;
6509 
6510 	if (man_find_active_path(mpg->mpg_pathp) != NULL) {
6511 		status = man_remove_dests(mpg);
6512 		if (status)
6513 			goto exit;
6514 	}
6515 
6516 	/*
6517 	 * Free all the paths for this pathgroup.
6518 	 */
6519 	while (mpg->mpg_pathp) {
6520 		mp = mpg->mpg_pathp;
6521 		mpg->mpg_pathp = mp->mp_next;
6522 		mp->mp_next = NULL;
6523 
6524 		man_path_kstat_uninit(mp);
6525 		man_kfree(mp, sizeof (man_path_t));
6526 	}
6527 
6528 	/*
6529 	 * Remove this pathgroup from the list, and free it.
6530 	 */
6531 	tpg = tppg = *plpp;
6532 	if (tpg == mpg) {
6533 		*plpp = tpg->mpg_next;
6534 		goto free_pg;
6535 	}
6536 
6537 	for (tpg = tpg->mpg_next; tpg != NULL; tpg = tpg->mpg_next) {
6538 		if (tpg == mpg)
6539 			break;
6540 		tppg = tpg;
6541 	}
6542 
6543 	ASSERT(tpg != NULL);
6544 
6545 	tppg->mpg_next = tpg->mpg_next;
6546 	tpg->mpg_next = NULL;
6547 
6548 free_pg:
6549 	man_kfree(tpg, sizeof (man_pg_t));
6550 
6551 exit:
6552 	return (status);
6553 
6554 }
6555 
6556 /*
6557  * Set a new active path. This is done via man_ioctl so we are
6558  * exclusive in the inner perimeter.
6559  */
6560 static int
6561 man_pg_activate(man_t *manp, mi_path_t *mip, man_work_t *waiter_wp)
6562 {
6563 	man_pg_t	*mpg1;
6564 	man_pg_t	*mpg2;
6565 	man_pg_t	*plp;
6566 	man_path_t	*mp;
6567 	man_path_t	*ap;
6568 	int		status = 0;
6569 
6570 	ASSERT(MUTEX_HELD(&man_lock));
6571 	MAN_DBG(MAN_PATH, ("man_pg_activate: dev"));
6572 	MAN_DBGCALL(MAN_PATH, man_print_dev(mip->mip_devs));
6573 
6574 	if (mip->mip_ndevs != 1) {
6575 		status = EINVAL;
6576 		goto exit;
6577 	}
6578 
6579 	plp = manp->man_pg;
6580 	mpg1 = man_find_pg_by_id(plp, mip->mip_pg_id);
6581 	if (mpg1 == NULL) {
6582 		status = EINVAL;
6583 		goto exit;
6584 	}
6585 
6586 	mpg2 = man_find_path_by_dev(plp, mip->mip_devs, &mp);
6587 	if (mpg2 == NULL) {
6588 		status = ENODEV;
6589 		goto exit;
6590 	}
6591 
6592 	if (mpg1 != mpg2) {
6593 		status = EINVAL;
6594 		goto exit;
6595 	}
6596 
6597 	ASSERT(mp->mp_device.mdev_ppa == mip->mip_devs->mdev_ppa);
6598 
6599 	if (mpg1->mpg_flags & MAN_PG_SWITCHING) {
6600 		status = EAGAIN;
6601 		goto exit;
6602 	}
6603 
6604 	ap = man_find_active_path(mpg1->mpg_pathp);
6605 	if (ap == NULL) {
6606 		/*
6607 		 * This is the first time a path has been activated for
6608 		 * this pathgroup. Initialize all upper streams dest
6609 		 * structure for this pathgroup so autoswitch will find
6610 		 * them.
6611 		 */
6612 		mp->mp_device.mdev_state |= MDEV_ACTIVE;
6613 		man_add_dests(mpg1);
6614 		goto exit;
6615 	}
6616 
6617 	/*
6618 	 * Path already active, nothing to do.
6619 	 */
6620 	if (ap == mp)
6621 		goto exit;
6622 
6623 	/*
6624 	 * Try to autoswitch to requested device. Set flags and refcnt.
6625 	 * Cleared in man_iswitch when SWITCH completes.
6626 	 */
6627 	manp->man_refcnt++;
6628 	mpg1->mpg_flags |= MAN_PG_SWITCHING;
6629 
6630 	/*
6631 	 * Switch to path specified.
6632 	 */
6633 	status = man_autoswitch(mpg1, mip->mip_devs, waiter_wp);
6634 
6635 	if (status != 0) {
6636 		/*
6637 		 * man_iswitch not going to run, clean up.
6638 		 */
6639 		manp->man_refcnt--;
6640 		mpg1->mpg_flags &= ~MAN_PG_SWITCHING;
6641 
6642 		if (status == ENODEV) {
6643 			/*
6644 			 * Device not plumbed isn't really an error. Change
6645 			 * active device setting here, since man_iswitch isn't
6646 			 * going to be run to do it.
6647 			 */
6648 			status = 0;
6649 			ap->mp_device.mdev_state &= ~MDEV_ACTIVE;
6650 			mp->mp_device.mdev_state |= MDEV_ACTIVE;
6651 		}
6652 	}
6653 
6654 exit:
6655 	MAN_DBG(MAN_PATH, ("man_pg_activate: returns %d", status));
6656 
6657 	return (status);
6658 }
6659 
6660 static int
6661 man_pg_read(man_pg_t *plp, mi_path_t *mip)
6662 {
6663 	man_pg_t	*mpg;
6664 	man_path_t	*mp;
6665 	int		cnt;
6666 	int		status = 0;
6667 
6668 	ASSERT(MUTEX_HELD(&man_lock));
6669 
6670 	if ((mpg = man_find_pg_by_id(plp, mip->mip_pg_id)) == NULL) {
6671 		status = ENODEV;
6672 		goto exit;
6673 	}
6674 
6675 	cnt = 0;
6676 	for (mp = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6677 		bcopy(&mp->mp_device, &mip->mip_devs[cnt], sizeof (man_dev_t));
6678 		if (cnt == mip->mip_ndevs)
6679 			break;
6680 		cnt++;
6681 	}
6682 
6683 	MAN_DBG(MAN_PATH, ("man_pg_read: pg(0x%p) id(%d) found %d paths",
6684 		(void *)mpg, mpg->mpg_pg_id, cnt));
6685 
6686 	mip->mip_ndevs = cnt;
6687 
6688 	/*
6689 	 * TBD - What should errno be if user buffer too small ?
6690 	 */
6691 	if (mp != NULL) {
6692 		status = ENOMEM;
6693 	}
6694 
6695 exit:
6696 
6697 	return (status);
6698 }
6699 
6700 /*
6701  * return existing pathgroup, or create it. TBD - Need to update
6702  * all of destinations if we added a pathgroup. Also, need to update
6703  * all of man_strup if we add a path.
6704  *
6705  * 	mplpp	- man pathgroup list point to pointer.
6706  * 	mpgp	- returns newly created man pathgroup.
6707  *	mip	- info to fill in mpgp.
6708  */
6709 static int
6710 man_pg_create(man_pg_t **mplpp, man_pg_t **mpgp, mi_path_t *mip)
6711 {
6712 	man_pg_t	*mpg;
6713 	man_pg_t	*tpg;
6714 	int		status = 0;
6715 
6716 	ASSERT(MUTEX_HELD(&man_lock));
6717 
6718 	if (ether_cmp(&mip->mip_eaddr, &zero_ether_addr) == 0) {
6719 		cmn_err(CE_NOTE, "man_ioctl: man_pg_create: ether"
6720 			" addresss not set!");
6721 		status = EINVAL;
6722 		goto exit;
6723 	}
6724 
6725 	mpg = man_kzalloc(sizeof (man_pg_t), KM_NOSLEEP);
6726 	if (mpg == NULL) {
6727 		status = ENOMEM;
6728 		goto exit;
6729 	}
6730 
6731 	mpg->mpg_flags = MAN_PG_IDLE;
6732 	mpg->mpg_pg_id = mip->mip_pg_id;
6733 	mpg->mpg_man_ppa = mip->mip_man_ppa;
6734 	ether_copy(&mip->mip_eaddr, &mpg->mpg_dst_eaddr);
6735 
6736 	MAN_DBG(MAN_PATH, ("man_pg_create: new mpg"));
6737 	MAN_DBGCALL(MAN_PATH, man_print_mpg(mpg));
6738 
6739 	tpg = *mplpp;
6740 	if (tpg == NULL) {
6741 		*mplpp = mpg;
6742 	} else {
6743 		while (tpg->mpg_next != NULL)
6744 			tpg = tpg->mpg_next;
6745 		tpg->mpg_next = mpg;
6746 	}
6747 
6748 exit:
6749 	*mpgp = mpg;
6750 
6751 	return (status);
6752 }
6753 
6754 /*
6755  * Return pointer to pathgroup containing mdevp, null otherwise. Also,
6756  * if a path pointer is passed in, set it to matching path in pathgroup.
6757  *
6758  * Called holding man_lock.
6759  */
6760 static man_pg_t *
6761 man_find_path_by_dev(man_pg_t *plp, man_dev_t *mdevp, man_path_t **mpp)
6762 {
6763 	man_pg_t	*mpg;
6764 	man_path_t	*mp;
6765 
6766 	ASSERT(MUTEX_HELD(&man_lock));
6767 	for (mpg = plp; mpg != NULL; mpg = mpg->mpg_next) {
6768 		for (mp  = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6769 			if (mp->mp_device.mdev_major == mdevp->mdev_major &&
6770 			    mp->mp_device.mdev_ppa == mdevp->mdev_ppa) {
6771 
6772 				if (mpp != NULL)
6773 					*mpp = mp;
6774 				return (mpg);
6775 			}
6776 		}
6777 	}
6778 
6779 	return (NULL);
6780 }
6781 
6782 /*
6783  * Return pointer to pathgroup assigned to destination, null if not found.
6784  *
6785  * Called holding man_lock.
6786  */
6787 static man_pg_t *
6788 man_find_pg_by_id(man_pg_t *mpg, int pg_id)
6789 {
6790 	ASSERT(MUTEX_HELD(&man_lock));
6791 	for (; mpg != NULL; mpg = mpg->mpg_next) {
6792 		if (mpg->mpg_pg_id == pg_id)
6793 			return (mpg);
6794 	}
6795 
6796 	return (NULL);
6797 }
6798 
6799 static man_path_t *
6800 man_find_path_by_ppa(man_path_t *mplist, int ppa)
6801 {
6802 	man_path_t	*mp;
6803 
6804 	ASSERT(MUTEX_HELD(&man_lock));
6805 	for (mp = mplist; mp != NULL; mp = mp->mp_next) {
6806 		if (mp->mp_device.mdev_ppa == ppa)
6807 			return (mp);
6808 	}
6809 
6810 	return (NULL);
6811 }
6812 
6813 static man_path_t *
6814 man_find_active_path(man_path_t *mplist)
6815 {
6816 	man_path_t	*mp;
6817 
6818 	ASSERT(MUTEX_HELD(&man_lock));
6819 	for (mp = mplist; mp != NULL; mp = mp->mp_next)
6820 		if (mp->mp_device.mdev_state & MDEV_ACTIVE)
6821 			return (mp);
6822 
6823 	return (NULL);
6824 }
6825 
6826 /*
6827  * Try and find an alternate path.
6828  */
6829 static man_path_t *
6830 man_find_alternate_path(man_path_t *mlp)
6831 {
6832 	man_path_t	*ap;		/* Active path */
6833 	man_path_t	*np;		/* New alternate path */
6834 	man_path_t	*fp = NULL;	/* LRU failed path */
6835 
6836 	ASSERT(MUTEX_HELD(&man_lock));
6837 	ap = man_find_active_path(mlp);
6838 
6839 	/*
6840 	 * Find a non-failed path, or the lru failed path and switch to it.
6841 	 */
6842 	for (np = mlp; np != NULL; np = np->mp_next) {
6843 		if (np == ap)
6844 			continue;
6845 
6846 		if (np->mp_device.mdev_state == MDEV_ASSIGNED)
6847 			goto exit;
6848 
6849 		if (np->mp_device.mdev_state & MDEV_FAILED) {
6850 			if (fp == NULL)
6851 				fp = np;
6852 			else
6853 				if (fp->mp_lru > np->mp_lru)
6854 						fp = np;
6855 		}
6856 	}
6857 
6858 	/*
6859 	 * Nowhere to switch to.
6860 	 */
6861 	if (np == NULL && (np =  fp) == NULL)
6862 		goto exit;
6863 
6864 exit:
6865 	return (np);
6866 }
6867 
6868 /*
6869  * Assumes caller has verified existence.
6870  */
6871 static void
6872 man_path_remove(man_path_t **lpp, man_path_t *mp)
6873 {
6874 	man_path_t	*tp;
6875 	man_path_t	*tpp;
6876 
6877 	ASSERT(MUTEX_HELD(&man_lock));
6878 	MAN_DBG(MAN_PATH, ("man_path_remove: removing path"));
6879 	MAN_DBGCALL(MAN_PATH, man_print_path(mp));
6880 
6881 	tp = tpp = *lpp;
6882 	if (tp == mp) {
6883 		*lpp = tp->mp_next;
6884 		goto exit;
6885 	}
6886 
6887 	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
6888 		if (tp == mp)
6889 			break;
6890 		tpp = tp;
6891 	}
6892 
6893 	ASSERT(tp != NULL);
6894 
6895 	tpp->mp_next = tp->mp_next;
6896 	tp->mp_next = NULL;
6897 
6898 exit:
6899 	man_path_kstat_uninit(tp);
6900 	man_kfree(tp, sizeof (man_path_t));
6901 
6902 }
6903 
6904 /*
6905  * Insert path into list, ascending order by ppa.
6906  */
6907 static void
6908 man_path_insert(man_path_t **lpp, man_path_t *mp)
6909 {
6910 	man_path_t	*tp;
6911 	man_path_t	*tpp;
6912 
6913 	ASSERT(MUTEX_HELD(&man_lock));
6914 	if (*lpp == NULL) {
6915 		*lpp = mp;
6916 		return;
6917 	}
6918 
6919 	tp = tpp = *lpp;
6920 	if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa) {
6921 		mp->mp_next = tp;
6922 		*lpp = mp;
6923 		return;
6924 	}
6925 
6926 	for (tp = tp->mp_next; tp != NULL; tp =  tp->mp_next) {
6927 		if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa)
6928 			break;
6929 		tpp = tp;
6930 	}
6931 
6932 	if (tp == NULL) {
6933 		tpp->mp_next = mp;
6934 	} else {
6935 		tpp->mp_next = mp;
6936 		mp->mp_next = tp;
6937 	}
6938 }
6939 
6940 /*
6941  * Merge npp into lpp, ascending order by ppa. Assumes no
6942  * duplicates in either list.
6943  */
6944 static void
6945 man_path_merge(man_path_t **lpp, man_path_t *np)
6946 {
6947 	man_path_t	*tmp;
6948 
6949 	ASSERT(MUTEX_HELD(&man_lock));
6950 	while (np != NULL) {
6951 		tmp = np;
6952 		np = np->mp_next;
6953 		tmp->mp_next = NULL;
6954 
6955 		man_path_insert(lpp, tmp);
6956 	}
6957 
6958 }
6959 
6960 static int
6961 man_path_kstat_init(man_path_t *mpp)
6962 {
6963 
6964 	kstat_named_t	*dev_knp;
6965 	int		status = 0;
6966 
6967 	ASSERT(MUTEX_HELD(&man_lock));
6968 	MAN_DBG(MAN_PATH, ("man_path_kstat_init: mpp(0x%p)\n", (void *)mpp));
6969 
6970 	/*
6971 	 * Create named kstats for accounting purposes.
6972 	 */
6973 	dev_knp = man_kzalloc(MAN_NUMSTATS * sizeof (kstat_named_t),
6974 		KM_NOSLEEP);
6975 	if (dev_knp == NULL) {
6976 		status = ENOMEM;
6977 		goto exit;
6978 	}
6979 	man_kstat_named_init(dev_knp, MAN_NUMSTATS);
6980 	mpp->mp_last_knp = dev_knp;
6981 
6982 exit:
6983 
6984 	MAN_DBG(MAN_PATH, ("man_path_kstat_init: returns %d\n", status));
6985 
6986 	return (status);
6987 }
6988 
6989 static void
6990 man_path_kstat_uninit(man_path_t *mp)
6991 {
6992 	ASSERT(MUTEX_HELD(&man_lock));
6993 	man_kfree(mp->mp_last_knp, MAN_NUMSTATS * sizeof (kstat_named_t));
6994 }
6995 
6996 /*
6997  * man_work_alloc - allocate and initiate a work request structure
6998  *
6999  *	type - type of request to allocate
7000  *	returns	- success - ptr to an initialized work structure
7001  *		- failure - NULL
7002  */
7003 man_work_t *
7004 man_work_alloc(int type, int kmflag)
7005 {
7006 	man_work_t	*wp;
7007 
7008 	wp = man_kzalloc(sizeof (man_work_t), kmflag);
7009 	if (wp == NULL)
7010 		goto exit;
7011 
7012 	cv_init(&wp->mw_cv, NULL, CV_DRIVER, NULL); \
7013 	wp->mw_type = type;
7014 
7015 exit:
7016 	return (wp);
7017 }
7018 
7019 /*
7020  * man_work_free - deallocate a work request structure
7021  *
7022  *	wp - ptr to work structure to be freed
7023  */
7024 void
7025 man_work_free(man_work_t *wp)
7026 {
7027 	cv_destroy(&wp->mw_cv);
7028 	man_kfree((void *)wp, sizeof (man_work_t));
7029 }
7030 
7031 /*
7032  * Post work to a work queue.  The man_bwork sleeps on
7033  * man_bwork_q->q_cv, and work requesters may sleep on mw_cv.
7034  * The man_lock is used to protect both cv's.
7035  */
7036 void
7037 man_work_add(man_workq_t *q, man_work_t *wp)
7038 {
7039 	man_work_t	*lp = q->q_work;
7040 
7041 	if (lp) {
7042 		while (lp->mw_next != NULL)
7043 			lp = lp->mw_next;
7044 
7045 		lp->mw_next = wp;
7046 
7047 	} else {
7048 		q->q_work = wp;
7049 	}
7050 
7051 	/*
7052 	 * cv_signal for man_bwork_q, qenable for man_iwork_q
7053 	 */
7054 	if (q == man_bwork_q) {
7055 		cv_signal(&q->q_cv);
7056 
7057 	} else {	/* q == man_iwork_q */
7058 
7059 		if (man_ctl_wq != NULL)
7060 			qenable(man_ctl_wq);
7061 	}
7062 
7063 }
7064 
7065 /* <<<<<<<<<<<<<<<<<<<<<<< NDD SUPPORT FUNCTIONS	>>>>>>>>>>>>>>>>>>> */
7066 /*
7067  * ndd support functions to get/set parameters
7068  */
7069 
7070 /*
7071  * Register each element of the parameter array with the
7072  * named dispatch handler. Each element is loaded using
7073  * nd_load()
7074  *
7075  * 	cnt	- the number of elements present in the parameter array
7076  */
7077 static int
7078 man_param_register(param_t *manpa, int cnt)
7079 {
7080 	int	i;
7081 	ndgetf_t getp;
7082 	ndsetf_t setp;
7083 	int	status = B_TRUE;
7084 
7085 	MAN_DBG(MAN_CONFIG, ("man_param_register: manpa(0x%p) cnt %d\n",
7086 		(void *)manpa, cnt));
7087 
7088 	getp = man_param_get;
7089 
7090 	for (i = 0; i < cnt; i++, manpa++) {
7091 		switch (man_param_display[i]) {
7092 		case MAN_NDD_GETABLE:
7093 			setp = NULL;
7094 			break;
7095 
7096 		case MAN_NDD_SETABLE:
7097 			setp = man_param_set;
7098 			break;
7099 
7100 		default:
7101 			continue;
7102 		}
7103 
7104 		if (!nd_load(&man_ndlist, manpa->param_name, getp,
7105 			setp, (caddr_t)manpa)) {
7106 
7107 			(void) man_nd_free(&man_ndlist);
7108 			status = B_FALSE;
7109 			goto exit;
7110 		}
7111 	}
7112 
7113 	if (!nd_load(&man_ndlist, "man_pathgroups_report",
7114 		man_pathgroups_report, NULL, NULL)) {
7115 
7116 		(void) man_nd_free(&man_ndlist);
7117 		status = B_FALSE;
7118 		goto exit;
7119 	}
7120 
7121 	if (!nd_load(&man_ndlist, "man_set_active_path",
7122 		NULL, man_set_active_path, NULL)) {
7123 
7124 		(void) man_nd_free(&man_ndlist);
7125 		status = B_FALSE;
7126 		goto exit;
7127 	}
7128 
7129 	if (!nd_load(&man_ndlist, "man_get_hostinfo",
7130 		man_get_hostinfo, NULL, NULL)) {
7131 
7132 		(void) man_nd_free(&man_ndlist);
7133 		status = B_FALSE;
7134 		goto exit;
7135 	}
7136 
7137 exit:
7138 
7139 	MAN_DBG(MAN_CONFIG, ("man_param_register: returns %d\n", status));
7140 
7141 	return (status);
7142 }
7143 
7144 static void
7145 man_nd_getset(queue_t *wq, mblk_t *mp)
7146 {
7147 
7148 	if (!nd_getset(wq, man_ndlist, mp))
7149 		miocnak(wq, mp, 0, ENOENT);
7150 	else
7151 		qreply(wq, mp);
7152 }
7153 
7154 /*ARGSUSED*/
7155 static int
7156 man_pathgroups_report(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7157 {
7158 
7159 	man_t		*manp;
7160 	man_pg_t	*mpg;
7161 	int		i;
7162 	char		pad[] = "                 "; /* 17 spaces */
7163 	int		pad_end;
7164 
7165 
7166 	MAN_DBG(MAN_PATH, ("man_pathgroups_report: wq(0x%p) mp(0x%p)"
7167 		" caddr 0x%p", (void *)wq, (void *)mp, (void *)cp));
7168 
7169 	(void) mi_mpprintf(mp, "MAN Pathgroup report: (* == failed)");
7170 	(void) mi_mpprintf(mp, "====================================="
7171 		"==========================================");
7172 
7173 	mutex_enter(&man_lock);
7174 
7175 	for (i = 0; i < 2; i++) {
7176 		manp = ddi_get_soft_state(man_softstate, i);
7177 		if (manp == NULL)
7178 			continue;
7179 
7180 	(void) mi_mpprintf(mp,
7181 		"Interface\tDestination\t\tActive Path\tAlternate Paths");
7182 	(void) mi_mpprintf(mp, "---------------------------------------"
7183 		"----------------------------------------");
7184 
7185 		for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7186 
7187 			(void) mi_mpprintf(mp, "%s%d\t\t",
7188 				ddi_major_to_name(manp->man_meta_major),
7189 				manp->man_meta_ppa);
7190 
7191 			if (man_is_on_domain) {
7192 				(void) mi_mpprintf_nr(mp, "Master SSC\t");
7193 				man_preport(mpg->mpg_pathp, mp);
7194 			} else {
7195 				if (i == 0) {
7196 				    pad_end = 17 - strlen(ether_sprintf(
7197 					&mpg->mpg_dst_eaddr));
7198 				    if (pad_end < 0 || pad_end > 16)
7199 					pad_end = 0;
7200 				    pad[pad_end] = '\0';
7201 
7202 				    (void) mi_mpprintf_nr(mp, "%c %s%s",
7203 					mpg->mpg_pg_id + 'A',
7204 					ether_sprintf(&mpg->mpg_dst_eaddr),
7205 					pad);
7206 
7207 				    pad[pad_end] = ' ';
7208 				} else {
7209 				    (void) mi_mpprintf_nr(mp, "Other SSC\t");
7210 				}
7211 				man_preport(mpg->mpg_pathp, mp);
7212 			}
7213 			(void) mi_mpprintf_nr(mp, "\n");
7214 		}
7215 	}
7216 
7217 	mutex_exit(&man_lock);
7218 	MAN_DBG(MAN_PATH, ("man_pathgroups_report: returns"));
7219 
7220 	return (0);
7221 }
7222 
7223 static void
7224 man_preport(man_path_t *plist, mblk_t *mp)
7225 {
7226 	man_path_t	*ap;
7227 
7228 	ap = man_find_active_path(plist);
7229 	/*
7230 	 * Active path
7231 	 */
7232 	if (ap != NULL) {
7233 		(void) mi_mpprintf_nr(mp, "\t%s%d\t\t",
7234 			ddi_major_to_name(ap->mp_device.mdev_major),
7235 			ap->mp_device.mdev_ppa);
7236 	} else {
7237 		(void) mi_mpprintf_nr(mp, "None \t");
7238 	}
7239 
7240 	/*
7241 	 * Alternate Paths.
7242 	 */
7243 	while (plist != NULL) {
7244 		(void) mi_mpprintf_nr(mp, "%s%d exp %d",
7245 			ddi_major_to_name(plist->mp_device.mdev_major),
7246 			plist->mp_device.mdev_ppa,
7247 			plist->mp_device.mdev_exp_id);
7248 		if (plist->mp_device.mdev_state & MDEV_FAILED)
7249 			(void) mi_mpprintf_nr(mp, "*");
7250 		plist = plist->mp_next;
7251 		if (plist)
7252 			(void) mi_mpprintf_nr(mp, ", ");
7253 	}
7254 }
7255 
7256 /*
7257  * NDD request to set active path. Calling context is man_ioctl, so we are
7258  * exclusive in the inner perimeter.
7259  *
7260  *	Syntax is "ndd -set /dev/dman <man ppa> <pg_id> <phys ppa>"
7261  */
7262 /* ARGSUSED3 */
7263 static int
7264 man_set_active_path(queue_t *wq, mblk_t *mp, char *value, caddr_t cp,
7265     cred_t *cr)
7266 {
7267 	char		*end, *meta_ppap, *phys_ppap, *pg_idp;
7268 	int		meta_ppa;
7269 	int		phys_ppa;
7270 	int		pg_id;
7271 	man_t		*manp;
7272 	man_pg_t	*mpg;
7273 	man_path_t	*np;
7274 	mi_path_t	mpath;
7275 	int		status = 0;
7276 
7277 	MAN_DBG(MAN_PATH, ("man_set_active_path: wq(0x%p) mp(0x%p)"
7278 		" args %s", (void *)wq, (void *)mp, value));
7279 
7280 	meta_ppap = value;
7281 
7282 	if ((pg_idp = strchr(value, ' ')) == NULL) {
7283 		status = EINVAL;
7284 		goto exit;
7285 	}
7286 
7287 	*pg_idp++ = '\0';
7288 
7289 	if ((phys_ppap = strchr(pg_idp, ' ')) == NULL) {
7290 		status = EINVAL;
7291 		goto exit;
7292 	}
7293 
7294 	*phys_ppap++ = '\0';
7295 
7296 	meta_ppa = (int)mi_strtol(meta_ppap, &end, 10);
7297 	pg_id = (int)mi_strtol(pg_idp, &end, 10);
7298 	phys_ppa = (int)mi_strtol(phys_ppap, &end, 10);
7299 
7300 	mutex_enter(&man_lock);
7301 	manp = ddi_get_soft_state(man_softstate, meta_ppa);
7302 	if (manp == NULL || manp->man_pg == NULL) {
7303 		status = EINVAL;
7304 		mutex_exit(&man_lock);
7305 		goto exit;
7306 	}
7307 
7308 	mpg = man_find_pg_by_id(manp->man_pg, pg_id);
7309 	if (mpg == NULL) {
7310 		status = EINVAL;
7311 		mutex_exit(&man_lock);
7312 		goto exit;
7313 	}
7314 
7315 	np = man_find_path_by_ppa(mpg->mpg_pathp, phys_ppa);
7316 
7317 	if (np == NULL) {
7318 		status = EINVAL;
7319 		mutex_exit(&man_lock);
7320 		goto exit;
7321 	}
7322 
7323 	mpath.mip_cmd = MI_PATH_ACTIVATE;
7324 	mpath.mip_pg_id = pg_id;
7325 	mpath.mip_man_ppa = meta_ppa;
7326 	mpath.mip_devs[0] = np->mp_device;
7327 	mpath.mip_ndevs = 1;
7328 
7329 	status = man_pg_cmd(&mpath, NULL);
7330 	mutex_exit(&man_lock);
7331 
7332 exit:
7333 
7334 	MAN_DBG(MAN_PATH, ("man_set_active_path: returns %d", status));
7335 
7336 	return (status);
7337 }
7338 
7339 /*
7340  * Dump out the contents of the IOSRAM handoff structure. Note that if
7341  * anything changes here, you must make sure that the sysinit script
7342  * stays in sync with this output.
7343  */
7344 /* ARGSUSED */
7345 static int
7346 man_get_hostinfo(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7347 {
7348 	manc_t	manc;
7349 	char	*ipaddr;
7350 	char	ipv6addr[INET6_ADDRSTRLEN];
7351 	int	i;
7352 	int	status;
7353 
7354 	if (!man_is_on_domain)
7355 		return (0);
7356 
7357 	if (status = man_get_iosram(&manc)) {
7358 		return (status);
7359 	}
7360 
7361 	mi_mpprintf(mp, "manc_magic = 0x%x", manc.manc_magic);
7362 	mi_mpprintf(mp, "manc_version = 0%d", manc.manc_version);
7363 	mi_mpprintf(mp, "manc_csum = 0x%x", manc.manc_csum);
7364 
7365 	if (manc.manc_ip_type == AF_INET) {
7366 		in_addr_t	netnum;
7367 
7368 		mi_mpprintf(mp, "manc_ip_type = AF_INET");
7369 
7370 		ipaddr = man_inet_ntoa(manc.manc_dom_ipaddr);
7371 		mi_mpprintf(mp, "manc_dom_ipaddr = %s", ipaddr);
7372 
7373 		ipaddr = man_inet_ntoa(manc.manc_dom_ip_netmask);
7374 		mi_mpprintf(mp, "manc_dom_ip_netmask = %s", ipaddr);
7375 
7376 		netnum = manc.manc_dom_ipaddr & manc.manc_dom_ip_netmask;
7377 		ipaddr = man_inet_ntoa(netnum);
7378 		mi_mpprintf(mp, "manc_dom_ip_netnum = %s", ipaddr);
7379 
7380 		ipaddr = man_inet_ntoa(manc.manc_sc_ipaddr);
7381 		mi_mpprintf(mp, "manc_sc_ipaddr = %s", ipaddr);
7382 
7383 	} else if (manc.manc_ip_type == AF_INET6) {
7384 
7385 		mi_mpprintf(mp, "manc_ip_type = AF_INET6");
7386 
7387 		(void) inet_ntop(AF_INET6, (void *)&manc.manc_dom_ipv6addr,
7388 			ipv6addr, INET6_ADDRSTRLEN);
7389 		mi_mpprintf(mp, "manc_dom_ipv6addr = %s", ipv6addr);
7390 
7391 		mi_mpprintf(mp, "manc_dom_ipv6_netmask = %d",
7392 				manc.manc_dom_ipv6_netmask.s6_addr[0]);
7393 
7394 		(void) inet_ntop(AF_INET6, (void *)&manc.manc_sc_ipv6addr,
7395 			ipv6addr, INET6_ADDRSTRLEN);
7396 		mi_mpprintf(mp, "manc_sc_ipv6addr = %s", ipv6addr);
7397 
7398 	} else {
7399 
7400 		mi_mpprintf(mp, "manc_ip_type = NONE");
7401 	}
7402 
7403 	mi_mpprintf(mp, "manc_dom_eaddr = %s",
7404 		ether_sprintf(&manc.manc_dom_eaddr));
7405 	mi_mpprintf(mp, "manc_sc_eaddr = %s",
7406 		ether_sprintf(&manc.manc_sc_eaddr));
7407 
7408 	mi_mpprintf(mp, "manc_iob_bitmap = 0x%x\tio boards = ",
7409 		manc.manc_iob_bitmap);
7410 	for (i = 0; i < MAN_MAX_EXPANDERS; i++) {
7411 		if ((manc.manc_iob_bitmap >> i) & 0x1) {
7412 			mi_mpprintf_nr(mp, "%d.1, ", i);
7413 		}
7414 	}
7415 	mi_mpprintf(mp, "manc_golden_iob = %d", manc.manc_golden_iob);
7416 
7417 	return (0);
7418 }
7419 
/*
 * Format the four bytes of an IPv4 address, in the order they sit in
 * memory, as a dotted decimal string.
 *
 * The result lives in a single static buffer: every call overwrites
 * the string returned by the previous one.
 */
static char *
man_inet_ntoa(in_addr_t in)
{
	static char	dotted[18];
	unsigned char	*octet = (unsigned char *)&in;

	(void) sprintf(dotted, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);

	return (dotted);
}
7430 
7431 /*
7432  * parameter value. cp points to the required parameter.
7433  */
7434 /* ARGSUSED */
7435 static int
7436 man_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
7437 {
7438 	param_t	*manpa = (param_t *)cp;
7439 
7440 	(void) mi_mpprintf(mp, "%u", manpa->param_val);
7441 	return (0);
7442 }
7443 
7444 /*
7445  * Sets the man parameter to the value in the param_register using
7446  * nd_load().
7447  */
7448 /* ARGSUSED */
7449 static int
7450 man_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
7451 {
7452 	char *end;
7453 	size_t new_value;
7454 	param_t	*manpa = (param_t *)cp;
7455 
7456 	new_value = mi_strtol(value, &end, 10);
7457 
7458 	if (end == value || new_value < manpa->param_min ||
7459 	    new_value > manpa->param_max) {
7460 			return (EINVAL);
7461 	}
7462 
7463 	manpa->param_val = new_value;
7464 
7465 	return (0);
7466 
7467 }
7468 
7469 /*
7470  * Free the Named Dispatch Table by calling man_nd_free
7471  */
7472 static void
7473 man_param_cleanup()
7474 {
7475 	if (man_ndlist != NULL)
7476 		nd_free(&man_ndlist);
7477 }
7478 
7479 /*
7480  * Free the table pointed to by 'ndp'
7481  */
7482 static void
7483 man_nd_free(caddr_t *nd_pparam)
7484 {
7485 	ND	*nd;
7486 
7487 	if ((nd = (ND *)(*nd_pparam)) != NULL) {
7488 		if (nd->nd_tbl)
7489 			mi_free((char *)nd->nd_tbl);
7490 		mi_free((char *)nd);
7491 		*nd_pparam = NULL;
7492 	}
7493 }
7494 
7495 
7496 /*
7497  * man_kstat_update - update the statistics for a meta-interface.
7498  *
7499  *	ksp - kstats struct
7500  *	rw - flag indicating whether stats are to be read or written.
7501  *
7502  *	returns	0
7503  *
7504  * The destination specific kstat information is protected by the
7505  * perimeter lock, so we submit a work request to get the stats
7506  * updated (see man_do_kstats()), and then collect the results
7507  * when cv_signal'd. Note that we are doing cv_timedwait_sig()
7508  * as a precautionary measure only.
7509  */
7510 static int
7511 man_kstat_update(kstat_t *ksp, int rw)
7512 {
7513 	man_t			*manp;		/* per instance data */
7514 	man_work_t		*wp;
7515 	int			status = 0;
7516 	kstat_named_t		*knp;
7517 	kstat_named_t		*man_knp;
7518 	int			i;
7519 
7520 	MAN_DBG(MAN_KSTAT, ("man_kstat_update: %s\n", rw ? "KSTAT_WRITE" :
7521 		"KSTAT_READ"));
7522 
7523 	mutex_enter(&man_lock);
7524 	manp = (man_t *)ksp->ks_private;
7525 	manp->man_refcnt++;
7526 
7527 	/*
7528 	 * If the driver has been configured, get kstats updated by inner
7529 	 * perimeter prior to retrieving.
7530 	 */
7531 	if (man_config_state == MAN_CONFIGURED) {
7532 		clock_t wait_status;
7533 
7534 		man_update_path_kstats(manp);
7535 		wp = man_work_alloc(MAN_WORK_KSTAT_UPDATE, KM_SLEEP);
7536 		wp->mw_arg.a_man_ppa = manp->man_meta_ppa;
7537 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
7538 		man_work_add(man_iwork_q, wp);
7539 
7540 		wait_status = cv_timedwait_sig(&wp->mw_cv, &man_lock,
7541 		    ddi_get_lbolt() + drv_usectohz(manp->man_kstat_waittime));
7542 
7543 		if (wp->mw_flags & MAN_WFLAGS_DONE) {
7544 			status = wp->mw_status;
7545 			man_work_free(wp);
7546 		} else {
7547 			ASSERT(wait_status <= 0);
7548 			wp->mw_flags &= ~MAN_WFLAGS_CVWAITER;
7549 			if (wait_status == 0)
7550 				status = EINTR;
7551 			else {
7552 				MAN_DBG(MAN_KSTAT, ("man_kstat_update: "
7553 					"timedout, returning stale stats."));
7554 				status = 0;
7555 			}
7556 		}
7557 		if (status)
7558 			goto exit;
7559 	}
7560 
7561 	knp = (kstat_named_t *)ksp->ks_data;
7562 	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7563 
7564 	if (rw == KSTAT_READ) {
7565 		for (i = 0; i < MAN_NUMSTATS; i++) {
7566 			knp[i].value.ui64 = man_knp[i].value.ui64;
7567 		}
7568 	} else {
7569 		for (i = 0; i < MAN_NUMSTATS; i++) {
7570 			man_knp[i].value.ui64 = knp[i].value.ui64;
7571 		}
7572 	}
7573 
7574 exit:
7575 	manp->man_refcnt--;
7576 	mutex_exit(&man_lock);
7577 
7578 	MAN_DBG(MAN_KSTAT, ("man_kstat_update: returns %d", status));
7579 
7580 	return (status);
7581 }
7582 
7583 /*
7584  * Sum destination kstats for all active paths for a given instance of the
7585  * MAN driver. Called with perimeter lock.
7586  */
7587 static void
7588 man_do_kstats(man_work_t *wp)
7589 {
7590 	man_t		*manp;
7591 	man_pg_t	*mpg;
7592 	man_path_t	*mp;
7593 
7594 	MAN_DBG(MAN_KSTAT, ("man_do_kstats:"));
7595 
7596 	mutex_enter(&man_lock);
7597 	/*
7598 	 * Sync mp_last_knp for each path associated with the MAN instance.
7599 	 */
7600 	manp = (man_t *)ddi_get_soft_state(man_softstate,
7601 		wp->mw_arg.a_man_ppa);
7602 	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7603 
7604 		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7605 
7606 		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7607 
7608 			MAN_DBG(MAN_KSTAT, ("\tkstat: path"));
7609 			MAN_DBGCALL(MAN_KSTAT, man_print_path(mp));
7610 
7611 			/*
7612 			 * We just to update the destination statistics here.
7613 			 */
7614 			man_sum_dests_kstats(mp->mp_last_knp, mpg);
7615 		}
7616 	}
7617 	mutex_exit(&man_lock);
7618 	MAN_DBG(MAN_KSTAT, ("man_do_kstats: returns"));
7619 }
7620 
7621 /*
7622  * Sum device kstats for all active paths for a given instance of the
7623  * MAN driver. Called with man_lock.
7624  */
7625 static void
7626 man_update_path_kstats(man_t *manp)
7627 {
7628 	kstat_named_t	*man_knp;
7629 	man_pg_t	*mpg;
7630 	man_path_t	*mp;
7631 
7632 	ASSERT(MUTEX_HELD(&man_lock));
7633 	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats:"));
7634 
7635 	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7636 
7637 	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7638 
7639 		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7640 
7641 		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7642 
7643 			man_update_dev_kstats(man_knp, mp);
7644 
7645 		}
7646 	}
7647 	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats: returns"));
7648 }
7649 
7650 /*
7651  * Update the device kstats.
7652  * As man_kstat_update() is called with kstat_chain_lock held,
7653  * we can safely update the statistics from the underlying driver here.
7654  */
7655 static void
7656 man_update_dev_kstats(kstat_named_t *man_knp, man_path_t *mp)
7657 {
7658 	kstat_t		*dev_ksp;
7659 	major_t		major;
7660 	int		instance;
7661 	char		buf[KSTAT_STRLEN];
7662 
7663 
7664 	major = mp->mp_device.mdev_major;
7665 	instance = mp->mp_device.mdev_ppa;
7666 	(void) sprintf(buf, "%s%d", ddi_major_to_name(major), instance);
7667 
7668 	dev_ksp = kstat_hold_byname(ddi_major_to_name(major), instance, buf,
7669 	    ALL_ZONES);
7670 	if (dev_ksp != NULL) {
7671 
7672 		KSTAT_ENTER(dev_ksp);
7673 		KSTAT_UPDATE(dev_ksp, KSTAT_READ);
7674 		man_sum_kstats(man_knp, dev_ksp, mp->mp_last_knp);
7675 		KSTAT_EXIT(dev_ksp);
7676 		kstat_rele(dev_ksp);
7677 
7678 	} else {
7679 		MAN_DBG(MAN_KSTAT,
7680 		("man_update_dev_kstats: no kstat data found for %s(%d,%d)",
7681 				buf, major, instance));
7682 	}
7683 }
7684 
7685 static void
7686 man_sum_dests_kstats(kstat_named_t *knp, man_pg_t *mpg)
7687 {
7688 	int		i;
7689 	int		flags;
7690 	char		*statname;
7691 	manstr_t	*msp;
7692 	man_dest_t	*mdp;
7693 	uint64_t	switches = 0;
7694 	uint64_t	linkfails = 0;
7695 	uint64_t	linkstales = 0;
7696 	uint64_t	icmpv4probes = 0;
7697 	uint64_t	icmpv6probes = 0;
7698 
7699 	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: mpg 0x%p", (void *)mpg));
7700 
7701 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
7702 
7703 		if (!man_str_uses_pg(msp, mpg))
7704 			continue;
7705 
7706 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
7707 
7708 		switches += mdp->md_switches;
7709 		linkfails += mdp->md_linkfails;
7710 		linkstales += mdp->md_linkstales;
7711 		icmpv4probes += mdp->md_icmpv4probes;
7712 		icmpv6probes += mdp->md_icmpv6probes;
7713 	}
7714 
7715 	for (i = 0; i < MAN_NUMSTATS; i++) {
7716 
7717 		statname = man_kstat_info[i].mk_name;
7718 		flags = man_kstat_info[i].mk_flags;
7719 
7720 		if (!(flags & MK_NOT_PHYSICAL))
7721 			continue;
7722 
7723 		if (strcmp(statname, "man_switches") == 0) {
7724 			knp[i].value.ui64 = switches;
7725 		} else if (strcmp(statname, "man_link_fails") == 0) {
7726 			knp[i].value.ui64 = linkfails;
7727 		} else if (strcmp(statname, "man_link_stales") == 0) {
7728 			knp[i].value.ui64 = linkstales;
7729 		} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7730 			knp[i].value.ui64 = icmpv4probes;
7731 		} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7732 			knp[i].value.ui64 = icmpv6probes;
7733 		}
7734 	}
7735 
7736 	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: returns"));
7737 }
7738 
7739 /*
7740  * Initialize MAN named kstats in the space provided.
7741  */
7742 static void
7743 man_kstat_named_init(kstat_named_t *knp, int num_stats)
7744 {
7745 	int	i;
7746 
7747 	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: knp(0x%p) num_stats = %d",
7748 		(void *)knp, num_stats));
7749 
7750 	for (i = 0; i < num_stats; i++) {
7751 		kstat_named_init(&knp[i], man_kstat_info[i].mk_name,
7752 			man_kstat_info[i].mk_type);
7753 	}
7754 
7755 	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: returns"));
7756 
7757 }
7758 
7759 /*
7760  * man_kstat_byname - get a kernel stat value from its structure
7761  *
7762  *	ksp - kstat_t structure to play with
7763  *	s   - string to match names with
7764  *	res - in/out result data pointer
7765  *
7766  *	returns	- success - 1 (found)
7767  *		- failure - 0 (not found)
7768  */
7769 static int
7770 man_kstat_byname(kstat_t *ksp, char *s, kstat_named_t *res)
7771 {
7772 	int		found = 0;
7773 
7774 	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: GETTING %s\n", s));
7775 
7776 	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
7777 		kstat_named_t *knp;
7778 
7779 		for (knp = KSTAT_NAMED_PTR(ksp);
7780 		    (caddr_t)knp < ((caddr_t)ksp->ks_data+ksp->ks_data_size);
7781 		    knp++) {
7782 
7783 			if (strcmp(s, knp->name) == NULL) {
7784 
7785 				res->data_type = knp->data_type;
7786 				res->value = knp->value;
7787 				found++;
7788 
7789 				MAN_DBG(MAN_KSTAT2, ("\t%s: %d\n", knp->name,
7790 					(int)knp->value.ul));
7791 			}
7792 		}
7793 	} else {
7794 		MAN_DBG(MAN_KSTAT2, ("\tbad kstats type %d\n", ksp->ks_type));
7795 	}
7796 
7797 	/*
7798 	 * if getting a value but couldn't find the namestring, result = 0.
7799 	 */
7800 	if (!found) {
7801 		/*
7802 		 * a reasonable default
7803 		 */
7804 		res->data_type = KSTAT_DATA_ULONG;
7805 		res->value.l = 0;
7806 		MAN_DBG(MAN_KSTAT2, ("\tcouldn't find, using defaults\n"));
7807 	}
7808 
7809 	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: returns\n"));
7810 
7811 	return (found);
7812 }
7813 
7814 
7815 /*
7816  *
7817  * Accumulate MAN driver kstats from the incremental values of the underlying
7818  * physical interfaces.
7819  *
7820  * Parameters:
7821  *	sum_knp		- The named kstat area to put cumulative value,
7822  *			  NULL if we just want to sync next two params.
7823  *	phys_ksp	- Physical interface kstat_t pointer. Contains
7824  *			  more current counts.
7825  * 	phys_last_knp	- counts from the last time we were called for this
7826  *			  physical interface. Note that the name kstats
7827  *			  pointed to are actually in MAN format, but they
7828  *			  hold the mirrored physical devices last read
7829  *			  kstats.
7830  * Basic algorithm is:
7831  *
7832  * 	for each named kstat variable {
7833  *	    sum_knp[i] += (phys_ksp->ksp_data[i] - phys_last_knp[i]);
7834  *	    phys_last_knp[i] = phys_ksp->ksp_data[i];
7835  *	}
7836  *
7837  */
7838 static void
7839 man_sum_kstats(kstat_named_t *sum_knp, kstat_t *phys_ksp,
7840 	kstat_named_t *phys_last_knp)
7841 {
7842 	char		*physname;
7843 	char		*physalias;
7844 	char		*statname;
7845 	kstat_named_t	phys_kn_entry;
7846 	uint64_t	delta64;
7847 	int		i;
7848 
7849 	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: sum_knp(0x%p) phys_ksp(0x%p)"
7850 		" phys_last_knp(0x%p)\n", (void *)sum_knp, (void *)phys_ksp,
7851 		(void *)phys_last_knp));
7852 
7853 	/*
7854 	 * Now for each entry in man_kstat_info, sum the named kstat.
7855 	 * Not that all MAN specific kstats will end up !found.
7856 	 */
7857 	for (i = 0; i < MAN_NUMSTATS; i++) {
7858 		int	found = 0;
7859 		int	flags = 0;
7860 
7861 		delta64 = 0;
7862 
7863 		statname = man_kstat_info[i].mk_name;
7864 		physname = man_kstat_info[i].mk_physname;
7865 		physalias = man_kstat_info[i].mk_physalias;
7866 		flags = man_kstat_info[i].mk_flags;
7867 
7868 		/*
7869 		 * Update MAN private kstats.
7870 		 */
7871 		if (flags & MK_NOT_PHYSICAL) {
7872 
7873 			kstat_named_t	*knp = phys_last_knp;
7874 
7875 			if (sum_knp == NULL)
7876 				continue;
7877 
7878 			if (strcmp(statname, "man_switches") == 0) {
7879 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7880 			} else if (strcmp(statname, "man_link_fails") == 0) {
7881 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7882 			} else if (strcmp(statname, "man_link_stales") == 0) {
7883 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7884 			} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7885 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7886 			} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7887 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7888 			}
7889 
7890 			continue;	/* phys_ksp doesnt have this stat */
7891 		}
7892 
7893 		/*
7894 		 * first try it by the "official" name
7895 		 */
7896 		if (phys_ksp) {
7897 			if (man_kstat_byname(phys_ksp, physname,
7898 				&phys_kn_entry)) {
7899 
7900 				found = 1;
7901 
7902 			} else if ((physalias) && (man_kstat_byname(phys_ksp,
7903 				physalias, &phys_kn_entry))) {
7904 
7905 				found = 1;
7906 			}
7907 		}
7908 
7909 		if (!found) {
7910 			/*
7911 			 * clear up the "last" value, no change to the sum
7912 			 */
7913 			phys_last_knp[i].value.ui64 = 0;
7914 			continue;
7915 		}
7916 
7917 		/*
7918 		 * at this point, we should have the good underlying
7919 		 * kstat value stored in phys_kn_entry
7920 		 */
7921 		if (flags & MK_NOT_COUNTER) {
7922 			/*
7923 			 * it isn't a counter, so store the value and
7924 			 * move on (e.g. ifspeed)
7925 			 */
7926 			phys_last_knp[i].value = phys_kn_entry.value;
7927 			continue;
7928 		}
7929 
7930 		switch (phys_kn_entry.data_type) {
7931 		case KSTAT_DATA_UINT32:
7932 
7933 			/*
7934 			 * this handles 32-bit wrapping
7935 			 */
7936 			if (phys_kn_entry.value.ui32 <
7937 				phys_last_knp[i].value.ui32) {
7938 
7939 				/*
7940 				 * we've wrapped!
7941 				 */
7942 				delta64 += (UINT_MAX -
7943 					phys_last_knp[i].value.ui32);
7944 				phys_last_knp[i].value.ui32 = 0;
7945 			}
7946 
7947 			delta64 += phys_kn_entry.value.ui32 -
7948 				phys_last_knp[i].value.ui32;
7949 			phys_last_knp[i].value.ui32 = phys_kn_entry.value.ui32;
7950 			break;
7951 
7952 		default:
7953 			/*
7954 			 * must be a 64-bit value, we ignore 64-bit
7955 			 * wraps, since they shouldn't ever happen
7956 			 * within the life of a machine (if we assume
7957 			 * machines don't stay up for more than a few
7958 			 * hundred years without a reboot...)
7959 			 */
7960 			delta64 = phys_kn_entry.value.ui64 -
7961 				phys_last_knp[i].value.ui64;
7962 			phys_last_knp[i].value.ui64 = phys_kn_entry.value.ui64;
7963 		}
7964 
7965 		if (sum_knp != NULL) {
7966 			/*
7967 			 * now we need to save the value
7968 			 */
7969 			switch (sum_knp[i].data_type) {
7970 			case KSTAT_DATA_UINT32:
7971 				/* trunk down to 32 bits, possibly lossy */
7972 				sum_knp[i].value.ui32 += (uint32_t)delta64;
7973 				break;
7974 
7975 			default:
7976 				sum_knp[i].value.ui64 += delta64;
7977 				break;
7978 			}
7979 		}
7980 	}
7981 
7982 	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: returns\n"));
7983 }
7984 
7985 
7986 #if defined(DEBUG)
7987 
7988 
7989 static char *_ms_flags[] = {
7990 	"NONE",
7991 	"FAST", 	/* 0x1 */
7992 	"RAW",		/* 0x2 */
7993 	"ALLPHYS",	/* 0x4 */
7994 	"ALLMULTI",	/* 0x8 */
7995 	"ALLSAP",	/* 0x10 */
7996 	"CKSUM",	/* 0x20 */
7997 	"MULTI",	/* 0x40 */
7998 	"SERLPBK",	/* 0x80 */
7999 	"MACLPBK",	/* 0x100 */
8000 	"CLOSING",	/* 0x200 */
8001 	"CLOSE_DONE",	/* 0x400 */
8002 	"CONTROL"	/* 0x800 */
8003 };
8004 
8005 static void
8006 man_print_msp(manstr_t *msp)
8007 {
8008 	char	buf[512];
8009 	char	prbuf[512];
8010 	uint_t	flags;
8011 	int	i;
8012 
8013 	cmn_err(CE_CONT, "\tmsp(0x%p)\n", (void *)msp);
8014 
8015 	if (msp == NULL)
8016 		return;
8017 
8018 	cmn_err(CE_CONT, "\t%s%d SAP(0x%x):\n",
8019 		ddi_major_to_name(msp->ms_meta_maj), msp->ms_meta_ppa,
8020 		msp->ms_sap);
8021 
8022 	buf[0] = '\0';
8023 	prbuf[0] = '\0';
8024 	flags = msp->ms_flags;
8025 	for (i = 0; i < A_CNT(_ms_flags); i++) {
8026 		if ((flags >> i) & 0x1) {
8027 			sprintf(buf, " %s |", _ms_flags[i+1]);
8028 			strcat(prbuf, buf);
8029 		}
8030 	}
8031 	prbuf[strlen(prbuf) - 1] = '\0';
8032 	cmn_err(CE_CONT, "\tms_flags: %s\n", prbuf);
8033 
8034 	cmn_err(CE_CONT, "\tms_dlpistate: %s\n", dss[msp->ms_dlpistate]);
8035 
8036 	cmn_err(CE_CONT, "\tms_dl_mp: 0x%p\n", (void *)msp->ms_dl_mp);
8037 
8038 	cmn_err(CE_CONT, "\tms_manp: 0x%p\n", (void *)msp->ms_manp);
8039 
8040 	cmn_err(CE_CONT, "\tms_dests: 0x%p\n", (void *)msp->ms_dests);
8041 
8042 }
8043 
8044 static char *_md_state[] = {
8045 	"NOTPRESENT",		/* 0x0 */
8046 	"INITIALIZING",		/* 0x1 */
8047 	"READY",		/* 0x2 */
8048 	"PLUMBING",		/* 0x4 */
8049 	"CLOSING"		/* 0x8 */
8050 };
8051 
8052 static void
8053 man_print_mdp(man_dest_t *mdp)
8054 {
8055 	uint_t		state;
8056 	int		i;
8057 	char		buf[64];
8058 	char		prbuf[512];
8059 
8060 	buf[0] = '\0';
8061 	prbuf[0] = '\0';
8062 
8063 	cmn_err(CE_CONT, "\tmdp(0x%p)\n", (void *)mdp);
8064 
8065 	if (mdp == NULL)
8066 		return;
8067 
8068 	cmn_err(CE_CONT, "\tmd_pg_id: %d\n", mdp->md_pg_id);
8069 	cmn_err(CE_CONT, "\tmd_dst_eaddr: %s\n",
8070 		ether_sprintf(&mdp->md_dst_eaddr));
8071 	cmn_err(CE_CONT, "\tmd_src_eaddr: %s\n",
8072 		ether_sprintf(&mdp->md_src_eaddr));
8073 	cmn_err(CE_CONT, "\tmd_dlpistate: %s", dss[mdp->md_dlpistate]);
8074 	cmn_err(CE_CONT, "\tmd_muxid: 0x%u", mdp->md_muxid);
8075 	cmn_err(CE_CONT, "\tmd_rcvcnt %lu md_lastrcvcnt %lu", mdp->md_rcvcnt,
8076 		mdp->md_lastrcvcnt);
8077 
8078 	/*
8079 	 * Print out state as text.
8080 	 */
8081 	state = mdp->md_state;
8082 
8083 	if (state == 0) {
8084 		strcat(prbuf, _md_state[0]);
8085 	} else {
8086 
8087 		for (i = 0; i < A_CNT(_md_state); i++) {
8088 			if ((state >> i) & 0x1)  {
8089 				sprintf(buf, " %s |", _md_state[i+1]);
8090 				strcat(prbuf, buf);
8091 			}
8092 		}
8093 		prbuf[strlen(prbuf) -1] = '\0';
8094 	}
8095 	cmn_err(CE_CONT, "\tmd_state: %s", prbuf);
8096 
8097 	cmn_err(CE_CONT, "\tmd_device:\n");
8098 	man_print_dev(&mdp->md_device);
8099 
8100 }
8101 
8102 static void
8103 man_print_man(man_t *manp)
8104 {
8105 	char	buf[512];
8106 	char	prbuf[512];
8107 
8108 	buf[0] = '\0';
8109 	prbuf[0] = '\0';
8110 
8111 	if (manp == NULL)
8112 		return;
8113 
8114 	if (ddi_major_to_name(manp->man_meta_major)) {
8115 		sprintf(buf, "\t man_device: %s%d\n",
8116 			ddi_major_to_name(manp->man_meta_major),
8117 			manp->man_meta_ppa);
8118 	} else {
8119 		sprintf(buf, "\t major: %d", manp->man_meta_major);
8120 		sprintf(buf, "\t ppa: %d", manp->man_meta_ppa);
8121 	}
8122 
8123 	cmn_err(CE_CONT, "%s", buf);
8124 
8125 }
8126 
8127 static char *_mdev_state[] = {
8128 	"UNASSIGNED  ",
8129 	"ASSIGNED",
8130 	"ACTIVE",
8131 	"FAILED"
8132 };
8133 
8134 static void
8135 man_print_dev(man_dev_t *mdevp)
8136 {
8137 	char	buf[512];
8138 	char	prbuf[512];
8139 	int	i;
8140 	uint_t	state;
8141 
8142 	buf[0] = '\0';
8143 	prbuf[0] = '\0';
8144 
8145 	if (mdevp == NULL)
8146 		return;
8147 
8148 	if (mdevp->mdev_major == 0) {
8149 number:
8150 		sprintf(buf, "\t mdev_major: %d\n", mdevp->mdev_major);
8151 	} else if (ddi_major_to_name(mdevp->mdev_major)) {
8152 		sprintf(buf, "\t mdev_device: %s%d\n",
8153 			ddi_major_to_name(mdevp->mdev_major),
8154 			mdevp->mdev_ppa);
8155 	} else
8156 		goto number;
8157 
8158 	cmn_err(CE_CONT, "%s", buf);
8159 
8160 	cmn_err(CE_CONT, "\t mdev_exp_id: %d\n", mdevp->mdev_exp_id);
8161 
8162 	buf[0] = '\0';
8163 	prbuf[0] = '\0';
8164 	state = mdevp->mdev_state;
8165 
8166 	if (state == 0) {
8167 		strcat(prbuf, _mdev_state[0]);
8168 	} else {
8169 		for (i = 0; i < A_CNT(_mdev_state); i++) {
8170 			if ((state >> i) & 0x1) {
8171 				sprintf(buf, " %s |", _mdev_state[i+1]);
8172 				strcat(prbuf, buf);
8173 			}
8174 		}
8175 	}
8176 
8177 	prbuf[strlen(prbuf) - 2] = '\0';
8178 
8179 	cmn_err(CE_CONT, "\t mdev_state: %s\n", prbuf);
8180 
8181 }
8182 
/*
 * Printable names for the mip_cmd field of an mi_path_t.  man_print_mip()
 * indexes this table directly with the mip_cmd value, so the order of the
 * entries must follow the numeric order of the commands.
 */
static char *_mip_cmd[] = {
	"MI_PATH_READ",
	"MI_PATH_ASSIGN",
	"MI_PATH_ACTIVATE",
	"MI_PATH_DEACTIVATE",
	"MI_PATH_UNASSIGN"
};
8190 
8191 static void
8192 man_print_mtp(mi_time_t *mtp)
8193 {
8194 	cmn_err(CE_CONT, "\tmtp(0x%p)\n", (void *)mtp);
8195 
8196 	if (mtp == NULL)
8197 		return;
8198 
8199 	cmn_err(CE_CONT, "\tmtp_instance: %d\n", mtp->mtp_man_ppa);
8200 
8201 	cmn_err(CE_CONT, "\tmtp_time: %d\n", mtp->mtp_time);
8202 
8203 }
8204 
8205 static void
8206 man_print_mip(mi_path_t *mip)
8207 {
8208 	cmn_err(CE_CONT, "\tmip(0x%p)\n", (void *)mip);
8209 
8210 	if (mip == NULL)
8211 		return;
8212 
8213 	cmn_err(CE_CONT, "\tmip_pg_id: %d\n", mip->mip_pg_id);
8214 
8215 	cmn_err(CE_CONT, "\tmip_cmd: %s\n", _mip_cmd[mip->mip_cmd]);
8216 
8217 	cmn_err(CE_CONT, "\tmip_eaddr: %s\n", ether_sprintf(&mip->mip_eaddr));
8218 
8219 	cmn_err(CE_CONT, "\tmip_devs: 0x%p\n", (void *)mip->mip_devs);
8220 
8221 	cmn_err(CE_CONT, "\tmip_ndevs: %d\n", mip->mip_ndevs);
8222 
8223 }
8224 
8225 static void
8226 man_print_mpg(man_pg_t *mpg)
8227 {
8228 	cmn_err(CE_CONT, "\tmpg(0x%p)\n", (void *)mpg);
8229 
8230 	if (mpg == NULL)
8231 		return;
8232 
8233 	cmn_err(CE_CONT, "\tmpg_next: 0x%p\n", (void *)mpg->mpg_next);
8234 
8235 	cmn_err(CE_CONT, "\tmpg_pg_id: %d\n", mpg->mpg_pg_id);
8236 
8237 	cmn_err(CE_CONT, "\tmpg_man_ppa: %d\n", mpg->mpg_man_ppa);
8238 
8239 	cmn_err(CE_CONT, "\tmpg_dst_eaddr: %s\n",
8240 		ether_sprintf(&mpg->mpg_dst_eaddr));
8241 
8242 	cmn_err(CE_CONT, "\tmpg_pathp: 0x%p\n", (void *)mpg->mpg_pathp);
8243 
8244 }
8245 
/*
 * Printable names for the mw_flags field of a man_work_t, used by
 * man_print_work().
 *
 * NOTE(review): the values in the per-entry comments are as originally
 * annotated.  man_print_work() tests mw_flags one bit at a time, which
 * suggests these are bit flags (in which case "DONE" would be a single
 * bit rather than 0x3) - confirm against the flag definitions in the
 * header.
 */
static char *_mw_flags[] = {
	"NOWAITER",		/* 0x0 */
	"CVWAITER",		/* 0x1 */
	"QWAITER",		/* 0x2 */
	"DONE"		/* 0x3 */
};
8252 
8253 static void
8254 man_print_work(man_work_t *wp)
8255 {
8256 	int 	i;
8257 
8258 	cmn_err(CE_CONT, "\twp(0x%p)\n\n", (void *)wp);
8259 
8260 	if (wp == NULL)
8261 		return;
8262 
8263 	cmn_err(CE_CONT, "\tmw_type: %s\n", _mw_type[wp->mw_type]);
8264 
8265 	cmn_err(CE_CONT, "\tmw_flags: ");
8266 	for (i = 0; i < A_CNT(_mw_flags); i++) {
8267 		if ((wp->mw_flags >> i) & 0x1)
8268 			cmn_err(CE_CONT, "%s", _mw_flags[i]);
8269 	}
8270 	cmn_err(CE_CONT, "\n");
8271 
8272 	cmn_err(CE_CONT, "\twp_status: %d\n", wp->mw_status);
8273 
8274 	cmn_err(CE_CONT, "\twp_arg: 0x%p\n", (void *)&wp->mw_arg);
8275 
8276 	cmn_err(CE_CONT, "\tmw_next: 0x%p\n", (void *)wp->mw_next);
8277 
8278 	cmn_err(CE_CONT, "\twp_q: 0x%p", (void *)wp->mw_q);
8279 
8280 }
8281 
8282 static void
8283 man_print_path(man_path_t *mp)
8284 {
8285 	cmn_err(CE_CONT, "\tmp(0x%p)\n\n", (void *)mp);
8286 
8287 	if (mp == NULL)
8288 		return;
8289 
8290 	cmn_err(CE_CONT, "\tmp_device:");
8291 	man_print_dev(&mp->mp_device);
8292 
8293 	cmn_err(CE_CONT, "\tmp_next: 0x%p\n", (void *)mp->mp_next);
8294 
8295 	cmn_err(CE_CONT, "\tmp_last_knp: 0x%p\n", (void *)mp->mp_last_knp);
8296 
8297 	cmn_err(CE_CONT, "\tmp_lru: 0x%lx", mp->mp_lru);
8298 
8299 }
8300 
8301 void *
8302 man_dbg_kzalloc(int line, size_t size, int kmflags)
8303 {
8304 	void *tmp;
8305 
8306 	tmp = kmem_zalloc(size, kmflags);
8307 	MAN_DBG(MAN_KMEM, ("0x%p %lu\tzalloc'd @ %d\n", (void *)tmp,
8308 		size, line));
8309 
8310 	return (tmp);
8311 
8312 }
8313 
8314 void
8315 man_dbg_kfree(int line, void *buf, size_t size)
8316 {
8317 
8318 	MAN_DBG(MAN_KMEM, ("0x%p %lu\tfree'd @ %d\n", (void *)buf, size, line));
8319 
8320 	kmem_free(buf, size);
8321 
8322 }
8323 
8324 #endif  /* DEBUG */
8325