xref: /titanic_41/usr/src/uts/sun4u/starcat/io/dman.c (revision 2be7def9bac9cc5b894988030377b62ee6be9c39)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 /*
29  * Starcat Management Network Driver
30  *
31  * ****** NOTICE **** This file also resides in the SSC gate as
32  * ****** NOTICE **** usr/src/uts/sun4u/scman/scman.c. Any changes
33  * ****** NOTICE **** made here must be propagated there as well.
34  *
35  */
36 
37 #include <sys/types.h>
38 #include <sys/proc.h>
39 #include <sys/disp.h>
40 #include <sys/kmem.h>
41 #include <sys/stat.h>
42 #include <sys/kstat.h>
43 #include <sys/ksynch.h>
44 #include <sys/stream.h>
45 #include <sys/dlpi.h>
46 #include <sys/stropts.h>
47 #include <sys/strsubr.h>
48 #include <sys/debug.h>
49 #include <sys/conf.h>
50 #include <sys/kstr.h>
51 #include <sys/errno.h>
52 #include <sys/ethernet.h>
53 #include <sys/byteorder.h>
54 #include <sys/ddi.h>
55 #include <sys/sunddi.h>
56 #include <sys/sunldi.h>
57 #include <sys/modctl.h>
58 #include <sys/strsun.h>
59 #include <sys/callb.h>
60 #include <sys/pci.h>
61 #include <netinet/in.h>
62 #include <inet/common.h>
63 #include <inet/mi.h>
64 #include <inet/nd.h>
65 #include <sys/socket.h>
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <inet/ip.h>
70 #include <inet/ip6.h>
71 #include <sys/file.h>
72 #include <sys/dman.h>
73 #include <sys/autoconf.h>
74 #include <sys/zone.h>
75 
76 extern int ddi_create_internal_pathname(dev_info_t *, char *, int, minor_t);
77 
78 #define	MAN_IDNAME	"dman"
79 #define	DMAN_INT_PATH	"/devices/pseudo/dman@0:dman"
80 #define	DMAN_PATH	"/devices/pseudo/clone@0:dman"
81 #define	ERI_IDNAME	"eri"
82 #define	ERI_PATH	"/devices/pseudo/clone@0:eri"
83 
84 #if defined(DEBUG)
85 
86 static void man_print_msp(manstr_t *);
87 static void man_print_man(man_t *);
88 static void man_print_mdp(man_dest_t *);
89 static void man_print_dev(man_dev_t *);
90 static void man_print_mip(mi_path_t *);
91 static void man_print_mtp(mi_time_t *);
92 static void man_print_mpg(man_pg_t *);
93 static void man_print_path(man_path_t *);
94 static void man_print_work(man_work_t *);
95 
/*
 * Set manstr_t dlpistate (upper half of multiplexor), tracing the
 * old=>new transition through MAN_DBG under the MAN_DLPI flag.
 *
 * The expansion is one brace-enclosed statement, so the trace and the
 * assignment stay together when the macro is the body of an unbraced
 * if/for/while.  The trailing semicolon is retained from the original
 * definition so call sites written with or without their own semicolon
 * keep compiling.  Both arguments are evaluated more than once; do not
 * pass expressions with side effects.
 */
#define	SETSTATE(msp, state) \
	{								\
		MAN_DBG(MAN_DLPI, ("msp=0x%p @ %d state %s=>%s\n",	\
		    (void *)(msp), __LINE__,				\
		    dss[(msp)->ms_dlpistate], dss[(state)]));		\
		(msp)->ms_dlpistate = (state);				\
	};
/*
 * Set man_dest_t dlpistate (lower half of multiplexor).  Same shape and
 * caveats as SETSTATE: single compound statement, trailing semicolon
 * kept, arguments evaluated more than once.
 */
#define	D_SETSTATE(mdp, state) \
	{								\
		MAN_DBG(MAN_DLPI, ("dst=0x%p @ %d state %s=>%s\n",	\
		    (void *)(mdp), __LINE__,				\
		    dss[(mdp)->md_dlpistate], dss[(state)]));		\
		(mdp)->md_dlpistate = (state);				\
	};
112 
113 static char *promisc[] = {	/* DLPI promisc Strings */
114 	"not used",		/* 0x00 */
115 	"DL_PROMISC_PHYS",	/* 0x01 */
116 	"DL_PROMISC_SAP",	/* 0x02 */
117 	"DL_PROMISC_MULTI"	/* 0x03 */
118 };
119 
120 static char *dps[] = {			/* DLPI Primitive Strings */
121 	"DL_INFO_REQ",			/* 0x00 */
122 	"DL_BIND_REQ",			/* 0x01 */
123 	"DL_UNBIND_REQ",		/* 0x02 */
124 	"DL_INFO_ACK",			/* 0x03 */
125 	"DL_BIND_ACK",			/* 0x04 */
126 	"DL_ERROR_ACK",			/* 0x05 */
127 	"DL_OK_ACK",			/* 0x06 */
128 	"DL_UNITDATA_REQ",		/* 0x07 */
129 	"DL_UNITDATA_IND",		/* 0x08 */
130 	"DL_UDERROR_IND",		/* 0x09 */
131 	"DL_UDQOS_REQ",			/* 0x0a */
132 	"DL_ATTACH_REQ",		/* 0x0b */
133 	"DL_DETACH_REQ",		/* 0x0c */
134 	"DL_CONNECT_REQ",		/* 0x0d */
135 	"DL_CONNECT_IND",		/* 0x0e */
136 	"DL_CONNECT_RES",		/* 0x0f */
137 	"DL_CONNECT_CON",		/* 0x10 */
138 	"DL_TOKEN_REQ",			/* 0x11 */
139 	"DL_TOKEN_ACK",			/* 0x12 */
140 	"DL_DISCONNECT_REQ",		/* 0x13 */
141 	"DL_DISCONNECT_IND",		/* 0x14 */
142 	"DL_SUBS_UNBIND_REQ",		/* 0x15 */
143 	"DL_LIARLIARPANTSONFIRE",	/* 0x16 */
144 	"DL_RESET_REQ",			/* 0x17 */
145 	"DL_RESET_IND",			/* 0x18 */
146 	"DL_RESET_RES",			/* 0x19 */
147 	"DL_RESET_CON",			/* 0x1a */
148 	"DL_SUBS_BIND_REQ",		/* 0x1b */
149 	"DL_SUBS_BIND_ACK",		/* 0x1c */
150 	"DL_ENABMULTI_REQ",		/* 0x1d */
151 	"DL_DISABMULTI_REQ",		/* 0x1e */
152 	"DL_PROMISCON_REQ",		/* 0x1f */
153 	"DL_PROMISCOFF_REQ",		/* 0x20 */
154 	"DL_DATA_ACK_REQ",		/* 0x21 */
155 	"DL_DATA_ACK_IND",		/* 0x22 */
156 	"DL_DATA_ACK_STATUS_IND",	/* 0x23 */
157 	"DL_REPLY_REQ",			/* 0x24 */
158 	"DL_REPLY_IND",			/* 0x25 */
159 	"DL_REPLY_STATUS_IND",		/* 0x26 */
160 	"DL_REPLY_UPDATE_REQ",		/* 0x27 */
161 	"DL_REPLY_UPDATE_STATUS_IND",	/* 0x28 */
162 	"DL_XID_REQ",			/* 0x29 */
163 	"DL_XID_IND",			/* 0x2a */
164 	"DL_XID_RES",			/* 0x2b */
165 	"DL_XID_CON",			/* 0x2c */
166 	"DL_TEST_REQ",			/* 0x2d */
167 	"DL_TEST_IND",			/* 0x2e */
168 	"DL_TEST_RES",			/* 0x2f */
169 	"DL_TEST_CON",			/* 0x30 */
170 	"DL_PHYS_ADDR_REQ",		/* 0x31 */
171 	"DL_PHYS_ADDR_ACK",		/* 0x32 */
172 	"DL_SET_PHYS_ADDR_REQ",		/* 0x33 */
173 	"DL_GET_STATISTICS_REQ",	/* 0x34 */
174 	"DL_GET_STATISTICS_ACK",	/* 0x35 */
175 };
176 
177 #define	MAN_DLPI_MAX_PRIM	0x35
178 
179 static char *dss[] = {			/* DLPI State Strings */
180 	"DL_UNBOUND",			/* 0x00	*/
181 	"DL_BIND_PENDING",		/* 0x01	*/
182 	"DL_UNBIND_PENDING",		/* 0x02	*/
183 	"DL_IDLE",			/* 0x03	*/
184 	"DL_UNATTACHED",		/* 0x04	*/
185 	"DL_ATTACH_PENDING",		/* 0x05	*/
186 	"DL_DETACH_PENDING",		/* 0x06	*/
187 	"DL_UDQOS_PENDING",		/* 0x07	*/
188 	"DL_OUTCON_PENDING",		/* 0x08	*/
189 	"DL_INCON_PENDING",		/* 0x09	*/
190 	"DL_CONN_RES_PENDING",		/* 0x0a	*/
191 	"DL_DATAXFER",			/* 0x0b	*/
192 	"DL_USER_RESET_PENDING",	/* 0x0c	*/
193 	"DL_PROV_RESET_PENDING",	/* 0x0d	*/
194 	"DL_RESET_RES_PENDING",		/* 0x0e	*/
195 	"DL_DISCON8_PENDING",		/* 0x0f	*/
196 	"DL_DISCON9_PENDING",		/* 0x10	*/
197 	"DL_DISCON11_PENDING",		/* 0x11	*/
198 	"DL_DISCON12_PENDING",		/* 0x12	*/
199 	"DL_DISCON13_PENDING",		/* 0x13	*/
200 	"DL_SUBS_BIND_PND",		/* 0x14	*/
201 	"DL_SUBS_UNBIND_PND",		/* 0x15	*/
202 };
203 
204 static const char *lss[] = {
205 	"UNKNOWN",	/* 0x0 */
206 	"INIT",		/* 0x1 */
207 	"GOOD",		/* 0x2 */
208 	"STALE",	/* 0x3 */
209 	"FAIL",		/* 0x4 */
210 };
211 
212 static char *_mw_type[] = {
213 	"OPEN_CTL",		/* 0x0 */
214 	"CLOSE_CTL",		/* 0x1 */
215 	"SWITCH",		/* 0x2 */
216 	"PATH_UPDATE",		/* 0x3 */
217 	"CLOSE",		/* 0x4 */
218 	"CLOSE_STREAM",	/* 0x5 */
219 	"DRATTACH",		/* 0x6 */
220 	"DRDETACH",		/* 0x7 */
221 	"STOP",			/* 0x8 */
222 	"DRSWITCH",		/* 0x9 */
223 	"KSTAT_UPDATE"		/* 0xA */
224 };
225 
226 uint32_t		man_debug = MAN_WARN;
227 
228 #define	man_kzalloc(a, b)	man_dbg_kzalloc(__LINE__, a, b)
229 #define	man_kfree(a, b)		man_dbg_kfree(__LINE__, a, b)
230 void	*man_dbg_kzalloc(int line, size_t size, int kmflags);
231 void	man_dbg_kfree(int line, void *buf, size_t size);
232 
233 #else	/* DEBUG */
234 
235 uint32_t		man_debug = 0;
/*
 * Set manstr_t dlpistate (upper half of multiplexor).
 *
 * The msp argument is parenthesized so that any pointer-valued
 * expression can be passed.  The trailing semicolon is retained from
 * the original definition for compatibility with existing call sites.
 */
#define	SETSTATE(msp, state) (msp)->ms_dlpistate = (state);
/*
 * Set man_dest_t dlpistate (lower half of multiplexor).
 * Same conventions as SETSTATE.
 */
#define	D_SETSTATE(mdp, state) (mdp)->md_dlpistate = (state);
244 
245 #define	man_kzalloc(a, b)	kmem_zalloc(a, b)
246 #define	man_kfree(a, b)		kmem_free(a, b)
247 
248 #endif	/* DEBUG */
249 
250 #define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
251 #define	DL_PROMISCON_TYPE(mp)	\
252 		(((union DL_primitives *)(mp)->b_rptr)->promiscon_req.dl_level)
253 #define	IOC_CMD(mp)	(((struct iocblk *)(mp)->b_rptr)->ioc_cmd)
254 
255 /*
256  * Start of kstat-related declarations
257  */
258 #define	MK_NOT_COUNTER		(1<<0)	/* is it a counter? */
259 #define	MK_ERROR		(1<<2)	/* for error statistics */
260 #define	MK_NOT_PHYSICAL		(1<<3)	/* no matching physical stat */
261 
262 typedef struct man_kstat_info_s {
263 	char		*mk_name;	/* e.g. align_errors */
264 	char		*mk_physname;	/* e.g. framing (NULL for same) */
265 	char		*mk_physalias;	/* e.g. framing (NULL for same) */
266 	uchar_t		mk_type;	/* e.g. KSTAT_DATA_UINT32 */
267 	int		mk_flags;
268 } man_kstat_info_t;
269 
270 /*
271  * Master declaration macro, note that it uses token pasting
272  */
273 #define	MK_DECLARE(name, pname, palias, bits, flags) \
274 	{ name,		pname,	palias,	KSTAT_DATA_UINT ## bits, flags }
275 
276 /*
277  * Obsolete forms don't have the _sinceswitch forms, they are all errors
278  */
279 #define	MK_OBSOLETE32(name, alias) MK_DECLARE(alias, name, alias, 32, MK_ERROR)
280 #define	MK_OBSOLETE64(name, alias) MK_DECLARE(alias, name, alias, 64, MK_ERROR)
281 
282 /*
283  * The only non-counters don't have any other aliases
284  */
285 #define	MK_NOTCOUNTER32(name) MK_DECLARE(name, name, NULL, 32, MK_NOT_COUNTER)
286 #define	MK_NOTCOUNTER64(name) MK_DECLARE(name, name, NULL, 64, MK_NOT_COUNTER)
287 
288 /*
289  * Normal counter forms
290  */
291 #define	MK_DECLARE32(name, alias) \
292 	MK_DECLARE(name, name, alias, 32, 0)
293 #define	MK_DECLARE64(name, alias) \
294 	MK_DECLARE(name, name, alias, 64, 0)
295 
296 /*
297  * Error counters need special MK_ERROR flag only for the non-AP form
298  */
299 #define	MK_ERROR32(name, alias) \
300 	MK_DECLARE(name, name, alias, 32, MK_ERROR)
301 #define	MK_ERROR64(name, alias) \
302 	MK_DECLARE(name, name, alias, 64, MK_ERROR)
303 
304 /*
305  * These AP-specific stats are not backed by physical statistics
306  */
307 #define	MK_NOTPHYS32(name) MK_DECLARE(name, NULL, NULL, 32, MK_NOT_PHYSICAL)
308 #define	MK_NOTPHYS64(name) MK_DECLARE(name, NULL, NULL, 64, MK_NOT_PHYSICAL)
309 
310 /*
311  * START of the actual man_kstat_info declaration using above macros
312  */
313 static man_kstat_info_t man_kstat_info[] = {
314 	/*
315 	 * Link Input/Output stats
316 	 */
317 	MK_DECLARE32("ipackets", NULL),
318 	MK_ERROR32("ierrors", NULL),
319 	MK_DECLARE32("opackets", NULL),
320 	MK_ERROR32("oerrors", NULL),
321 	MK_ERROR32("collisions", NULL),
322 	MK_NOTCOUNTER64("ifspeed"),
323 	/*
324 	 * These are new MIB-II stats, per PSARC 1997/198
325 	 */
326 	MK_DECLARE32("rbytes", NULL),
327 	MK_DECLARE32("obytes", NULL),
328 	MK_DECLARE32("multircv", NULL),
329 	MK_DECLARE32("multixmt", NULL),
330 	MK_DECLARE32("brdcstrcv", NULL),
331 	MK_DECLARE32("brdcstxmt", NULL),
332 	/*
333 	 * Error values
334 	 */
335 	MK_ERROR32("norcvbuf", NULL),
336 	MK_ERROR32("noxmtbuf", NULL),
337 	MK_ERROR32("unknowns", NULL),
338 	/*
339 	 * These are the 64-bit values, they fallback to 32-bit values
340 	 */
341 	MK_DECLARE64("ipackets64", "ipackets"),
342 	MK_DECLARE64("opackets64", "opackets"),
343 	MK_DECLARE64("rbytes64", "rbytes"),
344 	MK_DECLARE64("obytes64", "obytes"),
345 
346 	/* New AP switching statistics */
347 	MK_NOTPHYS64("man_switches"),
348 	MK_NOTPHYS64("man_link_fails"),
349 	MK_NOTPHYS64("man_link_stales"),
350 	MK_NOTPHYS64("man_icmpv4_probes"),
351 	MK_NOTPHYS64("man_icmpv6_probes"),
352 
353 	MK_ERROR32("align_errors", "framing"),
354 	MK_ERROR32("fcs_errors", "crc"),
355 	MK_ERROR32("first_collisions", NULL),
356 	MK_ERROR32("multi_collisions", NULL),
357 	MK_ERROR32("sqe_errors", "sqe"),
358 
359 	MK_ERROR32("tx_late_collisions", NULL),
360 	MK_ERROR32("ex_collisions", "excollisions"),
361 	MK_ERROR32("macxmt_errors", NULL),
362 	MK_ERROR32("carrier_errors", "nocarrier"),
363 	MK_ERROR32("toolong_errors", "buff"),
364 	MK_ERROR32("macrcv_errors", NULL),
365 
366 	MK_OBSOLETE32("framing", "align_errors"),
367 	MK_OBSOLETE32("crc", "fcs_errors"),
368 	MK_OBSOLETE32("sqe", "sqe_errors"),
369 	MK_OBSOLETE32("excollisions", "ex_collisions"),
370 	MK_OBSOLETE32("nocarrier", "carrier_errors"),
371 	MK_OBSOLETE32("buff", "toolong_errors"),
372 };
373 
374 #define	MAN_NUMSTATS (sizeof (man_kstat_info) / sizeof (man_kstat_info_t))
375 
376 /*
377  * Miscellaneous ethernet stuff.
378  *
379  * MANs DL_INFO_ACK template.
380  */
381 static	dl_info_ack_t man_infoack = {
382 	DL_INFO_ACK,				/* dl_primitive */
383 	ETHERMTU,				/* dl_max_sdu */
384 	0,					/* dl_min_sdu */
385 	MAN_ADDRL,				/* dl_addr_length */
386 	DL_ETHER,				/* dl_mac_type */
387 	0,					/* dl_reserved */
388 	0,					/* dl_current_state */
389 	-2,					/* dl_sap_length */
390 	DL_CLDLS,				/* dl_service_mode */
391 	0,					/* dl_qos_length */
392 	0,					/* dl_qos_offset */
393 	0,					/* dl_range_length */
394 	0,					/* dl_range_offset */
395 	DL_STYLE2,				/* dl_provider_style */
396 	sizeof (dl_info_ack_t),			/* dl_addr_offset */
397 	DL_VERSION_2,				/* dl_version */
398 	ETHERADDRL,				/* dl_brdcst_addr_length */
399 	sizeof (dl_info_ack_t) + MAN_ADDRL,	/* dl_brdcst_addr_offset */
400 	0					/* dl_growth */
401 };
402 
403 /*
404  * Ethernet broadcast address definition.
405  */
406 static	struct ether_addr	etherbroadcast = {
407 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
408 };
409 
410 static struct ether_addr zero_ether_addr = {
411 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
412 };
413 
414 /*
415  * Set via MAN_SET_SC_IPADDRS ioctl.
416  */
417 man_sc_ipaddrs_t	man_sc_ipaddrs = { 0xffffffffU, 0xffffffffU };
418 
419 /*
420  * Set via MAN_SET_SC_IP6ADDRS ioctl.
421  */
422 man_sc_ip6addrs_t	man_sc_ip6addrs = { 0, 0, 0, 0, 0, 0, 0, 0 };
423 
424 /*
425  * IP & ICMP constants
426  */
427 #ifndef	ETHERTYPE_IPV6
428 #define	ETHERTYPE_IPV6 0x86DD
429 #endif
430 
431 /*
432  * Function prototypes.
433  *
434  * Upper multiplexor functions.
435  */
436 static int	man_attach(dev_info_t *, ddi_attach_cmd_t);
437 static int	man_detach(dev_info_t *, ddi_detach_cmd_t);
438 static int	man_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
439 static int	man_open(register queue_t *, dev_t *, int, int, cred_t *);
440 static int	man_configure(queue_t *);
441 static int	man_deconfigure(void);
442 static int	man_init_dests(man_t *, manstr_t *);
443 static void	man_start_dest(man_dest_t *, manstr_t *, man_pg_t *);
444 static void	man_set_optimized_dest(manstr_t *);
445 static int	man_close(queue_t *);
446 static void	man_cancel_timers(man_adest_t *);
447 static int	man_uwput(queue_t *, mblk_t *);
448 static int	man_start(queue_t *, mblk_t *, eaddr_t *);
449 static void	man_ioctl(queue_t *, mblk_t *);
450 static void	man_set_linkcheck_time(queue_t *, mblk_t *);
451 static void	man_setpath(queue_t *, mblk_t *);
452 static void	man_geteaddr(queue_t *, mblk_t *);
453 static void	man_set_sc_ipaddrs(queue_t *, mblk_t *);
454 static void	man_set_sc_ip6addrs(queue_t *, mblk_t *);
455 static int	man_get_our_etheraddr(eaddr_t *eap);
456 static void	man_nd_getset(queue_t *, mblk_t *);
457 static void	man_dl_ioc_hdr_info(queue_t *, mblk_t *);
458 static int	man_uwsrv(queue_t *);
459 static int	man_proto(queue_t *, mblk_t *);
460 static int	man_udreq(queue_t *, mblk_t *);
461 static void	man_areq(queue_t *, mblk_t *);
462 static mblk_t	*man_alloc_physreq_mp(eaddr_t *);
463 static void	man_dreq(queue_t *, mblk_t *);
464 static void	man_dodetach(manstr_t *, man_work_t *);
465 static void	man_dl_clean(mblk_t **);
466 static void	man_breq(queue_t *, mblk_t *);
467 static void	man_ubreq(queue_t *, mblk_t *);
468 static void	man_ireq(queue_t *, mblk_t *);
469 static void	man_ponreq(queue_t *, mblk_t *);
470 static void	man_poffreq(queue_t *, mblk_t *);
471 static void	man_emreq(queue_t *, mblk_t *);
472 static void	man_dmreq(queue_t *, mblk_t *);
473 static void	man_pareq(queue_t *, mblk_t *);
474 static void	man_spareq(queue_t *, mblk_t *);
475 static int	man_dlpi(manstr_t *, mblk_t *);
476 static int	man_dlioc(manstr_t *, mblk_t *);
477 static int	man_dl_catch(mblk_t **, mblk_t *);
478 static void	man_dl_release(mblk_t **, mblk_t *);
479 static int	man_match_proto(mblk_t *, mblk_t *);
480 static int	man_open_ctl();
481 static void	man_close_ctl();
482 /*
483  * upper/lower multiplexor functions.
484  */
485 static int	man_dlpi_senddown(manstr_t *, mblk_t *);
486 static int	man_start_lower(man_dest_t *, mblk_t *, queue_t *, int caller);
487 static int	man_lrput(queue_t *, mblk_t *);
488 /*
489  * Lower multiplexor functions.
490  */
491 static int	man_lwsrv(queue_t *);
492 static int	man_lrsrv(queue_t *);
493 static void	man_dlpi_replay(man_dest_t *, mblk_t *);
494 static int	man_dlioc_replay(man_dest_t *);
495 /*
496  * Link failover routines.
497  */
498 static int	man_gettimer(int, man_dest_t *);
499 static void	man_linkcheck_timer(void *);
500 static int	man_needs_linkcheck(man_dest_t *);
501 static int	man_do_autoswitch(man_dest_t *);
502 static int	man_autoswitch(man_pg_t *, man_dev_t *, man_work_t *);
503 static int	man_prep_dests_for_switch(man_pg_t *, man_dest_t **, int *);
504 static int	man_str_uses_pg(manstr_t *, man_pg_t *);
505 static void	man_do_icmp_bcast(man_dest_t *, t_uscalar_t);
506 static mblk_t	*man_alloc_udreq(int, man_dladdr_t *);
507 static mblk_t	*man_pinger(t_uscalar_t);
508 /*
509  * Functions normally executing outside of the STREAMs perimeter.
510  */
511 /*
512  * Functions supporting/processing work requests.
513  */
514 static void	man_bwork(void);
515 static void	man_iwork(void);		/* inside perimeter */
516 void		man_work_add(man_workq_t *, man_work_t *);
517 man_work_t	*man_work_alloc(int, int);
518 void		man_work_free(man_work_t *);
519 /*
520  * Functions implementing/supporting failover.
521  *
522  * Executed inside perimeter.
523  */
524 static int	man_do_dr_attach(man_work_t *);
525 static int	man_do_dr_switch(man_work_t *);
526 static void	man_do_dr_detach(man_work_t *);
527 static int	man_iswitch(man_work_t *);
528 static void	man_ifail_dest(man_dest_t *);
529 static man_dest_t *man_switch_match(man_dest_t *, int, void *);
530 static void	man_add_dests(man_pg_t *);
531 static void	man_reset_dlpi(void *);
532 static mblk_t	*man_dup_mplist(mblk_t *);
533 static mblk_t	*man_alloc_ubreq_dreq();
534 /*
535  * Executed outside perimeter (uses man_lock for synchronization).
536  */
537 static void	man_bclose(man_adest_t *);
538 static void	man_bswitch(man_adest_t *, man_work_t *);
539 static int	man_plumb(man_dest_t *);
540 static void	man_unplumb(man_dest_t *);
541 static void	man_plink(queue_t *, mblk_t *);
542 static void	man_unplink(queue_t *, mblk_t *);
543 static void	man_linkrec_insert(man_linkrec_t *);
544 static queue_t	*man_linkrec_find(int);
545 /*
546  * Functions supporting pathgroups
547  */
548 int	man_pg_cmd(mi_path_t *, man_work_t *);
549 static int	man_pg_assign(man_pg_t **, mi_path_t *, int);
550 static int	man_pg_create(man_pg_t **, man_pg_t **, mi_path_t *);
551 static int	man_pg_unassign(man_pg_t **, mi_path_t *);
552 static int	man_pg_activate(man_t *, mi_path_t *, man_work_t *);
553 static int	man_pg_read(man_pg_t *, mi_path_t *);
554 static man_pg_t	*man_find_path_by_dev(man_pg_t *, man_dev_t *, man_path_t **);
555 static man_pg_t	*man_find_pg_by_id(man_pg_t *, int);
556 static man_path_t	*man_find_path_by_ppa(man_path_t *, int);
557 static man_path_t	*man_find_active_path(man_path_t *);
558 static man_path_t	*man_find_alternate_path(man_path_t *);
559 static void	man_path_remove(man_path_t **, man_path_t *);
560 static void	man_path_insert(man_path_t **, man_path_t *);
561 static void	man_path_merge(man_path_t **, man_path_t *);
562 static int	man_path_kstat_init(man_path_t *);
563 static void	man_path_kstat_uninit(man_path_t *);
564 /*
565  * Functions supporting kstat reporting.
566  */
567 static int	man_kstat_update(kstat_t *, int);
568 static void	man_do_kstats(man_work_t *);
569 static void	man_update_path_kstats(man_t *);
570 static void 	man_update_dev_kstats(kstat_named_t *, man_path_t *);
571 static void	man_sum_dests_kstats(kstat_named_t *, man_pg_t *);
572 static void	man_kstat_named_init(kstat_named_t *, int);
573 static int	man_kstat_byname(kstat_t *, char *, kstat_named_t *);
574 static void	man_sum_kstats(kstat_named_t *, kstat_t *, kstat_named_t *);
575 /*
576  * Functions supporting ndd.
577  */
578 static int	man_param_register(param_t *, int);
579 static int	man_pathgroups_report(queue_t *, mblk_t *, caddr_t, cred_t *);
580 static void	man_preport(man_path_t *, mblk_t *);
581 static int	man_set_active_path(queue_t *, mblk_t *, char *, caddr_t,
582 			cred_t *);
583 static int	man_get_hostinfo(queue_t *, mblk_t *, caddr_t, cred_t *);
584 static char	*man_inet_ntoa(in_addr_t);
585 static int	man_param_get(queue_t *, mblk_t *, caddr_t, cred_t *);
586 static int	man_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
587 static  void    man_param_cleanup(void);
588 static  void    man_nd_free(caddr_t *nd_pparam);
589 /*
590  * MAN SSC/Domain specific externs.
591  */
592 extern int	man_get_iosram(manc_t *);
593 extern int	man_domain_configure(void);
594 extern int	man_domain_deconfigure(void);
595 extern int	man_dossc_switch(uint32_t);
596 extern int	man_is_on_domain;
597 
598 /*
599  * Driver Globals protected by inner perimeter.
600  */
601 static manstr_t	*man_strup = NULL;	/* list of MAN STREAMS */
602 static caddr_t	man_ndlist = NULL;	/* head of ndd var list */
603 void		*man_softstate = NULL;
604 
605 /*
606  * Driver globals protected by man_lock.
607  */
608 kmutex_t		man_lock;		/* lock protecting vars below */
609 static kthread_id_t	man_bwork_id = NULL;	/* background thread ID */
610 man_workq_t		*man_bwork_q;		/* bgthread work q */
611 man_workq_t		*man_iwork_q;		/* inner perim (uwsrv) work q */
612 static man_linkrec_t	*man_linkrec_head = NULL;	/* list of linkblks */
613 ldi_handle_t		man_ctl_lh = NULL;	/* MAN control handle */
614 queue_t			*man_ctl_wq = NULL;	/* MAN control rq */
615 static int		man_config_state = MAN_UNCONFIGURED;
616 static int		man_config_error = ENODEV;
617 
618 /*
619  * These parameters are accessed via ndd to report the link configuration
620  * for the MAN driver. They can also be used to force configuration changes.
621  */
622 #define	MAN_NOTUSR	0x0f000000
623 
624 /* ------------------------------------------------------------------------- */
625 
626 static  param_t	man_param_arr[] = {
627 	/* min		max		value		name */
628 	{  0,		0xFFFF,		0,		"man_debug_level"},
629 };
630 
631 #define	MAN_NDD_GETABLE	1
632 #define	MAN_NDD_SETABLE	2
633 
634 static  uint32_t	man_param_display[] = {
635 /* DISPLAY */
636 MAN_NDD_SETABLE,	/* man_debug_level */
637 };
638 
639 /*
640  * STREAMs information.
641  */
642 static struct module_info man_m_info = {
643 	MAN_IDNUM,			/* mi_idnum */
644 	MAN_IDNAME,			/* mi_idname */
645 	MAN_MINPSZ,			/* mi_minpsz */
646 	MAN_MAXPSZ,			/* mi_maxpsz */
647 	MAN_HIWAT,			/* mi_hiwat */
648 	MAN_LOWAT			/* mi_lowat */
649 };
650 
651 /*
652  * Upper read queue does not do anything.
653  */
654 static struct qinit man_urinit = {
655 	NULL,				/* qi_putp */
656 	NULL,				/* qi_srvp */
657 	man_open,			/* qi_qopen */
658 	man_close,			/* qi_qclose */
659 	NULL,				/* qi_qadmin */
660 	&man_m_info,			/* qi_minfo */
661 	NULL				/* qi_mstat */
662 };
663 
664 static struct qinit man_lrinit = {
665 	man_lrput,			/* qi_putp */
666 	man_lrsrv,			/* qi_srvp */
667 	man_open,			/* qi_qopen */
668 	man_close,			/* qi_qclose */
669 	NULL,				/* qi_qadmin */
670 	&man_m_info,			/* qi_minfo */
671 	NULL				/* qi_mstat */
672 };
673 
674 static struct qinit man_uwinit = {
675 	man_uwput,			/* qi_putp */
676 	man_uwsrv,			/* qi_srvp */
677 	man_open,			/* qi_qopen */
678 	man_close,			/* qi_qclose */
679 	NULL,				/* qi_qadmin */
680 	&man_m_info,			/* qi_minfo */
681 	NULL				/* qi_mstat */
682 };
683 
684 static struct qinit man_lwinit = {
685 	NULL,				/* qi_putp */
686 	man_lwsrv,			/* qi_srvp */
687 	man_open,			/* qi_qopen */
688 	man_close,			/* qi_qclose */
689 	NULL,				/* qi_qadmin */
690 	&man_m_info,			/* qi_minfo */
691 	NULL				/* qi_mstat */
692 };
693 
694 static struct streamtab man_maninfo = {
695 	&man_urinit,			/* st_rdinit */
696 	&man_uwinit,			/* st_wrinit */
697 	&man_lrinit,			/* st_muxrinit */
698 	&man_lwinit			/* st_muxwrinit */
699 };
700 
701 
702 /*
703  * Module linkage information for the kernel.
704  *
705  * Locking Theory:
706  * 	D_MTPERMOD -	Only an inner perimeter: All routines single
707  * 			threaded (except put, see below).
708  *	D_MTPUTSHARED -	Put routines enter inner perimeter shared (not
709  *			exclusive) for concurrency/performance reasons.
710  *
711  *	Anyone who needs exclusive outer perimeter permission (changing
712  *	global data structures) does so via qwriter() calls. The
713  *	background thread does all his work outside of perimeter and
714  *	submits work via qtimeout() when data structures need to be
715  *	modified.
716  */
717 
718 #define	MAN_MDEV_FLAGS	(D_MP|D_MTPERMOD|D_MTPUTSHARED)
719 
720 DDI_DEFINE_STREAM_OPS(man_ops, nulldev, nulldev, man_attach,
721     man_detach, nodev, man_info, MAN_MDEV_FLAGS, &man_maninfo,
722     ddi_quiesce_not_supported);
723 
724 extern int nodev(), nulldev();
725 
726 static struct modldrv modldrv = {
727 	&mod_driverops, 	/* Module type.  This one is a pseudo driver */
728 	"MAN MetaDriver",
729 	&man_ops,		/* driver ops */
730 };
731 
732 static struct modlinkage modlinkage = {
733 	MODREV_1,
734 	(void *) &modldrv,
735 	NULL
736 };
737 
738 
739 /* Virtual Driver loader entry points */
740 
741 int
742 _init(void)
743 {
744 	int		status = DDI_FAILURE;
745 
746 	MAN_DBG(MAN_INIT, ("_init:"));
747 
748 	status = mod_install(&modlinkage);
749 	if (status != 0) {
750 		cmn_err(CE_WARN, "man_init: mod_install failed"
751 		    " error = %d", status);
752 		return (status);
753 	}
754 
755 	status = ddi_soft_state_init(&man_softstate, sizeof (man_t), 4);
756 	if (status != 0) {
757 		cmn_err(CE_WARN, "man_init: ddi_soft_state_init failed"
758 		    " error = %d", status);
759 		(void) mod_remove(&modlinkage);
760 		return (status);
761 	}
762 
763 	man_bwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
764 	man_iwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
765 
766 	mutex_init(&man_lock, NULL, MUTEX_DRIVER, NULL);
767 	cv_init(&man_bwork_q->q_cv, NULL, CV_DRIVER, NULL);
768 	cv_init(&man_iwork_q->q_cv, NULL, CV_DRIVER, NULL);
769 
770 	return (0);
771 }
772 
773 /*
774  * _info is called by modinfo().
775  */
776 int
777 _info(struct modinfo *modinfop)
778 {
779 	int	status;
780 
781 	MAN_DBG(MAN_INIT, ("_info:"));
782 
783 	status = mod_info(&modlinkage, modinfop);
784 
785 	MAN_DBG(MAN_INIT, ("_info: returns %d", status));
786 
787 	return (status);
788 }
789 
790 /*
791  * _fini called by modunload() just before driver is unloaded from memory.
792  */
793 int
794 _fini(void)
795 {
796 	int status = 0;
797 
798 	MAN_DBG(MAN_INIT, ("_fini:"));
799 
800 
801 	/*
802 	 * The only upper stream left should be man_ctl_lh. Note that
803 	 * man_close (upper stream) is synchronous (i.e. it waits for
804 	 * all STREAMS framework associated with the upper stream to be
805 	 * torn down). This guarantees that man_ctl_lh will never become
806 	 * NULL until noone is around to notice. This assumption is made
807 	 * in a few places like man_plumb, man_unplumb, etc.
808 	 */
809 	if (man_strup && (man_strup->ms_next != NULL))
810 		return (EBUSY);
811 
812 	/*
813 	 * Deconfigure the driver.
814 	 */
815 	status = man_deconfigure();
816 	if (status)
817 		goto exit;
818 
819 	/*
820 	 * need to detach every instance of the driver
821 	 */
822 	status = mod_remove(&modlinkage);
823 	if (status != 0)
824 		goto exit;
825 
826 	ddi_soft_state_fini(&man_softstate);
827 
828 	/*
829 	 * Free up locks.
830 	 */
831 	mutex_destroy(&man_lock);
832 	cv_destroy(&man_bwork_q->q_cv);
833 	cv_destroy(&man_iwork_q->q_cv);
834 
835 	man_kfree(man_bwork_q, sizeof (man_workq_t));
836 	man_kfree(man_iwork_q, sizeof (man_workq_t));
837 
838 exit:
839 
840 	MAN_DBG(MAN_INIT, ("_fini: returns %d", status));
841 
842 	return (status);
843 }
844 
845 /*
846  * Deconfigure the MAN driver.
847  */
848 static int
849 man_deconfigure()
850 {
851 	man_work_t	*wp;
852 	int		status = 0;
853 
854 	MAN_DBG(MAN_CONFIG, ("man_deconfigure:\n"));
855 
856 	mutex_enter(&man_lock);
857 
858 	if (man_is_on_domain) {
859 		status = man_domain_deconfigure();
860 		if (status != 0)
861 			goto exit;
862 	}
863 
864 	man_param_cleanup();	/* Free up NDD resources */
865 
866 	/*
867 	 * I may have to handle straggling work requests. Just qwait?
868 	 * or cvwait? Called from _fini - TBD
869 	 */
870 	ASSERT(man_bwork_q->q_work == NULL);
871 	ASSERT(man_iwork_q->q_work == NULL);
872 
873 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting CLOSE_CTL\n"));
874 
875 	if (man_ctl_lh != NULL) {
876 		wp = man_work_alloc(MAN_WORK_CLOSE_CTL, KM_SLEEP);
877 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
878 		man_work_add(man_bwork_q, wp);
879 
880 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
881 			cv_wait(&wp->mw_cv, &man_lock);
882 		}
883 		man_work_free(wp);
884 	}
885 
886 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting STOP\n"));
887 	if (man_bwork_id != NULL) {
888 
889 		wp = man_work_alloc(MAN_WORK_STOP, KM_SLEEP);
890 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
891 		man_work_add(man_bwork_q, wp);
892 
893 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
894 			cv_wait(&wp->mw_cv, &man_lock);
895 		}
896 		man_work_free(wp);
897 	}
898 	man_config_state = MAN_UNCONFIGURED;
899 
900 exit:
901 	mutex_exit(&man_lock);
902 
903 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: returns %d\n", status));
904 
905 	return (status);
906 }
907 
908 /*
909  * man_attach - allocate resources and attach an instance of the MAN driver
910  * The <man>.conf file controls how many instances of the MAN driver are
911  * available.
912  *
913  *	dip - devinfo of node
914  * 	cmd - one of DDI_ATTACH | DDI_RESUME
915  *
916  *	returns	- success - DDI_SUCCESS
917  *		- failure - DDI_FAILURE
918  */
919 static int
920 man_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
921 {
922 	man_t		*manp;		/* per instance data */
923 	uchar_t		flag = KSTAT_FLAG_WRITABLE; /* support netstat -kc */
924 	kstat_t		*ksp;
925 	int		minor_node_created = 0;
926 	int		instance;
927 	eaddr_t		man_eaddr;
928 
929 	MAN_DBG(MAN_INIT, ("man_attach: \n"));
930 
931 	if (cmd != DDI_ATTACH) {
932 		MAN_DBG(MAN_INIT, ("man_attach: bad command %d\n", cmd));
933 		return (DDI_FAILURE);
934 	}
935 
936 	if (man_get_our_etheraddr(&man_eaddr))
937 		return (DDI_FAILURE);
938 
939 	instance = ddi_get_instance(dip);
940 
941 	/*
942 	 * we assume that instance is always equal to zero.
943 	 * and there will always only be one instance.
944 	 * this is done because when dman opens itself via DMAN_INT_PATH,
945 	 * the path assumes that the instance number is zero.
946 	 * if we ever need to support multiple instances of the dman
947 	 * driver or non-zero instances, this will have to change.
948 	 */
949 	ASSERT(instance == 0);
950 
951 	/*
952 	 * Allocate per device info pointer and link in to global list of
953 	 * MAN devices.
954 	 */
955 	if ((ddi_soft_state_zalloc(man_softstate, instance) != DDI_SUCCESS) ||
956 	    ((manp = ddi_get_soft_state(man_softstate, instance)) == NULL)) {
957 		cmn_err(CE_WARN, "man_attach: cannot zalloc soft state!");
958 		return (DDI_FAILURE);
959 	}
960 
961 	ddi_set_driver_private(dip, manp);
962 	manp->man_dip = dip;
963 	manp->man_meta_major = ddi_driver_major(dip);
964 	manp->man_meta_ppa = instance;
965 
966 	/*
967 	 * Set ethernet address. Note that this address is duplicated
968 	 * at md_src_eaddr.
969 	 */
970 	ether_copy(&man_eaddr, &manp->man_eaddr);
971 	manp->man_eaddr_v = 1;
972 
973 	MAN_DBG(MAN_INIT, ("man_attach: set ether to %s",
974 	    ether_sprintf(&manp->man_eaddr)));
975 
976 	/*
977 	 * Initialize failover-related fields (timers and such),
978 	 * taking values from properties if present.
979 	 */
980 	manp->man_init_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
981 	    "init_time", MAN_INIT_TIME);
982 
983 	manp->man_linkcheck_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
984 	    "linkcheck_time", MAN_LINKCHECK_TIME);
985 
986 	manp->man_linkstale_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
987 	    "man_linkstale_time", MAN_LINKSTALE_TIME);
988 
989 	manp->man_linkstale_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
990 	    "man_linkstale_retries", MAN_LINKSTALE_RETRIES);
991 
992 	manp->man_dr_delay = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
993 	    "man_dr_delay", MAN_DR_DELAY);
994 
995 	manp->man_dr_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
996 	    "man_dr_retries", MAN_DR_RETRIES);
997 
998 	manp->man_kstat_waittime = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
999 	    "man_kstat_waittime", MAN_KSTAT_WAITTIME);
1000 
1001 	manp->man_dlpireset_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
1002 	    "man_dlpireset_time", MAN_DLPIRESET_TIME);
1003 
1004 	if (ddi_create_internal_pathname(dip, MAN_IDNAME, S_IFCHR,
1005 	    ddi_get_instance(dip)) == DDI_SUCCESS) {
1006 		minor_node_created = 1;
1007 	} else {
1008 		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1009 		    ddi_get_instance(dip));
1010 		goto exit;
1011 	}
1012 
1013 	if (ddi_create_minor_node(dip, MAN_IDNAME, S_IFCHR,
1014 	    ddi_get_instance(dip), DDI_NT_NET, CLONE_DEV) == DDI_SUCCESS) {
1015 		minor_node_created = 1;
1016 	} else {
1017 		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1018 		    ddi_get_instance(dip));
1019 		goto exit;
1020 	}
1021 
1022 	/*
1023 	 * Allocate meta kstat_t for this instance of the driver.
1024 	 * Note that each of man_path_t keeps track of the kstats
1025 	 * for the real devices via mp_last_knp.
1026 	 */
1027 #ifdef	kstat
1028 	flag |= KSTAT_FLAG_PERSISTENT;
1029 #endif
1030 	ksp = kstat_create(MAN_IDNAME, ddi_get_instance(dip), NULL, "net",
1031 	    KSTAT_TYPE_NAMED, MAN_NUMSTATS, flag);
1032 
1033 	if (ksp == NULL) {
1034 		cmn_err(CE_WARN, "man_attach(%d): kstat_create failed"
1035 		    " - manp(0x%p)", manp->man_meta_ppa,
1036 		    (void *)manp);
1037 		goto exit;
1038 	}
1039 
1040 	man_kstat_named_init(ksp->ks_data, MAN_NUMSTATS);
1041 	ksp->ks_update = man_kstat_update;
1042 	ksp->ks_private = (void *) manp;
1043 	manp->man_ksp = ksp;
1044 	kstat_install(manp->man_ksp);
1045 
1046 	ddi_report_dev(dip);
1047 
1048 	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_SUCCESS",
1049 	    ddi_get_instance(dip)));
1050 
1051 	return (DDI_SUCCESS);
1052 
1053 exit:
1054 	if (minor_node_created)
1055 		ddi_remove_minor_node(dip, NULL);
1056 	ddi_set_driver_private(dip, NULL);
1057 	ddi_soft_state_free(man_softstate, instance);
1058 
1059 	MAN_DBG(MAN_INIT, ("man_attach(%d) eaddr returns DDI_FAILIRE",
1060 	    ddi_get_instance(dip)));
1061 
1062 	return (DDI_FAILURE);
1063 
1064 }
1065 
1066 static int
1067 man_get_our_etheraddr(eaddr_t *eap)
1068 {
1069 	manc_t	manc;
1070 	int	status = 0;
1071 
1072 	if (man_is_on_domain) {
1073 		if (status = man_get_iosram(&manc))
1074 			return (status);
1075 		ether_copy(&manc.manc_dom_eaddr, eap);
1076 	} else {
1077 		(void) localetheraddr((struct ether_addr *)NULL, eap);
1078 	}
1079 
1080 	return (status);
1081 }
1082 
1083 /*
1084  * man_detach - detach an instance of a driver
1085  *
1086  *	dip - devinfo of node
1087  * 	cmd - one of DDI_DETACH | DDI_SUSPEND
1088  *
1089  *	returns	- success - DDI_SUCCESS
1090  *		- failure - DDI_FAILURE
1091  */
1092 static int
1093 man_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1094 {
1095 	register man_t	*manp;		/* per instance data */
1096 	int		instance;
1097 
1098 	MAN_DBG(MAN_INIT, ("man_detach(%d):\n", ddi_get_instance(dip)));
1099 
1100 	if (cmd != DDI_DETACH) {
1101 		MAN_DBG(MAN_INIT, ("man_detach: bad command %d\n", cmd));
1102 		return (DDI_FAILURE);
1103 	}
1104 
1105 	if (dip == NULL) {
1106 		MAN_DBG(MAN_INIT, ("man_detach: dip == NULL\n"));
1107 		return (DDI_FAILURE);
1108 	}
1109 
1110 	instance = ddi_get_instance(dip);
1111 
1112 	mutex_enter(&man_lock);
1113 
1114 	manp = (man_t *)ddi_get_soft_state(man_softstate, instance);
1115 	if (manp == NULL) {
1116 		mutex_exit(&man_lock);
1117 
1118 		cmn_err(CE_WARN, "man_detach: unable to get softstate"
1119 		    " for instance = %d, dip = 0x%p!\n", instance,
1120 		    (void *)dip);
1121 		return (DDI_FAILURE);
1122 	}
1123 
1124 	if (manp->man_refcnt != 0) {
1125 		mutex_exit(&man_lock);
1126 
1127 		cmn_err(CE_WARN, "man_detach: %s%d refcnt %d", MAN_IDNAME,
1128 		    instance, manp->man_refcnt);
1129 		MAN_DBGCALL(MAN_INIT, man_print_man(manp));
1130 
1131 		return (DDI_FAILURE);
1132 	}
1133 
1134 	ddi_remove_minor_node(dip, NULL);
1135 
1136 	mutex_exit(&man_lock);
1137 
1138 	kstat_delete(manp->man_ksp);
1139 	ddi_soft_state_free(man_softstate, instance);
1140 	ddi_set_driver_private(dip, NULL);
1141 
1142 	MAN_DBG(MAN_INIT, ("man_detach returns DDI_SUCCESS"));
1143 
1144 	return (DDI_SUCCESS);
1145 }
1146 
1147 /*
1148  * man_info:
1149  *	As a standard DLPI style-2, man_info() should always return
1150  *	DDI_FAILURE.
1151  *
1152  *	However, man_open() has special treatment for a direct open
1153  *	via kstr_open() without going through the CLONE driver.
1154  *	To make this special kstr_open() work, we need to map
1155  *	minor of 0 to instance 0.
1156  */
1157 /*ARGSUSED*/
1158 static int
1159 man_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1160 {
1161 	minor_t minor;
1162 
1163 	switch (infocmd) {
1164 	case DDI_INFO_DEVT2DEVINFO:
1165 		break;
1166 
1167 	case DDI_INFO_DEVT2INSTANCE:
1168 		minor = getminor((dev_t)arg);
1169 		if (minor == 0) {
1170 			*result = (void *)(uintptr_t)minor;
1171 			return (DDI_SUCCESS);
1172 		}
1173 		break;
1174 	default:
1175 		break;
1176 	}
1177 	return (DDI_FAILURE);
1178 }
1179 
1180 /* Standard Device Driver entry points */
1181 
1182 /*
1183  * man_open - open the device
1184  *
1185  *	rq - upper read queue of the stream
1186  *	devp - pointer to a device number
1187  *	flag - information passed from the user program open(2) system call
1188  *	sflag - stream flags
1189  *	credp - pointer to the cred(9S) user credential structure
1190  *
1191  *	returns	- success - 0
1192  *		- failure - errno value for failure
1193  */
1194 /*ARGSUSED*/
1195 static int
1196 man_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
1197 {
1198 	int			minordev = -1;
1199 	manstr_t		*msp;
1200 	manstr_t		*tsp;
1201 	manstr_t		**prevmsp;
1202 	int			status = 0;
1203 
1204 	MAN_DBG(MAN_OCLOSE, ("man_open: rq(0x%p) sflag(0x%x)\n",
1205 	    (void *)rq, sflag));
1206 
1207 	ASSERT(rq);
1208 	ASSERT(sflag != MODOPEN);
1209 
1210 	/*
1211 	 * reopen; q_ptr set to msp at open completion.
1212 	 */
1213 	if (rq->q_ptr) {
1214 		return (0);
1215 	}
1216 
1217 	/*
1218 	 * Allocate and initialize manstr_t for this device.
1219 	 */
1220 	msp = man_kzalloc(sizeof (manstr_t), KM_SLEEP);
1221 	SETSTATE(msp, DL_UNATTACHED);
1222 	msp->ms_meta_ppa = -1;
1223 	msp->ms_rq = rq;
1224 	rq->q_ptr = WR(rq)->q_ptr = msp;
1225 
1226 	/*
1227 	 * Get the MAN driver configured on 1st open.  Note that the only way
1228 	 * we get sflag != CLONEOPEN is via the call in man_plumbctl().  All
1229 	 * CLONEOPEN calls to man_open will be via the file system
1230 	 * device node /dev/man, a pseudo clone device.
1231 	 */
1232 
1233 	qprocson(rq);
1234 
1235 	if (sflag == CLONEOPEN && man_config_state != MAN_CONFIGURED) {
1236 		/*
1237 		 * First open calls man_configure. Everyone qwaits until
1238 		 * we get it open. See man_open_ctl() comments for mutex
1239 		 * lock/synchronization info.
1240 		 */
1241 
1242 		mutex_enter(&man_lock);
1243 
1244 		if (man_config_state == MAN_UNCONFIGURED) {
1245 			man_config_state = MAN_CONFIGURING;
1246 			mutex_exit(&man_lock);
1247 			status = man_configure(rq);
1248 			if (status != 0)
1249 				goto exit;
1250 		} else {
1251 			while (man_config_state == MAN_CONFIGURING) {
1252 
1253 				mutex_exit(&man_lock);
1254 				status = qwait_sig(rq);
1255 
1256 				if (status == 0) {
1257 					status = EINTR;
1258 					goto exit;
1259 				}
1260 
1261 				mutex_enter(&man_lock);
1262 			}
1263 			mutex_exit(&man_lock);
1264 
1265 			if (man_config_error) {
1266 				status = man_config_error;
1267 				goto exit;
1268 			}
1269 		}
1270 	}
1271 
1272 	/*
1273 	 * Determine minor device number. man_open serialized by
1274 	 * D_MTPERMOD.
1275 	 */
1276 	prevmsp = &man_strup;
1277 	if (sflag == CLONEOPEN) {
1278 
1279 		minordev = 0;
1280 		for (; (tsp = *prevmsp) != NULL; prevmsp = &tsp->ms_next) {
1281 			if (minordev < tsp->ms_minor)
1282 				break;
1283 			minordev++;
1284 		}
1285 		*devp = makedevice(getmajor(*devp), minordev);
1286 
1287 	} else {
1288 		/*
1289 		 * Should only get here from man_plumbctl().
1290 		 */
1291 		/*LINTED E_ASSIGN_UINT_TO_SIGNED_INT*/
1292 		minordev = getminor(*devp);
1293 
1294 		/*
1295 		 * No need to protect this here as all opens are
1296 		 * qwaiting, and the bgthread (who is doing this open)
1297 		 * is the only one who mucks with this variable.
1298 		 */
1299 		man_ctl_wq = WR(rq);
1300 
1301 		ASSERT(minordev == 0);	/* TBD delete this */
1302 	}
1303 
1304 	msp->ms_meta_maj = getmajor(*devp);
1305 	msp->ms_minor = minordev;
1306 	if (minordev == 0)
1307 		msp->ms_flags = MAN_SFLAG_CONTROL;
1308 
1309 	/*
1310 	 * Link new entry into global list of active entries.
1311 	 */
1312 	msp->ms_next = *prevmsp;
1313 	*prevmsp = msp;
1314 
1315 
1316 	/*
1317 	 * Disable automatic enabling of our write service procedure.
1318 	 * We control this explicitly.
1319 	 */
1320 	noenable(WR(rq));
1321 
1322 exit:
1323 	MAN_DBG(MAN_OCLOSE, ("man_open: exit rq(0x%p) minor %d errno %d\n",
1324 	    (void *)rq, minordev, status));
1325 
1326 	/*
1327 	 * Clean up on error.
1328 	 */
1329 	if (status) {
1330 		qprocsoff(rq);
1331 		rq->q_ptr = WR(rq)->q_ptr = NULL;
1332 		man_kfree((char *)msp, sizeof (manstr_t));
1333 	} else
1334 		(void) qassociate(rq, -1);
1335 
1336 	return (status);
1337 }
1338 
1339 /*
1340  * Get the driver configured.  Called from first man_open with exclusive
1341  * inner perimeter.
1342  */
1343 static int
1344 man_configure(queue_t *rq)
1345 {
1346 	man_work_t	*wp;
1347 	int		status = 0;
1348 
1349 	MAN_DBG(MAN_CONFIG, ("man_configure:"));
1350 
1351 	/*
1352 	 * Initialize NDD parameters.
1353 	 */
1354 	if (!man_ndlist &&
1355 	    !man_param_register(man_param_arr, A_CNT(man_param_arr))) {
1356 		cmn_err(CE_WARN, "man_configure: man_param_register failed!");
1357 		man_config_error = ENOMEM;
1358 		goto exit;
1359 	}
1360 
1361 	mutex_enter(&man_lock);
1362 
1363 	/*
1364 	 * Start up background thread.
1365 	 */
1366 	if (man_bwork_id == NULL)
1367 		man_bwork_id = thread_create(NULL, 2 * DEFAULTSTKSZ,
1368 		    man_bwork, NULL, 0, &p0, TS_RUN, minclsyspri);
1369 
1370 	/*
1371 	 * Submit work to get control stream opened. Qwait until its
1372 	 * done. See man_open_ctl for mutex lock/synchronization info.
1373 	 */
1374 
1375 	if (man_ctl_lh == NULL) {
1376 		wp = man_work_alloc(MAN_WORK_OPEN_CTL, KM_SLEEP);
1377 		wp->mw_flags |= MAN_WFLAGS_QWAITER;
1378 		wp->mw_q = WR(rq);
1379 
1380 		/*
1381 		 * Submit work and wait. When man_open_ctl exits
1382 		 * man_open, it will cause qwait below to return.
1383 		 */
1384 		man_work_add(man_bwork_q, wp);
1385 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
1386 			mutex_exit(&man_lock);
1387 			qwait(rq);
1388 			mutex_enter(&man_lock);
1389 		}
1390 		status = wp->mw_status;
1391 		man_work_free(wp);
1392 
1393 	}
1394 	mutex_exit(&man_lock);
1395 
1396 	/*
1397 	 * If on domain, setup IOSRAM and build the pathgroups
1398 	 * automatically.
1399 	 */
1400 	if ((status == 0) && man_is_on_domain)
1401 		status = man_domain_configure();
1402 
1403 exit:
1404 	mutex_enter(&man_lock);
1405 
1406 	man_config_error = status;
1407 	if (status != 0)
1408 		man_config_state = MAN_UNCONFIGURED;
1409 	else
1410 		man_config_state = MAN_CONFIGURED;
1411 
1412 	mutex_exit(&man_lock);
1413 
1414 	MAN_DBG(MAN_CONFIG, ("man_configure: returns %d\n", status));
1415 
1416 	return (status);
1417 }
1418 
1419 /*
1420  * man_close - close the device
1421  *
1422  *	rq - upper read queue of the stream
1423  *
1424  *	returns	- success - 0
1425  *		- failure - errno value for failure
1426  */
1427 static int
1428 man_close(queue_t *rq)
1429 {
1430 	manstr_t		*close_msp;
1431 	manstr_t		*msp;
1432 
1433 	MAN_DBG(MAN_OCLOSE, ("man_close: rq(0x%p)\n", (void *)rq));
1434 
1435 	qprocsoff(rq);
1436 	close_msp = (manstr_t *)rq->q_ptr;
1437 
1438 	/*
1439 	 * Unlink the per-Stream entry from the active list and free it.
1440 	 */
1441 	if (close_msp == man_strup)
1442 		man_strup = close_msp->ms_next;
1443 	else {
1444 		for (msp = man_strup; msp && msp->ms_next != close_msp; )
1445 			msp = msp->ms_next;
1446 
1447 		if (msp == NULL) {
1448 			cmn_err(CE_WARN, "man_close: no stream!");
1449 			return (ENODEV);
1450 		}
1451 
1452 		msp->ms_next = close_msp->ms_next;
1453 	}
1454 
1455 	if (close_msp->ms_dests != NULL) {
1456 		/*
1457 		 * Still DL_ATTACHED
1458 		 */
1459 		man_work_t *wp;
1460 
1461 		wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_SLEEP);
1462 		man_dodetach(close_msp, wp);
1463 	}
1464 
1465 	if (close_msp->ms_flags & MAN_SFLAG_CONTROL) {
1466 		/*
1467 		 * Driver about to unload.
1468 		 */
1469 		man_ctl_wq = NULL;
1470 	}
1471 
1472 	rq->q_ptr = WR(rq)->q_ptr = NULL;
1473 	man_kfree((char *)close_msp, sizeof (manstr_t));
1474 	(void) qassociate(rq, -1);
1475 
1476 	MAN_DBG(MAN_OCLOSE, ("man_close: exit\n"));
1477 
1478 	return (0);
1479 }
1480 
1481 /*
1482  * Ask bgthread to tear down lower stream and qwait
1483  * until its done.
1484  */
1485 static void
1486 man_dodetach(manstr_t *msp, man_work_t *wp)
1487 {
1488 	man_dest_t	*mdp;
1489 	int		i;
1490 	mblk_t		*mp;
1491 
1492 	mdp = msp->ms_dests;
1493 	msp->ms_dests = NULL;
1494 	msp->ms_destp = NULL;
1495 
1496 	/*
1497 	 * Excise lower dests array, set it closing and hand it to
1498 	 * background thread to dispose of.
1499 	 */
1500 	for (i = 0; i < MAN_MAX_DESTS; i++) {
1501 
1502 		mdp[i].md_state |= MAN_DSTATE_CLOSING;
1503 		mdp[i].md_msp = NULL;
1504 		mdp[i].md_rq = NULL;
1505 
1506 		if (mdp[i].md_lc_timer_id != 0) {
1507 			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
1508 			mdp[i].md_lc_timer_id = 0;
1509 		}
1510 		if (mdp[i].md_bc_id != 0) {
1511 			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
1512 			mdp[i].md_bc_id = 0;
1513 		}
1514 
1515 		mutex_enter(&mdp[i].md_lock);
1516 		while ((mp = mdp[i].md_dmp_head) != NULL) {
1517 			mdp[i].md_dmp_head = mp->b_next;
1518 			mp->b_next = NULL;
1519 			freemsg(mp);
1520 		}
1521 		mdp[i].md_dmp_count = 0;
1522 		mdp[i].md_dmp_tail = NULL;
1523 		mutex_exit(&mdp[i].md_lock);
1524 	}
1525 
1526 	/*
1527 	 * Dump any DL type messages previously caught.
1528 	 */
1529 	man_dl_clean(&msp->ms_dl_mp);
1530 	man_dl_clean(&msp->ms_dlioc_mp);
1531 
1532 	/*
1533 	 * We need to clear fast path flag when dlioc messages are cleaned.
1534 	 */
1535 	msp->ms_flags &= ~MAN_SFLAG_FAST;
1536 
1537 	/*
1538 	 * MAN_WORK_CLOSE_STREAM work request preallocated by caller.
1539 	 */
1540 	ASSERT(wp->mw_type == MAN_WORK_CLOSE_STREAM);
1541 	ASSERT(mdp != NULL);
1542 	wp->mw_arg.a_mdp = mdp;
1543 	wp->mw_arg.a_ndests = MAN_MAX_DESTS;
1544 	wp->mw_arg.a_pg_id = -1;	/* Don't care */
1545 
1546 	mutex_enter(&man_lock);
1547 	man_work_add(man_bwork_q, wp);
1548 	msp->ms_manp->man_refcnt--;
1549 	mutex_exit(&man_lock);
1550 
1551 	msp->ms_manp = NULL;
1552 
1553 }
1554 
1555 
1556 /*
1557  * man_uwput - handle DLPI messages issued from upstream, the write
1558  * side of the upper half of multiplexor. Called with shared access to
1559  * the inner perimeter.
1560  *
1561  *	wq - upper write queue of mxx
1562  *	mp - mblk ptr to DLPI request
1563  */
1564 static int
1565 man_uwput(register queue_t *wq, register mblk_t *mp)
1566 {
1567 	register manstr_t	*msp;		/* per stream data */
1568 	register man_t		*manp;		/* per instance data */
1569 
1570 	msp = (manstr_t *)wq->q_ptr;
1571 
1572 	MAN_DBG(MAN_UWPUT, ("man_uwput: wq(0x%p) mp(0x%p) db_type(0x%x)"
1573 	    " msp(0x%p)\n",
1574 	    (void *)wq, (void *)mp, DB_TYPE(mp), (void *)msp));
1575 #if DEBUG
1576 	if (man_debug & MAN_UWPUT) {
1577 		if (DB_TYPE(mp) == M_IOCTL) {
1578 			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1579 			MAN_DBG(MAN_UWPUT,
1580 			    ("man_uwput: M_IOCTL ioc_cmd(0x%x)\n",
1581 			    iocp->ioc_cmd));
1582 		} else if (DB_TYPE(mp) == M_CTL) {
1583 			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1584 			MAN_DBG(MAN_UWPUT,
1585 			    ("man_uwput: M_CTL ioc_cmd(0x%x)\n",
1586 			    iocp->ioc_cmd));
1587 		}
1588 	}
1589 #endif	/* DEBUG */
1590 
1591 
1592 	switch (DB_TYPE(mp)) {
1593 	case M_DATA:
1594 		manp = msp->ms_manp;
1595 
1596 		if (((msp->ms_flags & (MAN_SFLAG_FAST | MAN_SFLAG_RAW)) == 0) ||
1597 		    (msp->ms_dlpistate != DL_IDLE) ||
1598 		    (manp == NULL)) {
1599 
1600 			merror(wq, mp, EPROTO);
1601 			break;
1602 		}
1603 
1604 		if (wq->q_first) {
1605 			(void) putq(wq, mp);
1606 			qenable(wq);
1607 		} else {
1608 			ehdr_t	*ep = (ehdr_t *)mp->b_rptr;
1609 
1610 			(void) man_start(wq, mp, &ep->ether_dhost);
1611 		}
1612 		break;
1613 
1614 	case M_PROTO:
1615 	case M_PCPROTO:
1616 		if ((DL_PRIM(mp) == DL_UNITDATA_IND) && !wq->q_first) {
1617 			(void) man_udreq(wq, mp);
1618 		} else {
1619 			(void) putq(wq, mp);
1620 			qenable(wq);
1621 		}
1622 		break;
1623 
1624 	case M_IOCTL:
1625 	case M_IOCDATA:
1626 		qwriter(wq, mp, man_ioctl, PERIM_INNER);
1627 		break;
1628 
1629 	case M_CTL:
1630 		freemsg(mp);
1631 		break;
1632 
1633 	case M_FLUSH:
1634 		MAN_DBG(MAN_UWPUT, ("man_wput: M_FLUSH\n"));
1635 		if (*mp->b_rptr & FLUSHW)
1636 			flushq(wq, FLUSHDATA);
1637 		if (*mp->b_rptr & FLUSHR) {
1638 			flushq(RD(wq), FLUSHDATA);
1639 			*mp->b_rptr &= ~FLUSHW;
1640 			qreply(wq, mp);
1641 		} else {
1642 			freemsg(mp);
1643 		}
1644 		break;
1645 
1646 	default:
1647 		MAN_DBG(MAN_WARN,
1648 		    ("man_uwput: illegal mblk(0x%p) type(0x%x)\n",
1649 		    (void *)mp, DB_TYPE(mp)));
1650 		freemsg(mp);
1651 		break;
1652 	} /* End switch */
1653 
1654 	MAN_DBG(MAN_UWPUT, ("man_uwput: exit wq(0x%p) mp(0x%p)\n",
1655 	    (void *)wq, (void *)mp));
1656 
1657 	return (0);
1658 }
1659 
1660 /*
1661  * man_start - handle data messages issued from upstream.  Send down
1662  * to particular man_dest based on ether_addr, otherwise send out to all
1663  * valid man_dests.
1664  *
1665  *	wq - upper write queue of mxx
1666  *	mp - mblk ptr to DLPI request
1667  * 	caller - Caller ID for decision making on canput failure
1668  *
1669  * Returns:
1670  *	0	- Data xmitted or No flow control situation detected.
1671  *	1	- Flow control situation detected.
1672  *
1673  * STREAMS Flow Control: can be used if there is only one destination
1674  * for a stream (1 to 1 multiplexor). In this case, we will use the upper
1675  * write queue to store mblks when in flow control. If there are multiple
1676  * destinations, we cannot use the STREAMs based flow control (1 to many
1677  * multiplexor). In this case, we will use the lower write queue to store
1678  * mblks when in flow control. Since destinations come and go, we may
1679  * transition between 1-to-1 and 1-to-m. So it may be the case that we have
1680  * some mblks stored on the upper queue, and some on the lower queue. However,
1681  * we will never send mblks out of order. See man_uwput and man_start_lower().
1682  *
1683  * A simple flow control mechanism is implemented for the deferred mblk list,
1684  * as this list is expected to be used temporarily for a very short
1685  * period required for switching paths. This flow control mechanism is
1686  * used only as a defensive approach to avoid infinite growth of this list.
1687  */
1688 static int
1689 man_start(register queue_t *wq, register mblk_t *mp, eaddr_t *eap)
1690 {
1691 	register manstr_t	*msp;		/* per stream data */
1692 	register man_dest_t	*mdp = NULL;	/* destination */
1693 	mblk_t			*tmp;
1694 	int			i;
1695 	int			status = 0;
1696 
1697 	msp = (manstr_t *)wq->q_ptr;
1698 
1699 	MAN_DBG(MAN_DATA, ("man_start: msp(0x%p) ether_addr(%s)\n",
1700 	    (void *)msp, ether_sprintf(eap)));
1701 
1702 	if (msp->ms_dests == NULL) {
1703 		cmn_err(CE_WARN, "man_start: no destinations");
1704 		freemsg(mp);
1705 		return (0);
1706 	}
1707 
1708 	/*
1709 	 * Optimization if only one valid destination.
1710 	 */
1711 	mdp = msp->ms_destp;
1712 
1713 	if (IS_UNICAST(eap)) {
1714 		queue_t			*flow_wq = NULL;
1715 
1716 		if (mdp == NULL) {
1717 			/*
1718 			 * TDB - This needs to be optimized (some bits in
1719 			 * ehp->dhost will act as an index.
1720 			 */
1721 			for (i = 0; i < MAN_MAX_DESTS; i++) {
1722 
1723 				mdp = &msp->ms_dests[i];
1724 
1725 				if ((mdp->md_state == MAN_DSTATE_READY) &&
1726 				    (ether_cmp(eap, &mdp->md_dst_eaddr) == 0))
1727 					break;
1728 				mdp = NULL;
1729 			}
1730 		} else {
1731 			/*
1732 			 * 1 to 1 multiplexing, use upper wq for flow control.
1733 			 */
1734 			flow_wq = wq;
1735 		}
1736 
1737 		if (mdp != NULL) {
1738 			/*
1739 			 * Its going somewhere specific
1740 			 */
1741 			status =  man_start_lower(mdp, mp, flow_wq, MAN_UPPER);
1742 
1743 		} else {
1744 			MAN_DBG(MAN_DATA, ("man_start: no destination"
1745 			    " for eaddr %s\n", ether_sprintf(eap)));
1746 			freemsg(mp);
1747 		}
1748 	} else {
1749 		/*
1750 		 * Broadcast or multicast - send everone a copy.
1751 		 */
1752 		if (mdp == NULL) {
1753 			for (i = 0; i < MAN_MAX_DESTS; i++) {
1754 				mdp = &msp->ms_dests[i];
1755 
1756 				if (mdp->md_state != MAN_DSTATE_READY)
1757 					continue;
1758 
1759 				if ((tmp = copymsg(mp)) != NULL) {
1760 					(void) man_start_lower(mdp, tmp,
1761 					    NULL, MAN_UPPER);
1762 				} else {
1763 					MAN_DBG(MAN_DATA, ("man_start: copymsg"
1764 					    " failed!"));
1765 				}
1766 			}
1767 			freemsg(mp);
1768 		} else {
1769 			if (mdp->md_state == MAN_DSTATE_READY)
1770 				status =  man_start_lower(mdp, mp, wq,
1771 				    MAN_UPPER);
1772 			else
1773 				freemsg(mp);
1774 		}
1775 	}
1776 	return (status);
1777 }
1778 
1779 /*
1780  * Send a DL_UNITDATA or M_DATA fastpath data mblk to a particular
1781  * destination. Others mblk types sent down via * man_dlpi_senddown().
1782  *
1783  * Returns:
1784  *	0	- Data xmitted
1785  *	1	- Data not xmitted due to flow control.
1786  */
1787 static int
1788 man_start_lower(man_dest_t *mdp, mblk_t *mp, queue_t *flow_wq, int caller)
1789 {
1790 	queue_t		*wq = mdp->md_wq;
1791 	int		status = 0;
1792 
1793 	/*
1794 	 * Lower stream ready for data transmit.
1795 	 */
1796 	if (mdp->md_state == MAN_DSTATE_READY &&
1797 	    mdp->md_dlpistate == DL_IDLE) {
1798 
1799 		ASSERT(mdp->md_wq != NULL);
1800 
1801 		if (caller == MAN_UPPER) {
1802 			/*
1803 			 * Check for flow control conditions for lower
1804 			 * stream.
1805 			 */
1806 			if (mdp->md_dmp_head == NULL &&
1807 			    wq->q_first == NULL && canputnext(wq)) {
1808 
1809 				(void) putnext(wq, mp);
1810 
1811 			} else {
1812 				mutex_enter(&mdp->md_lock);
1813 				if (mdp->md_dmp_head != NULL) {
1814 					/*
1815 					 * A simple flow control mechanism.
1816 					 */
1817 					if (mdp->md_dmp_count >= MAN_HIWAT) {
1818 						freemsg(mp);
1819 					} else {
1820 						/*
1821 						 * Add 'mp' to the deferred
1822 						 * msg list.
1823 						 */
1824 						mdp->md_dmp_tail->b_next = mp;
1825 						mdp->md_dmp_tail = mp;
1826 						mdp->md_dmp_count +=
1827 						    msgsize(mp);
1828 					}
1829 					mutex_exit(&mdp->md_lock);
1830 					/*
1831 					 * Inform flow control situation
1832 					 * to the caller.
1833 					 */
1834 					status = 1;
1835 					qenable(wq);
1836 					goto exit;
1837 				}
1838 				mutex_exit(&mdp->md_lock);
1839 				/*
1840 				 * If 1 to 1 mux, use upper write queue for
1841 				 * flow control.
1842 				 */
1843 				if (flow_wq != NULL) {
1844 					/*
1845 					 * putbq() message and indicate
1846 					 * flow control situation to the
1847 					 * caller.
1848 					 */
1849 					(void) putbq(flow_wq, mp);
1850 					qenable(flow_wq);
1851 					status = 1;
1852 					goto exit;
1853 				}
1854 				/*
1855 				 * 1 to many mux, use lower write queue for
1856 				 * flow control. Be mindful not to overflow
1857 				 * the lower MAN STREAM q.
1858 				 */
1859 				if (canput(wq)) {
1860 					(void) putq(wq, mp);
1861 					qenable(wq);
1862 				} else {
1863 					MAN_DBG(MAN_DATA, ("man_start_lower:"
1864 					    " lower q flow controlled -"
1865 					    " discarding packet"));
1866 					freemsg(mp);
1867 					goto exit;
1868 				}
1869 			}
1870 
1871 		} else {
1872 			/*
1873 			 * man_lwsrv  is draining flow controlled mblks.
1874 			 */
1875 			if (canputnext(wq))
1876 				(void) putnext(wq, mp);
1877 			else
1878 				status = 1;
1879 		}
1880 		goto exit;
1881 	}
1882 
1883 	/*
1884 	 * Lower stream in transition, do flow control.
1885 	 */
1886 	status = 1;
1887 
1888 	if (mdp->md_state == MAN_DSTATE_NOTPRESENT) {
1889 nodest:
1890 		cmn_err(CE_WARN,
1891 		    "man_start_lower: no dest for mdp(0x%p), caller(%d)!",
1892 		    (void *)mdp, caller);
1893 		if (caller == MAN_UPPER)
1894 			freemsg(mp);
1895 		goto exit;
1896 	}
1897 
1898 	if (mdp->md_state & MAN_DSTATE_CLOSING) {
1899 		MAN_DBG(MAN_DATA, ("man_start_lower: mdp(0x%p) closing",
1900 		    (void *)mdp));
1901 		if (caller == MAN_UPPER)
1902 			freemsg(mp);
1903 		goto exit;
1904 	}
1905 
1906 	if ((mdp->md_state & MAN_DSTATE_PLUMBING) ||
1907 	    (mdp->md_state == MAN_DSTATE_INITIALIZING) ||
1908 	    (mdp->md_dlpistate != DL_IDLE)) {
1909 		/*
1910 		 * Defer until PLUMBED and DL_IDLE. See man_lwsrv().
1911 		 */
1912 		if (caller == MAN_UPPER) {
1913 			/*
1914 			 * Upper stream sending data down, add to defered mblk
1915 			 * list for stream.
1916 			 */
1917 			mutex_enter(&mdp->md_lock);
1918 			if (mdp->md_dmp_count >= MAN_HIWAT) {
1919 				freemsg(mp);
1920 			} else {
1921 				if (mdp->md_dmp_head == NULL) {
1922 					ASSERT(mdp->md_dmp_tail == NULL);
1923 					mdp->md_dmp_head = mp;
1924 					mdp->md_dmp_tail = mp;
1925 				} else {
1926 					mdp->md_dmp_tail->b_next = mp;
1927 					mdp->md_dmp_tail = mp;
1928 				}
1929 				mdp->md_dmp_count += msgsize(mp);
1930 			}
1931 			mutex_exit(&mdp->md_lock);
1932 		}
1933 
1934 		goto exit;
1935 	}
1936 
1937 exit:
1938 	return (status);
1939 }
1940 
1941 /*
1942  * man_ioctl - handle ioctl requests for this driver (I_PLINK/I_PUNLINK)
1943  * or pass thru to the physical driver below.  Note that most M_IOCTLs we
1944  * care about come down the control msp, but the IOC ones come down the IP.
1945  * Called with exclusive inner perimeter.
1946  *
1947  *	wq - upper write queue of mxx
1948  *	mp - mblk ptr to DLPI ioctl request
1949  */
1950 static void
1951 man_ioctl(register queue_t *wq, register mblk_t *mp)
1952 {
1953 	manstr_t		*msp;
1954 	struct iocblk		*iocp;
1955 
1956 	iocp = (struct iocblk *)mp->b_rptr;
1957 	msp = (manstr_t *)wq->q_ptr;
1958 
1959 #ifdef DEBUG
1960 	{
1961 		char			ioc_cmd[30];
1962 
1963 		(void) sprintf(ioc_cmd, "not handled IOCTL 0x%x",
1964 		    iocp->ioc_cmd);
1965 		MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI),
1966 		    ("man_ioctl: wq(0x%p) mp(0x%p) cmd(%s)\n",
1967 		    (void *)wq, (void *)mp,
1968 		    (iocp->ioc_cmd == I_PLINK) ? "I_PLINK" :
1969 		    (iocp->ioc_cmd == I_PUNLINK) ? "I_PUNLINK" :
1970 		    (iocp->ioc_cmd == MAN_SETPATH) ? "MAN_SETPATH" :
1971 		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
1972 		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
1973 	}
1974 #endif /* DEBUG */
1975 
1976 
1977 	/*
1978 	 *  Handle the requests...
1979 	 */
1980 	switch ((unsigned int)iocp->ioc_cmd) {
1981 
1982 	case I_PLINK:
1983 		man_plink(wq, mp);
1984 		break;
1985 
1986 	case I_PUNLINK:
1987 		man_unplink(wq, mp);
1988 		break;
1989 
1990 	case MAN_SETPATH:
1991 		man_setpath(wq, mp);
1992 		break;
1993 
1994 	case MAN_GETEADDR:
1995 		man_geteaddr(wq, mp);
1996 		break;
1997 
1998 	case MAN_SET_LINKCHECK_TIME:
1999 		man_set_linkcheck_time(wq, mp);
2000 		break;
2001 
2002 	case MAN_SET_SC_IPADDRS:
2003 		man_set_sc_ipaddrs(wq, mp);
2004 		break;
2005 
2006 	case MAN_SET_SC_IP6ADDRS:
2007 		man_set_sc_ip6addrs(wq, mp);
2008 		break;
2009 
2010 	case DLIOCRAW:
2011 		if (man_dlioc(msp, mp))
2012 			miocnak(wq, mp, 0, ENOMEM);
2013 		else {
2014 			msp->ms_flags |= MAN_SFLAG_RAW;
2015 			miocack(wq, mp, 0, 0);
2016 		}
2017 		break;
2018 
2019 	case DL_IOC_HDR_INFO:
2020 		man_dl_ioc_hdr_info(wq, mp);
2021 		break;
2022 
2023 	case MAN_ND_GET:
2024 	case MAN_ND_SET:
2025 		man_nd_getset(wq, mp);
2026 		break;
2027 
2028 	default:
2029 		MAN_DBG(MAN_DDI, ("man_ioctl: unknown ioc_cmd %d\n",
2030 		    (unsigned int)iocp->ioc_cmd));
2031 		miocnak(wq, mp, 0, EINVAL);
2032 		break;
2033 	}
2034 exit:
2035 	MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI), ("man_ioctl: exit\n"));
2036 
2037 }
2038 
2039 /*
2040  * man_plink: handle I_PLINK requests on the control stream
2041  */
2042 void
2043 man_plink(queue_t *wq, mblk_t *mp)
2044 {
2045 	struct linkblk	*linkp;
2046 	man_linkrec_t	*lrp;
2047 	int		status = 0;
2048 
2049 	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2050 
2051 	/*
2052 	 * Create a record to hold lower stream info. man_plumb will
2053 	 * retrieve it after calling ldi_ioctl(I_PLINK)
2054 	 */
2055 	lrp = man_kzalloc(sizeof (man_linkrec_t), KM_NOSLEEP);
2056 	if (lrp == NULL) {
2057 		status = ENOMEM;
2058 		goto exit;
2059 	}
2060 
2061 	lrp->l_muxid = linkp->l_index;
2062 	lrp->l_wq = linkp->l_qbot;
2063 	lrp->l_rq = RD(linkp->l_qbot);
2064 
2065 	man_linkrec_insert(lrp);
2066 
2067 exit:
2068 	if (status)
2069 		miocnak(wq, mp, 0, status);
2070 	else
2071 		miocack(wq, mp, 0, 0);
2072 
2073 }
2074 
2075 /*
2076  * man_unplink - handle I_PUNLINK requests on the control stream
2077  */
2078 void
2079 man_unplink(queue_t *wq, mblk_t *mp)
2080 {
2081 	struct linkblk	*linkp;
2082 
2083 	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2084 	RD(linkp->l_qbot)->q_ptr = NULL;
2085 	WR(linkp->l_qbot)->q_ptr = NULL;
2086 	miocack(wq, mp, 0, 0);
2087 }
2088 
2089 void
2090 man_linkrec_insert(man_linkrec_t *lrp)
2091 {
2092 	mutex_enter(&man_lock);
2093 
2094 	lrp->l_next = man_linkrec_head;
2095 	man_linkrec_head = lrp;
2096 
2097 	mutex_exit(&man_lock);
2098 
2099 }
2100 
2101 static queue_t *
2102 man_linkrec_find(int muxid)
2103 {
2104 	man_linkrec_t	*lpp;
2105 	man_linkrec_t	*lp;
2106 	queue_t		*wq = NULL;
2107 
2108 	mutex_enter(&man_lock);
2109 
2110 	if (man_linkrec_head == NULL)
2111 		goto exit;
2112 
2113 	lp = lpp = man_linkrec_head;
2114 	if (lpp->l_muxid == muxid) {
2115 		man_linkrec_head = lpp->l_next;
2116 	} else {
2117 		for (lp = lpp->l_next; lp; lp = lp->l_next) {
2118 			if (lp->l_muxid == muxid)
2119 				break;
2120 			lpp = lp;
2121 		}
2122 	}
2123 
2124 	if (lp == NULL)
2125 		goto exit;
2126 
2127 	wq = lp->l_wq;
2128 	ASSERT(wq != NULL);
2129 
2130 	lpp->l_next = lp->l_next;
2131 	man_kfree(lp, sizeof (man_linkrec_t));
2132 
2133 exit:
2134 	mutex_exit(&man_lock);
2135 
2136 	return (wq);
2137 }
2138 
2139 /*
2140  * Set instance linkcheck timer value.
2141  */
2142 static void
2143 man_set_linkcheck_time(queue_t *wq, mblk_t *mp)
2144 {
2145 	mi_time_t	*mtp;
2146 	int		error;
2147 	man_t		*manp;
2148 
2149 	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: enter"));
2150 
2151 	error = miocpullup(mp, sizeof (mi_time_t));
2152 	if (error != 0)
2153 		goto exit;
2154 
2155 	mtp = (mi_time_t *)mp->b_cont->b_rptr;
2156 
2157 	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: mtp"));
2158 	MAN_DBGCALL(MAN_LINK, man_print_mtp(mtp));
2159 
2160 	manp = ddi_get_soft_state(man_softstate, mtp->mtp_man_ppa);
2161 	if (manp == NULL) {
2162 		error = ENODEV;
2163 		goto exit;
2164 	}
2165 
2166 	manp->man_linkcheck_time = mtp->mtp_time;
2167 exit:
2168 	if (error)
2169 		miocnak(wq, mp, 0, error);
2170 	else
2171 		miocack(wq, mp, sizeof (mi_time_t), 0);
2172 }
2173 
2174 /*
2175  * Man path ioctl processing. Should only happen on the SSC. Called
2176  * with exclusive inner perimeter.
2177  */
2178 static void
2179 man_setpath(queue_t *wq, mblk_t *mp)
2180 {
2181 	mi_path_t		*mip;
2182 	int			error;
2183 
2184 	error = miocpullup(mp, sizeof (mi_path_t));
2185 	if (error != 0)
2186 		goto exit;
2187 
2188 	mip = (mi_path_t *)mp->b_cont->b_rptr;
2189 	mutex_enter(&man_lock);
2190 	error = man_pg_cmd(mip, NULL);
2191 	mutex_exit(&man_lock);
2192 
2193 exit:
2194 	if (error)
2195 		miocnak(wq, mp, 0, error);
2196 	else
2197 		miocack(wq, mp, sizeof (mi_path_t), 0);
2198 }
2199 
2200 /*
2201  * Get the local ethernet address of this machine.
2202  */
2203 static void
2204 man_geteaddr(queue_t *wq, mblk_t *mp)
2205 {
2206 	eaddr_t			*eap;
2207 	int			error;
2208 
2209 	error = miocpullup(mp, sizeof (eaddr_t));
2210 	if (error != 0) {
2211 		miocnak(wq, mp, 0, error);
2212 		return;
2213 	}
2214 
2215 	eap = (eaddr_t *)mp->b_cont->b_rptr;
2216 	(void) localetheraddr(NULL, eap);
2217 	miocack(wq, mp, sizeof (eaddr_t), 0);
2218 }
2219 
2220 /*
2221  * Set my SC and other SC IPv4 addresses for use in man_pinger routine.
2222  */
2223 static void
2224 man_set_sc_ipaddrs(queue_t *wq, mblk_t *mp)
2225 {
2226 	int			error;
2227 
2228 	error = miocpullup(mp, sizeof (man_sc_ipaddrs_t));
2229 	if (error != 0)
2230 		goto exit;
2231 
2232 	man_sc_ipaddrs = *(man_sc_ipaddrs_t *)mp->b_cont->b_rptr;
2233 
2234 #ifdef DEBUG
2235 	{
2236 		char	buf[INET_ADDRSTRLEN];
2237 
2238 		(void) inet_ntop(AF_INET,
2239 		    (void *) &man_sc_ipaddrs.ip_other_sc_ipaddr,
2240 		    buf, INET_ADDRSTRLEN);
2241 		MAN_DBG(MAN_CONFIG, ("ip_other_sc_ipaddr = %s", buf));
2242 		(void) inet_ntop(AF_INET,
2243 		    (void *) &man_sc_ipaddrs.ip_my_sc_ipaddr,
2244 		    buf, INET_ADDRSTRLEN);
2245 		MAN_DBG(MAN_CONFIG, ("ip_my_sc_ipaddr = %s", buf));
2246 	}
2247 #endif /* DEBUG */
2248 exit:
2249 	if (error)
2250 		miocnak(wq, mp, 0, error);
2251 	else
2252 		miocack(wq, mp, sizeof (man_sc_ipaddrs_t), 0);
2253 }
2254 
2255 /*
2256  * Set my SC and other SC IPv6 addresses for use in man_pinger routine.
2257  */
2258 static void
2259 man_set_sc_ip6addrs(queue_t *wq, mblk_t *mp)
2260 {
2261 	int			error;
2262 
2263 	error = miocpullup(mp, sizeof (man_sc_ip6addrs_t));
2264 	if (error != 0)
2265 		goto exit;
2266 
2267 	man_sc_ip6addrs = *(man_sc_ip6addrs_t *)mp->b_cont->b_rptr;
2268 
2269 #ifdef DEBUG
2270 	{
2271 		char	buf[INET6_ADDRSTRLEN];
2272 
2273 		(void) inet_ntop(AF_INET6,
2274 		    (void *) &man_sc_ip6addrs.ip6_other_sc_ipaddr,
2275 		    buf, INET6_ADDRSTRLEN);
2276 		MAN_DBG(MAN_CONFIG, ("ip6_other_sc_ipaddr = %s", buf));
2277 		(void) inet_ntop(AF_INET6,
2278 		    (void *) &man_sc_ip6addrs.ip6_my_sc_ipaddr,
2279 		    buf, INET6_ADDRSTRLEN);
2280 		MAN_DBG(MAN_CONFIG, ("ip6_my_sc_ipaddr = %s", buf));
2281 	}
2282 #endif /* DEBUG */
2283 exit:
2284 	if (error)
2285 		miocnak(wq, mp, 0, error);
2286 	else
2287 		miocack(wq, mp, sizeof (man_sc_ip6addrs_t), 0);
2288 }
2289 
2290 /*
2291  * M_DATA fastpath info request.
2292  */
2293 static void
2294 man_dl_ioc_hdr_info(queue_t *wq, mblk_t *mp)
2295 {
2296 	manstr_t		*msp;
2297 	man_t			*manp;
2298 	mblk_t			*nmp;
2299 	man_dladdr_t		*dlap;
2300 	dl_unitdata_req_t	*dludp;
2301 	struct	ether_header	*headerp;
2302 	t_uscalar_t		off, len;
2303 	int			status = 0;
2304 
2305 	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: enter"));
2306 
2307 	msp = (manstr_t *)wq->q_ptr;
2308 	manp = msp->ms_manp;
2309 	if (manp == NULL) {
2310 		status = EINVAL;
2311 		goto exit;
2312 	}
2313 
2314 	status = miocpullup(mp, sizeof (dl_unitdata_req_t) + MAN_ADDRL);
2315 	if (status != 0)
2316 		goto exit;
2317 
2318 	/*
2319 	 * Sanity check the DL_UNITDATA_REQ destination address
2320 	 * offset and length values.
2321 	 */
2322 	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
2323 	off = dludp->dl_dest_addr_offset;
2324 	len = dludp->dl_dest_addr_length;
2325 	if (dludp->dl_primitive != DL_UNITDATA_REQ ||
2326 	    !MBLKIN(mp->b_cont, off, len) || len != MAN_ADDRL) {
2327 		status = EINVAL;
2328 		goto exit;
2329 	}
2330 
2331 	dlap = (man_dladdr_t  *)(mp->b_cont->b_rptr + off);
2332 
2333 	/*
2334 	 * Allocate a new mblk to hold the ether header.
2335 	 */
2336 	if ((nmp = allocb(ETHERHEADER_SIZE, BPRI_MED)) == NULL) {
2337 		status = ENOMEM;
2338 		goto exit;
2339 	}
2340 
2341 	/* We only need one dl_ioc_hdr mblk for replay */
2342 	if (!(msp->ms_flags & MAN_SFLAG_FAST))
2343 		status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2344 
2345 	/* Forward the packet to all lower destinations. */
2346 	if ((status != 0) || ((status = man_dlpi_senddown(msp, mp)) != 0)) {
2347 		freemsg(nmp);
2348 		goto exit;
2349 	}
2350 
2351 	nmp->b_wptr += ETHERHEADER_SIZE;
2352 
2353 	/*
2354 	 * Fill in the ether header.
2355 	 */
2356 	headerp = (struct ether_header *)nmp->b_rptr;
2357 	ether_copy(&dlap->dl_phys, &headerp->ether_dhost);
2358 	ether_copy(&manp->man_eaddr, &headerp->ether_shost);
2359 	put_ether_type(headerp, dlap->dl_sap);
2360 
2361 	/*
2362 	 * Link new mblk in after the "request" mblks.
2363 	 */
2364 	linkb(mp, nmp);
2365 
2366 exit:
2367 	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: returns, status = %d",
2368 	    status));
2369 
2370 	if (status) {
2371 		miocnak(wq, mp, 0, status);
2372 	} else {
2373 		msp = (manstr_t *)wq->q_ptr;
2374 		msp->ms_flags |= MAN_SFLAG_FAST;
2375 		miocack(wq, mp, msgsize(mp->b_cont), 0);
2376 	}
2377 
2378 }
2379 
2380 /*
2381  * man_uwsrv - Upper write queue service routine to handle deferred
2382  * DLPI messages issued from upstream, the write side of the upper half
2383  * of multiplexor. It is also used by man_bwork to switch the lower
2384  * multiplexor.
2385  *
2386  *	wq - upper write queue of mxx
2387  */
2388 static int
2389 man_uwsrv(queue_t *wq)
2390 {
2391 	register mblk_t		*mp;
2392 	manstr_t		*msp;		/* per stream data */
2393 	man_t			*manp;		/* per instance data */
2394 	ehdr_t			*ep;
2395 	int			status;
2396 
2397 	msp = (manstr_t *)wq->q_ptr;
2398 
2399 	MAN_DBG(MAN_UWSRV, ("man_uwsrv: wq(0x%p) msp", (void *)wq));
2400 	MAN_DBGCALL(MAN_UWSRV, man_print_msp(msp));
2401 
2402 	if (msp == NULL)
2403 		goto done;
2404 
2405 	manp = msp->ms_manp;
2406 
2407 	while (mp = getq(wq)) {
2408 
2409 		switch (DB_TYPE(mp)) {
2410 		/*
2411 		 * Can probably remove this as I never put data messages
2412 		 * here.
2413 		 */
2414 		case M_DATA:
2415 			if (manp) {
2416 				ep = (ehdr_t *)mp->b_rptr;
2417 				status = man_start(wq, mp, &ep->ether_dhost);
2418 				if (status) {
2419 					/*
2420 					 * man_start() indicated flow control
2421 					 * situation, stop processing now.
2422 					 */
2423 					goto break_loop;
2424 				}
2425 			} else
2426 				freemsg(mp);
2427 			break;
2428 
2429 		case M_PROTO:
2430 		case M_PCPROTO:
2431 			status = man_proto(wq, mp);
2432 			if (status) {
2433 				/*
2434 				 * man_proto() indicated flow control
2435 				 * situation detected by man_start(),
2436 				 * stop processing now.
2437 				 */
2438 				goto break_loop;
2439 			}
2440 			break;
2441 
2442 		default:
2443 			MAN_DBG(MAN_UWSRV, ("man_uwsrv: discarding mp(0x%p)",
2444 			    (void *)mp));
2445 			freemsg(mp);
2446 			break;
2447 		}
2448 	}
2449 
2450 break_loop:
2451 	/*
2452 	 * Check to see if bgthread wants us to do something inside the
2453 	 * perimeter.
2454 	 */
2455 	if ((msp->ms_flags & MAN_SFLAG_CONTROL) &&
2456 	    man_iwork_q->q_work != NULL) {
2457 
2458 		man_iwork();
2459 	}
2460 
2461 done:
2462 
2463 	MAN_DBG(MAN_UWSRV, ("man_uwsrv: returns"));
2464 
2465 	return (0);
2466 }
2467 
2468 
2469 /*
2470  * man_proto - handle DLPI protocol requests issued from upstream.
2471  * Called by man_uwsrv().  We disassociate upper and lower multiplexor
2472  * DLPI state transitions. The upper stream here (manstr_t) transitions
2473  * appropriately, saves the DLPI requests via man_dlpi(), and then
2474  * arranges for the DLPI request to be sent down via man_dlpi_senddown() if
2475  * appropriate.
2476  *
2477  *	wq - upper write queue of mxx
2478  *	mp - mbl ptr to protocol request
2479  */
2480 static int
2481 man_proto(queue_t *wq, mblk_t *mp)
2482 {
2483 	union DL_primitives	*dlp;
2484 	int			flow_status = 0;
2485 
2486 	dlp = (union DL_primitives *)mp->b_rptr;
2487 
2488 	MAN_DBG((MAN_UWSRV | MAN_DLPI),
2489 	    ("man_proto: mp(0x%p) prim(%s)\n", (void *)mp,
2490 	    dps[dlp->dl_primitive]));
2491 
2492 	switch (dlp->dl_primitive) {
2493 	case DL_UNITDATA_REQ:
2494 		flow_status = man_udreq(wq, mp);
2495 		break;
2496 
2497 	case DL_ATTACH_REQ:
2498 		man_areq(wq, mp);
2499 		break;
2500 
2501 	case DL_DETACH_REQ:
2502 		man_dreq(wq, mp);
2503 		break;
2504 
2505 	case DL_BIND_REQ:
2506 		man_breq(wq, mp);
2507 		break;
2508 
2509 	case DL_UNBIND_REQ:
2510 		man_ubreq(wq, mp);
2511 		break;
2512 
2513 	case DL_INFO_REQ:
2514 		man_ireq(wq, mp);
2515 		break;
2516 
2517 	case DL_PROMISCON_REQ:
2518 		man_ponreq(wq, mp);
2519 		break;
2520 
2521 	case DL_PROMISCOFF_REQ:
2522 		man_poffreq(wq, mp);
2523 		break;
2524 
2525 	case DL_ENABMULTI_REQ:
2526 		man_emreq(wq, mp);
2527 		break;
2528 
2529 	case DL_DISABMULTI_REQ:
2530 		man_dmreq(wq, mp);
2531 		break;
2532 
2533 	case DL_PHYS_ADDR_REQ:
2534 		man_pareq(wq, mp);
2535 		break;
2536 
2537 	case DL_SET_PHYS_ADDR_REQ:
2538 		man_spareq(wq, mp);
2539 		break;
2540 
2541 	default:
2542 		MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: prim(%d)\n",
2543 		    dlp->dl_primitive));
2544 		dlerrorack(wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
2545 		break;
2546 
2547 	} /* End switch */
2548 
2549 	MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: exit\n"));
2550 	return (flow_status);
2551 
2552 }
2553 
2554 static int
2555 man_udreq(queue_t *wq, mblk_t *mp)
2556 {
2557 	manstr_t		*msp;
2558 	dl_unitdata_req_t	*dludp;
2559 	mblk_t	*nmp;
2560 	man_dladdr_t		*dlap;
2561 	t_uscalar_t 		off, len;
2562 	int 			flow_status = 0;
2563 
2564 	msp = (manstr_t *)wq->q_ptr;
2565 
2566 
2567 	if (msp->ms_dlpistate != DL_IDLE) {
2568 		dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
2569 		return (flow_status);
2570 	}
2571 	dludp = (dl_unitdata_req_t *)mp->b_rptr;
2572 	off = dludp->dl_dest_addr_offset;
2573 	len = dludp->dl_dest_addr_length;
2574 
2575 	/*
2576 	 * Validate destination address format.
2577 	 */
2578 	if (!MBLKIN(mp, off, len) || (len != MAN_ADDRL)) {
2579 		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADADDR, 0);
2580 		return (flow_status);
2581 	}
2582 
2583 	/*
2584 	 * Error if no M_DATA follows.
2585 	 */
2586 	nmp = mp->b_cont;
2587 	if (nmp == NULL) {
2588 		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADDATA, 0);
2589 		return (flow_status);
2590 	}
2591 
2592 	dlap = (man_dladdr_t *)(mp->b_rptr + off);
2593 
2594 	flow_status = man_start(wq, mp, &dlap->dl_phys);
2595 	return (flow_status);
2596 }
2597 
2598 /*
2599  * Handle DL_ATTACH_REQ.
2600  */
2601 static void
2602 man_areq(queue_t *wq, mblk_t *mp)
2603 {
2604 	man_t			*manp;	/* per instance data */
2605 	manstr_t		*msp;	/* per stream data */
2606 	short			ppa;
2607 	union DL_primitives	*dlp;
2608 	mblk_t			*preq = NULL;
2609 	int			did_refcnt = FALSE;
2610 	int			dlerror = 0;
2611 	int			status = 0;
2612 
2613 	msp = (manstr_t *)wq->q_ptr;
2614 	dlp = (union DL_primitives *)mp->b_rptr;
2615 
2616 	/*
2617 	 * Attach us to MAN PPA (device instance).
2618 	 */
2619 	if (MBLKL(mp) < DL_ATTACH_REQ_SIZE) {
2620 		dlerror = DL_BADPRIM;
2621 		goto exit;
2622 	}
2623 
2624 	if (msp->ms_dlpistate != DL_UNATTACHED) {
2625 		dlerror = DL_OUTSTATE;
2626 		goto exit;
2627 	}
2628 
2629 	ppa = dlp->attach_req.dl_ppa;
2630 	if (ppa == -1 || qassociate(wq, ppa) != 0) {
2631 		dlerror = DL_BADPPA;
2632 		MAN_DBG(MAN_WARN, ("man_areq: bad PPA %d", ppa));
2633 		goto exit;
2634 	}
2635 
2636 	mutex_enter(&man_lock);
2637 	manp = ddi_get_soft_state(man_softstate, ppa);
2638 	ASSERT(manp != NULL);	/* qassociate() succeeded */
2639 
2640 	manp->man_refcnt++;
2641 	did_refcnt = TRUE;
2642 	mutex_exit(&man_lock);
2643 
2644 	/*
2645 	 * Create a DL replay list for the lower stream. These wont
2646 	 * actually be sent down until the lower streams are made active
2647 	 * (sometime after the call to man_init_dests below).
2648 	 */
2649 	preq = man_alloc_physreq_mp(&manp->man_eaddr);
2650 	if (preq == NULL) {
2651 		dlerror = DL_SYSERR;
2652 		status = ENOMEM;
2653 		goto exit;
2654 	}
2655 
2656 	/*
2657 	 * Make copy for dlpi resync of upper and lower streams.
2658 	 */
2659 	if (man_dlpi(msp, mp)) {
2660 		dlerror = DL_SYSERR;
2661 		status = ENOMEM;
2662 		goto exit;
2663 	}
2664 
2665 	/* TBD - need to clean off ATTACH req on failure here. */
2666 	if (man_dlpi(msp, preq)) {
2667 		dlerror = DL_SYSERR;
2668 		status = ENOMEM;
2669 		goto exit;
2670 	}
2671 
2672 	/*
2673 	 * man_init_dests/man_start_dest needs these set before call.
2674 	 */
2675 	msp->ms_manp = manp;
2676 	msp->ms_meta_ppa = ppa;
2677 
2678 	/*
2679 	 *  Allocate and init lower destination structures.
2680 	 */
2681 	ASSERT(msp->ms_dests == NULL);
2682 	if (man_init_dests(manp, msp)) {
2683 		mblk_t	 *tmp;
2684 
2685 		/*
2686 		 * If we cant get the lower streams ready, then
2687 		 * remove the messages from the DL replay list and
2688 		 * fail attach.
2689 		 */
2690 		while ((tmp = msp->ms_dl_mp) != NULL) {
2691 			msp->ms_dl_mp = msp->ms_dl_mp->b_next;
2692 			tmp->b_next = tmp->b_prev = NULL;
2693 			freemsg(tmp);
2694 		}
2695 
2696 		msp->ms_manp = NULL;
2697 		msp->ms_meta_ppa = -1;
2698 
2699 		dlerror = DL_SYSERR;
2700 		status = ENOMEM;
2701 		goto exit;
2702 	}
2703 
2704 	MAN_DBG(MAN_DLPI, ("man_areq: ppa 0x%x man_refcnt: %d\n",
2705 	    ppa, manp->man_refcnt));
2706 
2707 	SETSTATE(msp, DL_UNBOUND);
2708 
2709 exit:
2710 	if (dlerror == 0) {
2711 		dlokack(wq, mp, DL_ATTACH_REQ);
2712 	} else {
2713 		if (did_refcnt) {
2714 			mutex_enter(&man_lock);
2715 			manp->man_refcnt--;
2716 			mutex_exit(&man_lock);
2717 		}
2718 		dlerrorack(wq, mp, DL_ATTACH_REQ, dlerror, status);
2719 		(void) qassociate(wq, -1);
2720 	}
2721 	if (preq != NULL)
2722 		freemsg(preq);
2723 
2724 }
2725 
2726 /*
2727  * Called at DL_ATTACH time.
2728  * Man_lock is held to protect pathgroup list(man_pg).
2729  */
2730 static int
2731 man_init_dests(man_t *manp, manstr_t *msp)
2732 {
2733 	man_dest_t	*mdp;
2734 	man_pg_t	*mpg;
2735 	int		i;
2736 
2737 	mdp = man_kzalloc(MAN_DEST_ARRAY_SIZE, KM_NOSLEEP);
2738 	if (mdp == NULL)
2739 		return (ENOMEM);
2740 
2741 	msp->ms_dests = mdp;
2742 
2743 	mutex_enter(&man_lock);
2744 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2745 
2746 		mdp[i].md_muxid = -1;	/* muxid 0 is valid */
2747 		mutex_init(&mdp->md_lock, NULL, MUTEX_DRIVER, NULL);
2748 
2749 		mpg = man_find_pg_by_id(manp->man_pg, i);
2750 
2751 		if (mpg && man_find_active_path(mpg->mpg_pathp))
2752 			man_start_dest(&mdp[i], msp, mpg);
2753 	}
2754 	mutex_exit(&man_lock);
2755 
2756 	return (0);
2757 }
2758 
2759 /*
2760  * Get a destination ready for use.
2761  */
2762 static void
2763 man_start_dest(man_dest_t *mdp, manstr_t *msp, man_pg_t *mpg)
2764 {
2765 	man_path_t	*ap;
2766 
2767 	mdp->md_muxid = -1;
2768 	mdp->md_dlpistate = DL_UNATTACHED;
2769 	mdp->md_msp = msp;
2770 	mdp->md_rq = msp->ms_rq;
2771 	mdp->md_pg_id = mpg->mpg_pg_id;
2772 
2773 	ASSERT(msp->ms_manp);
2774 
2775 	ether_copy(&msp->ms_manp->man_eaddr, &mdp->md_src_eaddr);
2776 	ether_copy(&mpg->mpg_dst_eaddr, &mdp->md_dst_eaddr);
2777 
2778 	ap = man_find_active_path(mpg->mpg_pathp);
2779 	ASSERT(ap);
2780 	mdp->md_device = ap->mp_device;
2781 
2782 	/*
2783 	 * Set up linktimers so that first time through, we will do
2784 	 * a failover.
2785 	 */
2786 	mdp->md_linkstate = MAN_LINKFAIL;
2787 	mdp->md_state = MAN_DSTATE_INITIALIZING;
2788 	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
2789 	    (void *)mdp, man_gettimer(MAN_TIMER_INIT, mdp));
2790 
2791 	/*
2792 	 * As an optimization, if there is only one destination,
2793 	 * remember the destination pointer. Used by man_start().
2794 	 */
2795 	man_set_optimized_dest(msp);
2796 
2797 	MAN_DBG(MAN_DEST, ("man_start_dest: mdp"));
2798 	MAN_DBGCALL(MAN_DEST, man_print_mdp(mdp));
2799 }
2800 
2801 static void
2802 man_set_optimized_dest(manstr_t *msp)
2803 {
2804 	int		count = 0;
2805 	int		i;
2806 	man_dest_t	*mdp = NULL;
2807 
2808 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2809 		if (msp->ms_dests[i].md_msp != NULL) {
2810 			count++;
2811 			mdp = &msp->ms_dests[i];
2812 		}
2813 	}
2814 
2815 	if (count == 1)
2816 		msp->ms_destp = mdp;
2817 	else
2818 		msp->ms_destp = NULL;
2819 
2820 }
2821 
2822 /*
2823  * Catch dlpi message for replaying, and arrange to send it down
2824  * to any destinations not PLUMBING. See man_dlpi_replay().
2825  */
2826 static int
2827 man_dlpi(manstr_t *msp, mblk_t *mp)
2828 {
2829 	int	status;
2830 
2831 	status = man_dl_catch(&msp->ms_dl_mp, mp);
2832 	if (status == 0)
2833 		status = man_dlpi_senddown(msp, mp);
2834 
2835 	return (status);
2836 }
2837 
2838 /*
2839  * Catch IOCTL type DL_ messages.
2840  */
2841 static int
2842 man_dlioc(manstr_t *msp, mblk_t *mp)
2843 {
2844 	int status;
2845 
2846 	status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2847 	if (status == 0)
2848 		status = man_dlpi_senddown(msp, mp);
2849 
2850 	return (status);
2851 }
2852 
2853 /*
2854  * We catch all DLPI messages that we have to resend to a new AP'ed
2855  * device to put him in the right state.  We link these messages together
2856  * w/ their b_next fields and hang it off of msp->ms_dl_mp.  We
2857  * must be careful to restore b_next fields before doing dupmsg/freemsg!
2858  *
2859  *	msp - pointer of stream struct to process
2860  *	mblk - pointer to DLPI request to catch
2861  */
2862 static int
2863 man_dl_catch(mblk_t **mplist, mblk_t *mp)
2864 {
2865 	mblk_t			*dupmp;
2866 	mblk_t			*tmp;
2867 	unsigned		prim;
2868 	int			status = 0;
2869 
2870 	dupmp = copymsg(mp);
2871 	if (dupmp == NULL) {
2872 		status = ENOMEM;
2873 		goto exit;
2874 	}
2875 
2876 
2877 	if (*mplist == NULL)
2878 		*mplist = dupmp;
2879 	else {
2880 		for (tmp = *mplist; tmp->b_next; )
2881 			tmp = tmp->b_next;
2882 
2883 		tmp->b_next = dupmp;
2884 	}
2885 
2886 	prim = DL_PRIM(mp);
2887 	MAN_DBG(MAN_DLPI,
2888 	    ("man_dl_catch: adding %s\n",
2889 	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
2890 	    (prim == DLIOCRAW) ? "DLIOCRAW" :
2891 	    (prim == DL_PROMISCON_REQ) ? promisc[DL_PROMISCON_TYPE(mp)] :
2892 	    dps[prim]));
2893 
2894 exit:
2895 
2896 	return (status);
2897 }
2898 
2899 /*
2900  * Send down a single DLPI M_[PC]PROTO to all currently valid dests.
2901  *
2902  *	msp - ptr to NDM stream structure DL_ messages was received on.
2903  *	mp - ptr to mblk containing DL_ request.
2904  */
2905 static int
2906 man_dlpi_senddown(manstr_t *msp, mblk_t *mp)
2907 {
2908 	man_dest_t	*mdp;
2909 	int		i;
2910 	mblk_t		*rmp[MAN_MAX_DESTS];	/* Copy to replay */
2911 	int		dstate[MAN_MAX_DESTS];
2912 	int		no_dests = TRUE;
2913 	int		status = 0;
2914 
2915 	if (msp->ms_dests == NULL)
2916 		goto exit;
2917 
2918 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2919 		mdp = &msp->ms_dests[i];
2920 		if (mdp->md_state == MAN_DSTATE_READY) {
2921 			dstate[i] = TRUE;
2922 			no_dests = FALSE;
2923 		} else {
2924 			dstate[i] = FALSE;
2925 		}
2926 		rmp[i] = NULL;
2927 	}
2928 
2929 	if (no_dests)
2930 		goto exit;
2931 
2932 	/*
2933 	 * Build replay and duplicate list for all possible destinations.
2934 	 */
2935 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2936 		if (dstate[i]) {
2937 			rmp[i] = copymsg(mp);
2938 			if (rmp[i] == NULL) {
2939 				status = ENOMEM;
2940 				break;
2941 			}
2942 		}
2943 	}
2944 
2945 	if (status == 0) {
2946 		for (i = 0; i < MAN_MAX_DESTS; i++)
2947 			if (dstate[i]) {
2948 				mdp = &msp->ms_dests[i];
2949 
2950 				ASSERT(mdp->md_wq != NULL);
2951 				ASSERT(mp->b_next == NULL);
2952 				ASSERT(mp->b_prev == NULL);
2953 
2954 				man_dlpi_replay(mdp, rmp[i]);
2955 			}
2956 	} else {
2957 		for (; i >= 0; i--)
2958 			if (dstate[i] && rmp[i])
2959 				freemsg(rmp[i]);
2960 	}
2961 
2962 exit:
2963 	return (status);
2964 }
2965 
2966 /*
2967  * man_dlpi_replay - traverse the list of DLPI requests and reapply them to
2968  * get the upper and lower streams into the same state. Called holding inner
2969  * perimeter lock exclusive. Note thet we defer M_IOCTL type dlpi messages
2970  * until we get an OK_ACK to our ATTACH (see man_lrsrv and
2971  * man_dlioc_replay).
2972  *
2973  * 	mdp - pointer to lower queue (destination)
2974  *	rmp - list of mblks to send down stream.
2975  */
2976 static void
2977 man_dlpi_replay(man_dest_t *mdp, mblk_t *rmp)
2978 {
2979 	mblk_t			*mp;
2980 	union DL_primitives	*dlp = NULL;
2981 
2982 	MAN_DBG(MAN_DLPI, ("man_dlpi_replay: mdp(0x%p)", (void *)mdp));
2983 
2984 	while (rmp) {
2985 		mp = rmp;
2986 		rmp = rmp->b_next;
2987 		mp->b_prev = mp->b_next = NULL;
2988 
2989 		dlp = (union DL_primitives *)mp->b_rptr;
2990 		MAN_DBG(MAN_DLPI,
2991 		    ("man_dlpi_replay: mdp(0x%p) sending %s\n",
2992 		    (void *)mdp,
2993 		    (dlp->dl_primitive == DL_IOC_HDR_INFO) ?
2994 		    "DL_IOC_HDR_INFO" : (dlp->dl_primitive == DLIOCRAW) ?
2995 		    "DLIOCRAW" : dps[(unsigned)(dlp->dl_primitive)]));
2996 
2997 		if (dlp->dl_primitive == DL_ATTACH_REQ) {
2998 			/*
2999 			 * insert the lower devices ppa.
3000 			 */
3001 			dlp->attach_req.dl_ppa = mdp->md_device.mdev_ppa;
3002 		}
3003 
3004 		(void) putnext(mdp->md_wq, mp);
3005 	}
3006 
3007 }
3008 
3009 static void
3010 man_dreq(queue_t *wq, mblk_t *mp)
3011 {
3012 	manstr_t	*msp;	/* per stream data */
3013 	man_work_t	*wp;
3014 
3015 	msp = (manstr_t *)wq->q_ptr;
3016 
3017 	if (MBLKL(mp) < DL_DETACH_REQ_SIZE) {
3018 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_BADPRIM, 0);
3019 		return;
3020 	}
3021 
3022 	if (msp->ms_dlpistate != DL_UNBOUND) {
3023 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_OUTSTATE, 0);
3024 		return;
3025 	}
3026 
3027 	ASSERT(msp->ms_dests != NULL);
3028 
3029 	wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_NOSLEEP);
3030 	if (wp == NULL) {
3031 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_SYSERR, ENOMEM);
3032 		return;
3033 	}
3034 	man_dodetach(msp, wp);
3035 	(void) qassociate(wq, -1);
3036 
3037 	SETSTATE(msp, DL_UNATTACHED);
3038 
3039 	dlokack(wq, mp, DL_DETACH_REQ);
3040 }
3041 
3042 static void
3043 man_dl_clean(mblk_t **mplist)
3044 {
3045 	mblk_t	*tmp;
3046 
3047 	/*
3048 	 * Toss everything.
3049 	 */
3050 	while ((tmp = *mplist) != NULL) {
3051 		*mplist = (*mplist)->b_next;
3052 		tmp->b_next = tmp->b_prev = NULL;
3053 		freemsg(tmp);
3054 	}
3055 
3056 }
3057 
3058 /*
3059  * man_dl_release - Remove the corresponding DLPI request from the
3060  * catch list. Walk thru the catch list looking for the other half of
3061  * the pair and delete it.  If we are detaching, delete the entire list.
3062  *
3063  *	msp - pointer of stream struct to process
3064  *	mp  - pointer to mblk to first half of pair.  We will delete other
3065  * 		half of pair based on this.
3066  */
3067 static void
3068 man_dl_release(mblk_t **mplist, mblk_t *mp)
3069 {
3070 	uchar_t			match_dbtype;
3071 	mblk_t			*tmp;
3072 	mblk_t			*tmpp;
3073 	int			matched = FALSE;
3074 
3075 	if (*mplist == NULL)
3076 		goto exit;
3077 
3078 	match_dbtype = DB_TYPE(mp);
3079 
3080 	/*
3081 	 * Currently we only clean DL_ PROTO type messages. There is
3082 	 * no way to turn off M_CTL or DL_IOC stuff other than sending
3083 	 * down a DL_DETACH, which resets everything.
3084 	 */
3085 	if (match_dbtype != M_PROTO && match_dbtype != M_PCPROTO) {
3086 		goto exit;
3087 	}
3088 
3089 	/*
3090 	 * Selectively find a caught mblk that matches this one and
3091 	 * remove it from the list
3092 	 */
3093 	tmp = tmpp = *mplist;
3094 	matched = man_match_proto(mp, tmp);
3095 	if (matched) {
3096 		*mplist = tmp->b_next;
3097 		tmp->b_next = tmp->b_prev = NULL;
3098 	} else {
3099 		for (tmp = tmp->b_next; tmp != NULL; tmp = tmp->b_next) {
3100 			if (matched = man_match_proto(mp, tmp))
3101 				break;
3102 			tmpp = tmp;
3103 		}
3104 
3105 		if (matched) {
3106 			tmpp->b_next = tmp->b_next;
3107 			tmp->b_next = tmp->b_prev = NULL;
3108 		}
3109 	}
3110 
3111 exit:
3112 	if (matched) {
3113 
3114 		MAN_DBG(MAN_DLPI, ("man_dl_release: release %s",
3115 		    (DL_PRIM(mp) == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3116 		    (DL_PRIM(mp) == DLIOCRAW) ? "DLIOCRAW" :
3117 		    dps[(int)DL_PRIM(mp)]));
3118 
3119 		freemsg(tmp);
3120 	}
3121 	MAN_DBG(MAN_DLPI, ("man_dl_release: returns"));
3122 
3123 }
3124 
3125 /*
3126  * Compare two DL_ messages. If they are complimentary (e.g. DL_UNBIND
3127  * compliments DL_BIND), return true.
3128  */
3129 static int
3130 man_match_proto(mblk_t *mp1, mblk_t *mp2)
3131 {
3132 	t_uscalar_t	prim1;
3133 	t_uscalar_t	prim2;
3134 	int		matched = FALSE;
3135 
3136 	/*
3137 	 * Primitive to clean off list.
3138 	 */
3139 	prim1 = DL_PRIM(mp1);
3140 	prim2 = DL_PRIM(mp2);
3141 
3142 	switch (prim1) {
3143 	case DL_UNBIND_REQ:
3144 		if (prim2 == DL_BIND_REQ)
3145 			matched = TRUE;
3146 		break;
3147 
3148 	case DL_PROMISCOFF_REQ:
3149 		if (prim2 == DL_PROMISCON_REQ) {
3150 			dl_promiscoff_req_t	*poff1;
3151 			dl_promiscoff_req_t	*poff2;
3152 
3153 			poff1 = (dl_promiscoff_req_t *)mp1->b_rptr;
3154 			poff2 = (dl_promiscoff_req_t *)mp2->b_rptr;
3155 
3156 			if (poff1->dl_level == poff2->dl_level)
3157 				matched = TRUE;
3158 		}
3159 		break;
3160 
3161 	case DL_DISABMULTI_REQ:
3162 		if (prim2 == DL_ENABMULTI_REQ) {
3163 			union DL_primitives	*dlp;
3164 			t_uscalar_t		off;
3165 			eaddr_t			*addrp1;
3166 			eaddr_t			*addrp2;
3167 
3168 			dlp = (union DL_primitives *)mp1->b_rptr;
3169 			off = dlp->disabmulti_req.dl_addr_offset;
3170 			addrp1 = (eaddr_t *)(mp1->b_rptr + off);
3171 
3172 			dlp = (union DL_primitives *)mp2->b_rptr;
3173 			off = dlp->disabmulti_req.dl_addr_offset;
3174 			addrp2 = (eaddr_t *)(mp2->b_rptr + off);
3175 
3176 			if (ether_cmp(addrp1, addrp2) == 0)
3177 				matched = 1;
3178 		}
3179 		break;
3180 
3181 	default:
3182 		break;
3183 	}
3184 
3185 	MAN_DBG(MAN_DLPI, ("man_match_proto returns %d", matched));
3186 
3187 	return (matched);
3188 }
3189 
3190 /*
3191  * Bind upper stream to a particular SAP. Called with exclusive innerperim
3192  * QPAIR, shared outerperim.
3193  */
3194 static void
3195 man_breq(queue_t *wq, mblk_t *mp)
3196 {
3197 	man_t			*manp;	/* per instance data */
3198 	manstr_t		*msp;	/* per stream data */
3199 	union DL_primitives	*dlp;
3200 	man_dladdr_t		man_addr;
3201 	t_uscalar_t		sap;
3202 	t_uscalar_t		xidtest;
3203 
3204 	msp = (manstr_t *)wq->q_ptr;
3205 
3206 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
3207 		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADPRIM, 0);
3208 		return;
3209 	}
3210 
3211 	if (msp->ms_dlpistate != DL_UNBOUND) {
3212 		dlerrorack(wq, mp, DL_BIND_REQ, DL_OUTSTATE, 0);
3213 		return;
3214 	}
3215 
3216 	dlp = (union DL_primitives *)mp->b_rptr;
3217 	manp = msp->ms_manp;			/* valid after attach */
3218 	sap = dlp->bind_req.dl_sap;
3219 	xidtest = dlp->bind_req.dl_xidtest_flg;
3220 
3221 	ASSERT(manp);
3222 
3223 	if (xidtest) {
3224 		dlerrorack(wq, mp, DL_BIND_REQ, DL_NOAUTO, 0);
3225 		return;
3226 	}
3227 
3228 	if (sap > ETHERTYPE_MAX) {
3229 		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADSAP, 0);
3230 		return;
3231 	}
3232 
3233 	if (man_dlpi(msp, mp)) {
3234 		dlerrorack(wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
3235 		return;
3236 	}
3237 
3238 	msp->ms_sap = sap;
3239 
3240 	SETSTATE(msp, DL_IDLE);
3241 
3242 	man_addr.dl_sap = msp->ms_sap;
3243 	ether_copy(&msp->ms_manp->man_eaddr, &man_addr.dl_phys);
3244 
3245 	dlbindack(wq, mp, msp->ms_sap, &man_addr, MAN_ADDRL, 0, 0);
3246 
3247 }
3248 
3249 static void
3250 man_ubreq(queue_t *wq, mblk_t *mp)
3251 {
3252 	manstr_t		*msp;	/* per stream data */
3253 
3254 	msp = (manstr_t *)wq->q_ptr;
3255 
3256 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
3257 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
3258 		return;
3259 	}
3260 
3261 	if (msp->ms_dlpistate != DL_IDLE) {
3262 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
3263 		return;
3264 	}
3265 
3266 	if (man_dlpi_senddown(msp, mp)) {
3267 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
3268 		return;
3269 	}
3270 
3271 	man_dl_release(&msp->ms_dl_mp, mp);
3272 
3273 	SETSTATE(msp, DL_UNBOUND);
3274 
3275 	dlokack(wq, mp, DL_UNBIND_REQ);
3276 
3277 }
3278 
3279 static void
3280 man_ireq(queue_t *wq, mblk_t *mp)
3281 {
3282 	manstr_t	*msp;
3283 	dl_info_ack_t	*dlip;
3284 	man_dladdr_t	*dlap;
3285 	eaddr_t		*ep;
3286 	size_t	size;
3287 
3288 	msp = (manstr_t *)wq->q_ptr;
3289 
3290 	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
3291 		dlerrorack(wq, mp, DL_INFO_REQ, DL_BADPRIM, 0);
3292 		return;
3293 	}
3294 
3295 	/* Exchange current msg for a DL_INFO_ACK. */
3296 	size = sizeof (dl_info_ack_t) + MAN_ADDRL + ETHERADDRL;
3297 	mp = mexchange(wq, mp, size, M_PCPROTO, DL_INFO_ACK);
3298 	if (mp == NULL) {
3299 		MAN_DBG(MAN_DLPI, ("man_ireq: man_ireq: mp == NULL."));
3300 		return;
3301 	}
3302 
3303 	/* Fill in the DL_INFO_ACK fields and reply. */
3304 	dlip = (dl_info_ack_t *)mp->b_rptr;
3305 	*dlip = man_infoack;
3306 	dlip->dl_current_state = msp->ms_dlpistate;
3307 	dlap = (man_dladdr_t *)(mp->b_rptr + dlip->dl_addr_offset);
3308 	dlap->dl_sap = msp->ms_sap;
3309 
3310 	/*
3311 	 * If attached, return physical address.
3312 	 */
3313 	if (msp->ms_manp != NULL) {
3314 		ether_copy(&msp->ms_manp->man_eaddr, &dlap->dl_phys);
3315 	} else {
3316 		bzero((caddr_t)&dlap->dl_phys, ETHERADDRL);
3317 	}
3318 
3319 	ep = (struct ether_addr *)(mp->b_rptr + dlip->dl_brdcst_addr_offset);
3320 	ether_copy(&etherbroadcast, ep);
3321 
3322 	qreply(wq, mp);
3323 
3324 }
3325 
3326 
3327 static void
3328 man_ponreq(queue_t *wq, mblk_t *mp)
3329 {
3330 	manstr_t	*msp;
3331 	int		flag;
3332 
3333 	msp = (manstr_t *)wq->q_ptr;
3334 
3335 	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
3336 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
3337 		return;
3338 	}
3339 
3340 	switch (((dl_promiscon_req_t *)mp->b_rptr)->dl_level) {
3341 	case DL_PROMISC_PHYS:
3342 		flag = MAN_SFLAG_ALLPHYS;
3343 		break;
3344 
3345 	case DL_PROMISC_SAP:
3346 		flag = MAN_SFLAG_ALLSAP;
3347 		break;
3348 
3349 	case DL_PROMISC_MULTI:
3350 		flag = MAN_SFLAG_ALLMULTI;
3351 		break;
3352 
3353 	default:
3354 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_NOTSUPPORTED, 0);
3355 		return;
3356 	}
3357 
3358 	/*
3359 	 * Catch request for replay, and forward down to any lower
3360 	 * lower stream.
3361 	 */
3362 	if (man_dlpi(msp, mp)) {
3363 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_SYSERR, ENOMEM);
3364 		return;
3365 	}
3366 
3367 	msp->ms_flags |= flag;
3368 
3369 	dlokack(wq, mp, DL_PROMISCON_REQ);
3370 
3371 }
3372 
3373 static void
3374 man_poffreq(queue_t *wq, mblk_t *mp)
3375 {
3376 	manstr_t		*msp;
3377 	int			flag;
3378 
3379 	msp = (manstr_t *)wq->q_ptr;
3380 
3381 	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
3382 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
3383 		return;
3384 	}
3385 
3386 	switch (((dl_promiscoff_req_t *)mp->b_rptr)->dl_level) {
3387 	case DL_PROMISC_PHYS:
3388 		flag = MAN_SFLAG_ALLPHYS;
3389 		break;
3390 
3391 	case DL_PROMISC_SAP:
3392 		flag = MAN_SFLAG_ALLSAP;
3393 		break;
3394 
3395 	case DL_PROMISC_MULTI:
3396 		flag = MAN_SFLAG_ALLMULTI;
3397 		break;
3398 
3399 	default:
3400 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTSUPPORTED, 0);
3401 		return;
3402 	}
3403 
3404 	if ((msp->ms_flags & flag) == 0) {
3405 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
3406 		return;
3407 	}
3408 
3409 	if (man_dlpi_senddown(msp, mp)) {
3410 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_SYSERR, ENOMEM);
3411 		return;
3412 	}
3413 
3414 	man_dl_release(&msp->ms_dl_mp, mp);
3415 
3416 	msp->ms_flags &= ~flag;
3417 
3418 	dlokack(wq, mp, DL_PROMISCOFF_REQ);
3419 
3420 }
3421 
3422 /*
3423  * Enable multicast requests. We might need to track addresses instead of
3424  * just passing things through (see eri_dmreq) - TBD.
3425  */
3426 static void
3427 man_emreq(queue_t *wq, mblk_t *mp)
3428 {
3429 	manstr_t		*msp;
3430 	union DL_primitives	*dlp;
3431 	eaddr_t			*addrp;
3432 	t_uscalar_t		off;
3433 	t_uscalar_t		len;
3434 
3435 	msp = (manstr_t *)wq->q_ptr;
3436 
3437 	if (MBLKL(mp) < DL_ENABMULTI_REQ_SIZE) {
3438 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADPRIM, 0);
3439 		return;
3440 	}
3441 
3442 	if (msp->ms_dlpistate == DL_UNATTACHED) {
3443 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3444 		return;
3445 	}
3446 
3447 	dlp = (union DL_primitives *)mp->b_rptr;
3448 	len = dlp->enabmulti_req.dl_addr_length;
3449 	off = dlp->enabmulti_req.dl_addr_offset;
3450 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3451 
3452 	if ((len != ETHERADDRL) ||
3453 	    !MBLKIN(mp, off, len) ||
3454 	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3455 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3456 		return;
3457 	}
3458 
3459 	/*
3460 	 * Catch request for replay, and forward down to any lower
3461 	 * lower stream.
3462 	 */
3463 	if (man_dlpi(msp, mp)) {
3464 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3465 		return;
3466 	}
3467 
3468 	dlokack(wq, mp, DL_ENABMULTI_REQ);
3469 
3470 }
3471 
3472 static void
3473 man_dmreq(queue_t *wq, mblk_t *mp)
3474 {
3475 	manstr_t		*msp;
3476 	union DL_primitives	*dlp;
3477 	eaddr_t			*addrp;
3478 	t_uscalar_t		off;
3479 	t_uscalar_t		len;
3480 
3481 	msp = (manstr_t *)wq->q_ptr;
3482 
3483 	if (MBLKL(mp) < DL_DISABMULTI_REQ_SIZE) {
3484 		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADPRIM, 0);
3485 		return;
3486 	}
3487 
3488 	if (msp->ms_dlpistate == DL_UNATTACHED) {
3489 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3490 		return;
3491 	}
3492 
3493 	dlp = (union DL_primitives *)mp->b_rptr;
3494 	len = dlp->enabmulti_req.dl_addr_length;
3495 	off = dlp->enabmulti_req.dl_addr_offset;
3496 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3497 
3498 	if ((len != ETHERADDRL) ||
3499 	    !MBLKIN(mp, off, len) ||
3500 	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3501 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3502 		return;
3503 	}
3504 
3505 	if (man_dlpi_senddown(msp, mp)) {
3506 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3507 		return;
3508 	}
3509 
3510 	man_dl_release(&msp->ms_dl_mp, mp);
3511 
3512 	dlokack(wq, mp, DL_DISABMULTI_REQ);
3513 
3514 }
3515 
3516 static void
3517 man_pareq(queue_t *wq, mblk_t *mp)
3518 {
3519 	manstr_t		*msp;
3520 	union	DL_primitives	*dlp;
3521 	uint32_t		type;
3522 	struct	ether_addr	addr;
3523 
3524 	msp = (manstr_t *)wq->q_ptr;
3525 
3526 	if (MBLKL(mp) < DL_PHYS_ADDR_REQ_SIZE) {
3527 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3528 		return;
3529 	}
3530 
3531 	dlp = (union DL_primitives *)mp->b_rptr;
3532 	type = dlp->physaddr_req.dl_addr_type;
3533 	if (msp->ms_manp == NULL) {
3534 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3535 		return;
3536 	}
3537 
3538 	switch (type) {
3539 	case	DL_FACT_PHYS_ADDR:
3540 		(void) localetheraddr((struct ether_addr *)NULL, &addr);
3541 		break;
3542 
3543 	case	DL_CURR_PHYS_ADDR:
3544 		ether_bcopy(&msp->ms_manp->man_eaddr, &addr);
3545 		break;
3546 
3547 	default:
3548 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_NOTSUPPORTED, 0);
3549 		return;
3550 	}
3551 
3552 	dlphysaddrack(wq, mp, &addr, ETHERADDRL);
3553 }
3554 
3555 /*
3556  * TBD - this routine probably should be protected w/ an ndd
3557  * tuneable, or a man.conf parameter.
3558  */
3559 static void
3560 man_spareq(queue_t *wq, mblk_t *mp)
3561 {
3562 	manstr_t		*msp;
3563 	union DL_primitives	*dlp;
3564 	t_uscalar_t		off;
3565 	t_uscalar_t		len;
3566 	eaddr_t			*addrp;
3567 
3568 	msp = (manstr_t *)wq->q_ptr;
3569 
3570 	if (MBLKL(mp) < DL_SET_PHYS_ADDR_REQ_SIZE) {
3571 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3572 		return;
3573 	}
3574 
3575 	dlp = (union DL_primitives *)mp->b_rptr;
3576 	len = dlp->set_physaddr_req.dl_addr_length;
3577 	off = dlp->set_physaddr_req.dl_addr_offset;
3578 
3579 	if (!MBLKIN(mp, off, len)) {
3580 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3581 		return;
3582 	}
3583 
3584 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3585 
3586 	/*
3587 	 * Error if length of address isn't right or the address
3588 	 * specified is a multicast or broadcast address.
3589 	 */
3590 	if ((len != ETHERADDRL) ||
3591 	    ((addrp->ether_addr_octet[0] & 01) == 1) ||
3592 	    (ether_cmp(addrp, &etherbroadcast) == 0)) {
3593 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADADDR, 0);
3594 		return;
3595 	}
3596 	/*
3597 	 * Error if this stream is not attached to a device.
3598 	 */
3599 	if (msp->ms_manp == NULL) {
3600 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3601 		return;
3602 	}
3603 
3604 	/*
3605 	 * We will also resend DL_SET_PHYS_ADDR_REQ for each dest
3606 	 * when it is linked under us.
3607 	 */
3608 	if (man_dlpi_senddown(msp, mp)) {
3609 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_SYSERR, ENOMEM);
3610 		return;
3611 	}
3612 
3613 	ether_copy(addrp, msp->ms_manp->man_eaddr.ether_addr_octet);
3614 
3615 	MAN_DBG(MAN_DLPI, ("man_sareq: snagged %s\n",
3616 	    ether_sprintf(&msp->ms_manp->man_eaddr)));
3617 
3618 	dlokack(wq, mp, DL_SET_PHYS_ADDR_REQ);
3619 
3620 }
3621 
3622 /*
3623  * These routines make up the lower part of the MAN streams framework.
3624  */
3625 
3626 /*
3627  * man_lwsrv - Deferred mblks for down stream. We end up here when
3628  * the destination is not DL_IDLE when traffic comes downstream.
3629  *
3630  *	wq - lower write queue of mxx
3631  */
3632 static int
3633 man_lwsrv(queue_t *wq)
3634 {
3635 	mblk_t		*mp;
3636 	mblk_t		*mlistp;
3637 	man_dest_t	*mdp;
3638 	size_t		count;
3639 
3640 	mdp = (man_dest_t *)wq->q_ptr;
3641 
3642 	MAN_DBG(MAN_LWSRV, ("man_lwsrv: wq(0x%p) mdp(0x%p)"
3643 	    " md_rq(0x%p)\n", (void *)wq, (void *)mdp,
3644 	    mdp ? (void *)mdp->md_rq : NULL));
3645 
3646 	if (mdp == NULL)
3647 		goto exit;
3648 
3649 	if (mdp->md_state & MAN_DSTATE_CLOSING) {
3650 			flushq(wq, FLUSHDATA);
3651 			flushq(RD(wq), FLUSHDATA);
3652 			goto exit;
3653 	}
3654 
3655 	/*
3656 	 * Arrange to send deferred mp's first, then mblks on the
3657 	 * service queue. Since we are exclusive in the inner perimeter,
3658 	 * we dont have to worry about md_lock, like the put procedures,
3659 	 * which are MTPUTSHARED.
3660 	 */
3661 	mutex_enter(&mdp->md_lock);
3662 	mlistp = mdp->md_dmp_head;
3663 	mdp->md_dmp_head = NULL;
3664 	count = mdp->md_dmp_count;
3665 	mdp->md_dmp_count = 0;
3666 	mutex_exit(&mdp->md_lock);
3667 
3668 	while (mlistp != NULL) {
3669 		mp = mlistp;
3670 		mlistp = mp->b_next;
3671 		mp->b_next = NULL;
3672 		count -= msgsize(mp);
3673 		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3674 
3675 			mutex_enter(&mdp->md_lock);
3676 			mdp->md_dmp_count += count + msgsize(mp);
3677 			mp->b_next = mlistp;
3678 			mdp->md_dmp_head = mp;
3679 			mutex_exit(&mdp->md_lock);
3680 			goto exit;
3681 		}
3682 	}
3683 	mdp->md_dmp_tail = NULL;
3684 
3685 	while (mp = getq(wq)) {
3686 		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3687 			/*
3688 			 * Put it back on queue, making sure to avoid
3689 			 * infinite loop mentioned in putbq(9F)
3690 			 */
3691 			noenable(wq);
3692 			(void) putbq(wq, mp);
3693 			enableok(wq);
3694 
3695 			break;
3696 		}
3697 	}
3698 
3699 exit:
3700 
3701 	return (0);
3702 }
3703 
3704 /*
3705  * man_lrput - handle DLPI messages issued from downstream.
3706  *
3707  *	rq - lower read queue of mxx
3708  *	mp - mblk ptr to DLPI request
3709  *
3710  *	returns 0
3711  */
3712 static int
3713 man_lrput(queue_t *rq, mblk_t *mp)
3714 {
3715 	man_dest_t	*mdp;
3716 	manstr_t	*msp;
3717 
3718 #if defined(DEBUG)
3719 	union DL_primitives	*dlp;
3720 	t_uscalar_t		prim = MAN_DLPI_MAX_PRIM + 1;
3721 	char			*prim_str;
3722 #endif  /* DEBUG */
3723 
3724 	mdp = (man_dest_t *)rq->q_ptr;
3725 
3726 #if defined(DEBUG)
3727 	if (DB_TYPE(mp) == M_PROTO) {
3728 		dlp = (union DL_primitives *)mp->b_rptr;
3729 		prim = dlp->dl_primitive;
3730 	}
3731 
3732 	prim_str = (prim > MAN_DLPI_MAX_PRIM) ? "NON DLPI" :
3733 	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3734 	    (prim == DLIOCRAW) ? "DLIOCRAW" :
3735 	    dps[(unsigned int)prim];
3736 	MAN_DBG(MAN_LRPUT, ("man_lrput: rq(0x%p) mp(0x%p) mdp(0x%p)"
3737 	    " db_type(0x%x) dl_prim %s", (void *)rq,
3738 	    (void *)mp, (void *)mdp, DB_TYPE(mp), prim_str));
3739 	MAN_DBGCALL(MAN_LRPUT2, man_print_mdp(mdp));
3740 #endif  /* DEBUG */
3741 
3742 	if (DB_TYPE(mp) == M_FLUSH) {
3743 		/* Turn around */
3744 		if (*mp->b_rptr & FLUSHW) {
3745 			*mp->b_rptr &= ~FLUSHR;
3746 			qreply(rq, mp);
3747 		} else
3748 			freemsg(mp);
3749 		return (0);
3750 	}
3751 
3752 	if (mdp == NULL || mdp->md_state != MAN_DSTATE_READY) {
3753 
3754 		MAN_DBG(MAN_LRPUT, ("man_lrput: not ready mdp(0x%p),"
3755 		    " state(%d)", (void *)mdp, mdp ? mdp->md_state : -1));
3756 		freemsg(mp);
3757 		return (0);
3758 	}
3759 
3760 	/*
3761 	 * If we have a destination in the right state, forward on datagrams.
3762 	 */
3763 	if (MAN_IS_DATA(mp)) {
3764 		if (mdp->md_dlpistate == DL_IDLE && canputnext(mdp->md_rq)) {
3765 
3766 			msp = mdp->md_msp;
3767 			if (!(msp->ms_flags & MAN_SFLAG_PROMISC))
3768 				mdp->md_rcvcnt++; /* Count for failover */
3769 			/*
3770 			 * go put mblk_t directly up to next queue.
3771 			 */
3772 			MAN_DBG(MAN_LRPUT, ("man_lrput: putnext to rq(0x%p)",
3773 			    (void *)mdp->md_rq));
3774 			(void) putnext(mdp->md_rq, mp);
3775 		} else {
3776 			freemsg(mp);
3777 		}
3778 	} else {
3779 		/*
3780 		 * Handle in man_lrsrv with exclusive inner perimeter lock.
3781 		 */
3782 		(void) putq(rq, mp);
3783 	}
3784 
3785 	return (0);
3786 }
3787 
/*
 * man_lrsrv - lower read service routine.
 *
 *	rq - lower read queue of mxx
 *
 * Either this is a response from our attempt to sync the upper and lower
 * stream states, or it is data. If it is not data, do DL_* response
 * processing and transition md_dlpistate accordingly. If it is data,
 * toss it.
 *
 * Each message taken off the queue is freed here. When a failure we care
 * about is seen, the destination is put back to DL_UNATTACHED, its path
 * is marked MDEV_FAILED and either an autoswitch is started or
 * man_reset_dlpi is scheduled via qtimeout.
 *
 *	returns 0
 */
static int
man_lrsrv(queue_t *rq)
{
	man_dest_t		*mdp;
	mblk_t			*mp;
	union DL_primitives	*dlp;
	ulong_t			prim;
	ulong_t			cprim;
	/*
	 * NOTE(review): this flag is never cleared inside the loop below, so
	 * once it is set every later message dequeued in the same invocation
	 * also takes the reset path - confirm that is intended.
	 */
	int			need_dl_reset = FALSE;

#if defined(DEBUG)
		struct iocblk	*iocp;
		char		ioc_cmd[256];
#endif  /* DEBUG */

	MAN_DBG(MAN_LRSRV, ("man_lrsrv: rq(0x%p)", (void *)rq));

	mdp = (man_dest_t *)rq->q_ptr;

	/* No destination, or it is closing: discard queued data both ways. */
	if ((mdp == NULL) || (mdp->md_state & MAN_DSTATE_CLOSING)) {
			flushq(rq, FLUSHDATA);
			flushq(WR(rq), FLUSHDATA);
			goto exit;
	}

	/* Loop body is not indented; it runs to "End while (getq())". */
	while (mp = getq(rq)) {


	/*
	 * If we're not connected, or it's a datagram, toss it.
	 */
	if (MAN_IS_DATA(mp) || mdp->md_state != MAN_DSTATE_READY) {

		MAN_DBG(MAN_LRSRV, ("man_lrsrv: dropping mblk mdp(0x%p)"
		    " is_data(%d)", (void *)mdp, MAN_IS_DATA(mp)));
		freemsg(mp);
		continue;
	}

	/*
	 * Should be response to man_dlpi_replay. Discard unless there
	 * is a failure we care about.
	 */

	switch (DB_TYPE(mp)) {
	case M_PROTO:
	case M_PCPROTO:
		/* Do proto processing below. */
		break;

	case M_IOCNAK:
		/*
		 * DL_IOC* failed for some reason.
		 */
		need_dl_reset = TRUE;

#if defined(DEBUG)
		iocp = (struct iocblk *)mp->b_rptr;

		(void) sprintf(ioc_cmd, "0x%x", iocp->ioc_cmd);
		MAN_DBG(MAN_LRSRV, ("man_lrsrv: M_IOCNAK err %d for cmd(%s)\n",
		    iocp->ioc_error,
		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
#endif  /* DEBUG */

		/* FALLTHRU */

	case M_IOCACK:
	case M_CTL:
		/*
		 * OK response from DL_IOC*, ignore. Skip the DLPI primitive
		 * decoding; the message is freed at dl_reset.
		 */
		goto dl_reset;
	}

	/*
	 * Message types not listed in the switch above also reach this
	 * point and are decoded as a DLPI primitive.
	 */
	dlp = (union DL_primitives *)mp->b_rptr;
	prim = dlp->dl_primitive;

	/*
	 * NOTE(review): prim indexes dps[] here before the switch below
	 * rejects unexpected values - confirm dps[] covers every value a
	 * lower driver can send.
	 */
	MAN_DBG(MAN_LRSRV, ("man_lrsrv: prim %s", dps[(int)prim]));

	/*
	 * DLPI state processing big theory: We do not rigorously check
	 * DLPI states (e.g. PENDING stuff). Simple rules:
	 *
	 *	1) If we see an OK_ACK to an ATTACH_REQ, dlpistate = DL_UNBOUND.
	 *	2) If we see an BIND_ACK to a BIND_REQ, dlpistate = DL_IDLE.
	 *	3) If we see a OK_ACK response to an UNBIND_REQ
	 *	   dlpistate = DL_UNBOUND.
	 *	4) If we see a OK_ACK response to a DETACH_REQ,
	 *	   dlpistate = DL_UNATTACHED.
	 *
	 * Everything that isn't handled by 1-4 above is handled by 5)
	 *
	 *	5) A NAK to any DL_* messages we care about causes
	 *	   dlpistate = DL_UNATTACHED and man_reset_dlpi to run
	 *
	 * TBD - need a reset counter so we can try a switch if it gets
	 * too high.
	 */

	switch (prim) {
	case DL_OK_ACK:
		cprim = dlp->ok_ack.dl_correct_primitive;

		switch (cprim) {
		case DL_ATTACH_REQ:
			/*
			 * A non-zero return from man_dlioc_replay moves us
			 * to DL_UNBOUND; zero forces a reset.
			 */
			if (man_dlioc_replay(mdp)) {
				D_SETSTATE(mdp, DL_UNBOUND);
			} else {
				need_dl_reset = TRUE;
				break;
			}
			break;

		case DL_DETACH_REQ:
			D_SETSTATE(mdp, DL_UNATTACHED);
			break;

		case DL_UNBIND_REQ:
			/*
			 * Cancel timer and set md_dlpistate.
			 */
			D_SETSTATE(mdp, DL_UNBOUND);

			ASSERT(mdp->md_bc_id == 0);
			if (mdp->md_lc_timer_id != 0) {
				(void) quntimeout(man_ctl_wq,
				    mdp->md_lc_timer_id);
				mdp->md_lc_timer_id = 0;
			}
		}
		MAN_DBG(MAN_DLPI,
		    ("		cprim %s", dps[(int)cprim]));
		break;

	case DL_BIND_ACK:
		/*
		 * We're ready for data. Get man_lwsrv to run to
		 * process any deferred data and start linkcheck timer.
		 */
		D_SETSTATE(mdp, DL_IDLE);
		qenable(mdp->md_wq);
		mdp->md_linkstate = MAN_LINKGOOD;
		if (man_needs_linkcheck(mdp)) {
			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
			    man_linkcheck_timer, (void *)mdp,
			    man_gettimer(MAN_TIMER_LINKCHECK, mdp));
		}

		break;

	case DL_ERROR_ACK:
		cprim = dlp->error_ack.dl_error_primitive;
		switch (cprim) {
		/* Failures of these requests force a DLPI reset. */
		case DL_ATTACH_REQ:
		case DL_BIND_REQ:
		case DL_DISABMULTI_REQ:
		case DL_ENABMULTI_REQ:
		case DL_PROMISCON_REQ:
		case DL_PROMISCOFF_REQ:
		case DL_SET_PHYS_ADDR_REQ:
			need_dl_reset = TRUE;
			break;

		/*
		 * ignore error TBD (better comment)
		 */
		case DL_UNBIND_REQ:
		case DL_DETACH_REQ:
			break;
		}

		MAN_DBG(MAN_DLPI,
		    ("\tdl_errno %d dl_unix_errno %d cprim %s",
		    dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno,
		    dps[(int)cprim]));
		break;

	case DL_UDERROR_IND:
		/* Only traced; no state change. */
		MAN_DBG(MAN_DLPI,
		    ("\tdl_errno %d unix_errno %d",
		    dlp->uderror_ind.dl_errno,
		    dlp->uderror_ind.dl_unix_errno));
		break;

	case DL_INFO_ACK:
		break;

	default:
		/*
		 * We should not get here.
		 */
		cmn_err(CE_WARN, "man_lrsrv: unexpected DL prim 0x%lx!",
		    prim);
		need_dl_reset = TRUE;
		break;
	}

dl_reset:
	/* Every message that was not dropped above is freed here. */
	freemsg(mp);

	if (need_dl_reset) {
		man_pg_t	*mpg;
		/* Shadows the outer mblk_t *mp for the rest of this block. */
		man_path_t	*mp;

		if (qsize(rq)) {	/* Dump all messages. */
			flushq(rq, FLUSHDATA);
			flushq(WR(rq), FLUSHDATA);
		}

		/* Count the failure, go back to unattached, stop the timer. */
		mdp->md_dlpierrors++;
		D_SETSTATE(mdp, DL_UNATTACHED);
		if (mdp->md_lc_timer_id != 0) {
			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
			mdp->md_lc_timer_id = 0;
		}

		/* Path lookup and update are done under man_lock. */
		mutex_enter(&man_lock);
		ASSERT(mdp->md_msp != NULL);
		ASSERT(mdp->md_msp->ms_manp != NULL);
		mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg,
		    mdp->md_pg_id);
		ASSERT(mpg != NULL);
		mp = man_find_path_by_ppa(mpg->mpg_pathp,
		    mdp->md_device.mdev_ppa);
		ASSERT(mp != NULL);
		mp->mp_device.mdev_state |= MDEV_FAILED;
		if ((mdp->md_dlpierrors >= MAN_MAX_DLPIERRORS) &&
		    (man_is_on_domain ||
		    mdp->md_msp->ms_manp->man_meta_ppa == 1)) {
			/*
			 * Autoswitching is disabled for instance 0
			 * on the SC as we expect the domain to
			 * initiate the path switching.
			 */
			(void) man_do_autoswitch((man_dest_t *)mdp);
			MAN_DBG(MAN_WARN, ("man_lrsrv: dlpi failure(%d,%d),"
			    " switching path", mdp->md_device.mdev_major,
			    mdp->md_device.mdev_ppa));
		} else {
			/* Otherwise retry: schedule man_reset_dlpi. */
			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
			    man_reset_dlpi, (void *)mdp,
			    man_gettimer(MAN_TIMER_DLPIRESET, mdp));
		}
		mutex_exit(&man_lock);
	}


	} /* End while (getq()) */

exit:
	MAN_DBG(MAN_DLPI, ("man_lrsrv: returns"));

	return (0);
}
4049 
4050 static int
4051 man_needs_linkcheck(man_dest_t *mdp)
4052 {
4053 	/*
4054 	 * Not ready for linkcheck.
4055 	 */
4056 	if (mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
4057 		return (0);
4058 
4059 	/*
4060 	 * Linkchecking needs to be done on IP streams. For domain, all
4061 	 * driver instances need checking, for SC only instance 1 needs it.
4062 	 */
4063 	if ((man_is_on_domain || mdp->md_msp->ms_manp->man_meta_ppa == 1) &&
4064 	    (mdp->md_msp->ms_sap == ETHERTYPE_IP ||
4065 	    mdp->md_msp->ms_sap == ETHERTYPE_IPV6))
4066 
4067 		return (1);
4068 
4069 	/*
4070 	 * Linkcheck not need on this link.
4071 	 */
4072 	return (0);
4073 }
4074 
4075 /*
4076  * The following routines process work requests posted to man_iwork_q
4077  * from the non-STREAMS half of the driver (see man_bwork.c). The work
4078  * requires access to the inner perimeter lock of the driver. This
 * lock is acquired by man_uwsrv, who calls man_iwork to process the
 * man_iwork_q.
4081  */
4082 
4083 /*
4084  * The man_bwork has posted some work for us to do inside the
4085  * perimeter. This mainly involves updating lower multiplexor data
4086  * structures (non-blocking type stuff). So, we can hold the man_lock
4087  * until we are done processing all work items. Note that some of these
4088  * routines in turn submit work back to the bgthread, which they can do
4089  * since we hold the man_lock.
4090  */
4091 static void
4092 man_iwork()
4093 {
4094 	man_work_t	*wp;
4095 	int		wp_finished;
4096 
4097 	MAN_DBG(MAN_SWITCH, ("man_iwork: q_work(0x%p)",
4098 	    (void *)man_iwork_q->q_work));
4099 
4100 	mutex_enter(&man_lock);
4101 
4102 	while (man_iwork_q->q_work) {
4103 
4104 		wp = man_iwork_q->q_work;
4105 		man_iwork_q->q_work = wp->mw_next;
4106 		wp->mw_next = NULL;
4107 
4108 		mutex_exit(&man_lock);
4109 
4110 		MAN_DBG(MAN_SWITCH, ("man_iwork: type %s",
4111 		    _mw_type[wp->mw_type]));
4112 
4113 		wp_finished = TRUE;
4114 
4115 		switch (wp->mw_type) {
4116 		case MAN_WORK_DRATTACH:
4117 			(void) man_do_dr_attach(wp);
4118 			break;
4119 
4120 		case MAN_WORK_DRSWITCH:
4121 			/*
4122 			 * Return status to man_dr_detach immediately. If
4123 			 * no error submitting SWITCH request, man_iswitch
4124 			 * or man_bclose will cv_signal man_dr_detach on
4125 			 * completion of SWITCH work request.
4126 			 */
4127 			if (man_do_dr_switch(wp) == 0)
4128 				wp_finished = FALSE;
4129 			break;
4130 
4131 		case MAN_WORK_DRDETACH:
4132 			man_do_dr_detach(wp);
4133 			break;
4134 
4135 		case MAN_WORK_SWITCH:
4136 			if (man_iswitch(wp))
4137 				wp_finished = FALSE;
4138 			break;
4139 
4140 		case MAN_WORK_KSTAT_UPDATE:
4141 			man_do_kstats(wp);
4142 			break;
4143 
4144 		default:
4145 			cmn_err(CE_WARN, "man_iwork: "
4146 			    "illegal work type(%d)", wp->mw_type);
4147 			break;
4148 		}
4149 
4150 		mutex_enter(&man_lock);
4151 
4152 		/*
4153 		 * If we've completed the work request, delete, or
4154 		 * cv_signal waiter.
4155 		 */
4156 		if (wp_finished) {
4157 			wp->mw_flags |= MAN_WFLAGS_DONE;
4158 
4159 			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
4160 				cv_signal(&wp->mw_cv);
4161 			else
4162 				man_work_free(wp);
4163 		}
4164 	}
4165 
4166 	mutex_exit(&man_lock);
4167 }
4168 
4169 /*
4170  * man_dr_detach has submitted a request to DRSWITCH a path.
4171  * He is in cv_wait_sig(wp->mw_cv). We forward the work request on to
4172  * man_bwork as a switch request. It should end up back at
4173  * man_iwork, who will cv_signal(wp->mw_cv) man_dr_detach.
4174  *
4175  * Called holding inner perimeter lock.
4176  * man_lock is held to synchronize access to pathgroup list(man_pg).
4177  */
4178 static int
4179 man_do_dr_switch(man_work_t *wp)
4180 {
4181 	man_t		*manp;
4182 	man_pg_t	*mpg;
4183 	man_path_t	*mp;
4184 	man_path_t	*ap;
4185 	man_adest_t	*adp;
4186 	mi_path_t	mpath;
4187 	int		status = 0;
4188 
4189 	adp = &wp->mw_arg;
4190 
4191 	MAN_DBG(MAN_SWITCH, ("man_do_dr_switch: pg_id %d work:", adp->a_pg_id));
4192 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4193 
4194 	mutex_enter(&man_lock);
4195 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4196 	if (manp == NULL || manp->man_pg == NULL) {
4197 		status = ENODEV;
4198 		goto exit;
4199 	}
4200 
4201 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4202 	if (mpg == NULL) {
4203 		status = ENODEV;
4204 		goto exit;
4205 	}
4206 
4207 	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4208 		status = EAGAIN;
4209 		goto exit;
4210 	}
4211 
4212 	/*
4213 	 * Check to see if detaching device is active. If so, activate
4214 	 * an alternate.
4215 	 */
4216 	mp = man_find_active_path(mpg->mpg_pathp);
4217 	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4218 
4219 		ap = man_find_alternate_path(mpg->mpg_pathp);
4220 		if (ap == NULL) {
4221 			status = EBUSY;
4222 			goto exit;
4223 		}
4224 
4225 		bzero((char *)&mpath, sizeof (mi_path_t));
4226 
4227 		mpath.mip_cmd = MI_PATH_ACTIVATE;
4228 		mpath.mip_man_ppa = 0;
4229 		mpath.mip_pg_id = 0;
4230 		mpath.mip_devs[0] = ap->mp_device;
4231 		mpath.mip_ndevs = 1;
4232 		ether_copy(&manp->man_eaddr, &mpath.mip_eaddr);
4233 
4234 		/*
4235 		 * DR thread is sleeping on wp->mw_cv. We change the work
4236 		 * request from DRSWITCH to SWITCH and submit it to
4237 		 * for processing by man_bwork (via man_pg_cmd). At
4238 		 * completion the SWITCH work request is processed by
4239 		 * man_iswitch() or man_bclose and the DR thread will
4240 		 * be cv_signal'd.
4241 		 */
4242 		wp->mw_type = MAN_WORK_SWITCH;
4243 		if (status = man_pg_cmd(&mpath, wp))
4244 			goto exit;
4245 
4246 	} else {
4247 		/*
4248 		 * Tell man_dr_detach that detaching device is not currently
4249 		 * in use.
4250 		 */
4251 		status = ENODEV;
4252 	}
4253 
4254 exit:
4255 	if (status) {
4256 		/*
4257 		 * ENODEV is a noop, not really an error.
4258 		 */
4259 		if (status != ENODEV)
4260 			wp->mw_status = status;
4261 	}
4262 	mutex_exit(&man_lock);
4263 
4264 	return (status);
4265 }
4266 
4267 /*
4268  * man_dr_attach has submitted a request to DRATTACH a path,
4269  * add that path to the path list.
4270  *
4271  * Called holding perimeter lock.
4272  */
4273 static int
4274 man_do_dr_attach(man_work_t *wp)
4275 {
4276 	man_t		*manp;
4277 	man_adest_t	*adp;
4278 	mi_path_t	mpath;
4279 	manc_t		manc;
4280 	int		status = 0;
4281 
4282 	adp = &wp->mw_arg;
4283 
4284 	MAN_DBG(MAN_SWITCH, ("man_do_dr_attach: pg_id %d work:", adp->a_pg_id));
4285 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4286 
4287 	mutex_enter(&man_lock);
4288 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4289 	if (manp == NULL || manp->man_pg == NULL) {
4290 		status = ENODEV;
4291 		goto exit;
4292 	}
4293 
4294 	if (status = man_get_iosram(&manc)) {
4295 		goto exit;
4296 	}
4297 	/*
4298 	 * Extract SC ethernet address from IOSRAM.
4299 	 */
4300 	ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4301 
4302 	mpath.mip_pg_id = adp->a_pg_id;
4303 	mpath.mip_man_ppa = adp->a_man_ppa;
4304 	/*
4305 	 * man_dr_attach passes the new device info in a_sf_dev.
4306 	 */
4307 	MAN_DBG(MAN_DR, ("man_do_dr_attach: "));
4308 	MAN_DBGCALL(MAN_DR, man_print_dev(&adp->a_sf_dev));
4309 	mpath.mip_devs[0] = adp->a_sf_dev;
4310 	mpath.mip_ndevs = 1;
4311 	mpath.mip_cmd = MI_PATH_ADD;
4312 	status = man_pg_cmd(&mpath, NULL);
4313 
4314 exit:
4315 	mutex_exit(&man_lock);
4316 	return (status);
4317 }
4318 
4319 /*
4320  * man_dr_detach has submitted a request to DRDETACH a path.
4321  * He is in cv_wait_sig(wp->mw_cv). We remove the path and
4322  * cv_signal(wp->mw_cv) man_dr_detach.
4323  *
4324  * Called holding perimeter lock.
4325  */
4326 static void
4327 man_do_dr_detach(man_work_t *wp)
4328 {
4329 	man_t		*manp;
4330 	man_pg_t	*mpg;
4331 	man_path_t	*mp;
4332 	man_adest_t	*adp;
4333 	manc_t		manc;
4334 	mi_path_t	mpath;
4335 	int		i;
4336 	int		found;
4337 	int		status = 0;
4338 
4339 	adp = &wp->mw_arg;
4340 
4341 	MAN_DBG(MAN_SWITCH, ("man_do_dr_detach: pg_id %d work:", adp->a_pg_id));
4342 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4343 
4344 	mutex_enter(&man_lock);
4345 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4346 	if (manp == NULL || manp->man_pg == NULL) {
4347 		status = ENODEV;
4348 		goto exit;
4349 	}
4350 
4351 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4352 	if (mpg == NULL) {
4353 		status = ENODEV;
4354 		goto exit;
4355 	}
4356 
4357 	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4358 		status = EAGAIN;
4359 		goto exit;
4360 	}
4361 
4362 	/*
4363 	 * We should have switched detaching path if it was active.
4364 	 */
4365 	mp = man_find_active_path(mpg->mpg_pathp);
4366 	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4367 		status = EAGAIN;
4368 		goto exit;
4369 	}
4370 
4371 	/*
4372 	 * Submit an ASSIGN command, minus the detaching device.
4373 	 */
4374 	bzero((char *)&mpath, sizeof (mi_path_t));
4375 
4376 	if (status = man_get_iosram(&manc)) {
4377 		goto exit;
4378 	}
4379 
4380 	mpath.mip_cmd = MI_PATH_ASSIGN;
4381 	mpath.mip_man_ppa = 0;
4382 	mpath.mip_pg_id = 0;
4383 
4384 	mp = mpg->mpg_pathp;
4385 	i = 0;
4386 	found = FALSE;
4387 	while (mp != NULL) {
4388 		if (mp->mp_device.mdev_ppa != adp->a_sf_dev.mdev_ppa) {
4389 			mpath.mip_devs[i] = mp->mp_device;
4390 			i++;
4391 		} else {
4392 			found = TRUE;
4393 		}
4394 		mp = mp->mp_next;
4395 	}
4396 
4397 	if (found) {
4398 		/*
4399 		 * Need to include SCs ethernet address in command.
4400 		 */
4401 		mpath.mip_ndevs = i;
4402 		ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4403 
4404 		status = man_pg_cmd(&mpath, NULL);
4405 	}
4406 
4407 	/*
4408 	 * Hand back status to man_dr_detach request.
4409 	 */
4410 exit:
4411 	if (status != ENODEV)
4412 		wp->mw_status = status;
4413 
4414 	mutex_exit(&man_lock);
4415 
4416 }
4417 
4418 
/*
 * man_iswitch - complete a SWITCH work request inside the perimeter.
 *
 * The background thread has configured new lower multiplexor streams for
 * the given destinations. Update the appropriate destination data structures
 * inside the inner perimeter. We must take care to deal with destinations
 * whose upper stream has closed or detached from lower streams.
 *
 * wp->mw_arg describes the switch: a_mdp[0..a_ndests-1] are the new
 * ("switch-to") destinations, a_sf_dev/a_st_dev the devices switched
 * from/to. A non-zero wp->mw_status on entry means the switch failed.
 *
 * On success the old destination contents are handed back in a_mdp[] and
 * the work request is resubmitted to man_bwork_q as MAN_WORK_CLOSE. On
 * failure a_mdp is freed here.
 *
 * Returns
 *	0		Done with work request.
 *	1		Reused work request.
 */
static int
man_iswitch(man_work_t *wp)
{
	man_adest_t	*adp;
	man_t		*manp;
	man_pg_t	*mpg;
	man_path_t	*mp = NULL;
	man_dest_t	*mdp;
	man_dest_t	*tdp;
	int		i;
	int		switch_ok = TRUE;

	adp = &wp->mw_arg;

	if (wp->mw_status != 0) {
		switch_ok = FALSE;	/* Never got things opened */
	}

	/*
	 * Update destination structures as appropriate.
	 */
	for (i = 0; i < adp->a_ndests; i++) {
		/* Holds the old destination while the new one is installed. */
		man_dest_t	tmp;

		/*
		 * Check to see if lower stream we just switch is still
		 * around.
		 */
		tdp = &adp->a_mdp[i];
		mdp = man_switch_match(tdp, adp->a_pg_id, tdp->md_switch_id);

		/* Upper stream is gone or was re-attached; nothing to update. */
		if (mdp == NULL)
			continue;

		if (switch_ok == FALSE) {
			/*
			 * Switch failed for some reason.  Clear
			 * PLUMBING flag and retry switch again later.
			 */
			man_ifail_dest(mdp);
			continue;
		}

		/*
		 * Swap new info, for old. We return the old info to
		 * man_bwork to close things up below.
		 */
		bcopy((char *)mdp, (char *)&tmp, sizeof (man_dest_t));

		ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
		ASSERT(mdp->md_state == tdp->md_state);

		mdp->md_state = tdp->md_state;

		/*
		 * save the wq from the destination passed(tdp), and point
		 * both sides of that queue pair at the upper destination.
		 */
		mdp->md_wq = tdp->md_wq;
		RD(mdp->md_wq)->q_ptr = (void *)(mdp);
		WR(mdp->md_wq)->q_ptr = (void *)(mdp);

		mdp->md_state &= ~MAN_DSTATE_INITIALIZING;
		mdp->md_state |= MAN_DSTATE_READY;

		ASSERT(mdp->md_device.mdev_major == adp->a_sf_dev.mdev_major);

		ASSERT(tdp->md_device.mdev_ppa == adp->a_st_dev.mdev_ppa);
		ASSERT(tdp->md_device.mdev_major == adp->a_st_dev.mdev_major);

		/* Take over the new device and reset per-switch bookkeeping. */
		mdp->md_device = tdp->md_device;
		mdp->md_muxid = tdp->md_muxid;
		mdp->md_linkstate = MAN_LINKUNKNOWN;
		(void) drv_getparm(TIME, &mdp->md_lastswitch);
		mdp->md_state &= ~MAN_DSTATE_PLUMBING;
		mdp->md_switch_id = 0;
		mdp->md_switches++;
		mdp->md_dlpierrors = 0;
		D_SETSTATE(mdp, DL_UNATTACHED);

		/*
		 * Resync lower w/ upper dlpi state. This will start link
		 * timer if/when lower stream goes to DL_IDLE (see man_lrsrv).
		 */
		man_reset_dlpi((void *)mdp);

		/* Hand the old destination contents back in the work request. */
		bcopy((char *)&tmp, (char *)tdp, sizeof (man_dest_t));
	}

	if (switch_ok) {
		/*
		 * Mark every entry handed back as READY and no longer
		 * PLUMBING or INITIALIZING.
		 */
		for (i = 0; i < adp->a_ndests; i++) {
			tdp = &adp->a_mdp[i];

			tdp->md_state &= ~MAN_DSTATE_PLUMBING;
			tdp->md_state &= ~MAN_DSTATE_INITIALIZING;
			tdp->md_state |= MAN_DSTATE_READY;
		}
	} else {
		/*
		 * Never got switch-to destinations open, free them.
		 */
		man_kfree(adp->a_mdp,
		    sizeof (man_dest_t) * adp->a_ndests);
	}

	/*
	 * Clear pathgroup switching flag and update path flags.
	 */
	mutex_enter(&man_lock);
	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);

	ASSERT(manp != NULL);
	ASSERT(manp->man_pg != NULL);

	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
	ASSERT(mpg != NULL);
	ASSERT(mpg->mpg_flags & MAN_PG_SWITCHING);
	mpg->mpg_flags &= ~MAN_PG_SWITCHING;

	/*
	 * Switch succeeded: clear the active flag on the path we switched
	 * from (if there was one) and mark the path we switched to as
	 * active.
	 */
	if (switch_ok) {
		mp = man_find_active_path(mpg->mpg_pathp);
		if (mp != NULL) {

			ASSERT(adp->a_sf_dev.mdev_major != 0);

			MAN_DBG(MAN_SWITCH, ("man_iswitch: switch from dev:"));
			MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_sf_dev));

			mp->mp_device.mdev_state &= ~MDEV_ACTIVE;
		} else
			ASSERT(adp->a_sf_dev.mdev_major == 0);

		MAN_DBG(MAN_SWITCH, ("man_iswitch: switch to dev:"));
		MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_st_dev));

		ASSERT(adp->a_st_dev.mdev_major != 0);

		mp = man_find_path_by_ppa(mpg->mpg_pathp,
		    adp->a_st_dev.mdev_ppa);

		ASSERT(mp != NULL);

		mp->mp_device.mdev_state |= MDEV_ACTIVE;
	}

	/*
	 * Decrement manp reference count and hand back work request if
	 * needed.
	 */
	manp->man_refcnt--;

	if (switch_ok) {
		/* Reuse the request so man_bwork closes the old streams. */
		wp->mw_type = MAN_WORK_CLOSE;
		man_work_add(man_bwork_q, wp);
	}

	mutex_exit(&man_lock);

	return (switch_ok);
}
4593 
4594 /*
4595  * Find the destination in the upper stream that we just switched.
4596  */
4597 man_dest_t *
4598 man_switch_match(man_dest_t *sdp, int pg_id, void *sid)
4599 {
4600 	man_dest_t	*mdp = NULL;
4601 	manstr_t	*msp;
4602 
4603 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4604 		/*
4605 		 * Check if upper stream closed, or detached.
4606 		 */
4607 		if (msp != sdp->md_msp)
4608 			continue;
4609 
4610 		if (msp->ms_dests == NULL)
4611 			break;
4612 
4613 		mdp = &msp->ms_dests[pg_id];
4614 
4615 		/*
4616 		 * Upper stream detached and reattached while we were
4617 		 * switching.
4618 		 */
4619 		if (mdp->md_switch_id != sid) {
4620 			mdp = NULL;
4621 			break;
4622 		}
4623 	}
4624 
4625 	return (mdp);
4626 }
4627 
4628 /*
4629  * bg_thread cant complete the switch for some reason. (Re)start the
4630  * linkcheck timer again.
4631  */
4632 static void
4633 man_ifail_dest(man_dest_t *mdp)
4634 {
4635 	ASSERT(mdp->md_lc_timer_id == 0);
4636 	ASSERT(mdp->md_bc_id == 0);
4637 	ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4638 
4639 	MAN_DBG(MAN_SWITCH, ("man_ifail_dest"));
4640 	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
4641 
4642 	mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4643 	mdp->md_linkstate = MAN_LINKFAIL;
4644 
4645 	/*
4646 	 * If we have not yet initialized link, or the upper stream is
4647 	 * DL_IDLE, restart the linktimer.
4648 	 */
4649 	if ((mdp->md_state & MAN_DSTATE_INITIALIZING) ||
4650 	    ((mdp->md_msp->ms_sap == ETHERTYPE_IPV6 ||
4651 	    mdp->md_msp->ms_sap == ETHERTYPE_IP) &&
4652 	    mdp->md_msp->ms_dlpistate == DL_IDLE)) {
4653 
4654 		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
4655 		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4656 	}
4657 
4658 }
4659 
4660 /*
4661  * Arrange to replay all of ms_dl_mp on the new lower stream to get it
4662  * in sync with the upper stream. Note that this includes setting the
4663  * physical address.
4664  *
4665  * Called from qtimeout with inner perimeter lock.
4666  */
4667 static void
4668 man_reset_dlpi(void *argp)
4669 {
4670 	man_dest_t	*mdp = (man_dest_t *)argp;
4671 	manstr_t	*msp;
4672 	mblk_t		*mp;
4673 	mblk_t		*rmp = NULL;
4674 	mblk_t		*tmp;
4675 
4676 	mdp->md_lc_timer_id = 0;
4677 
4678 	if (mdp->md_state != MAN_DSTATE_READY) {
4679 		MAN_DBG(MAN_DLPI, ("man_reset_dlpi: not ready!"));
4680 		return;
4681 	}
4682 
4683 	msp = mdp->md_msp;
4684 
4685 	rmp = man_dup_mplist(msp->ms_dl_mp);
4686 	if (rmp == NULL)
4687 		goto fail;
4688 
4689 	/*
4690 	 * Send down an unbind and detach request, just to clean things
4691 	 * out, we ignore ERROR_ACKs for unbind and detach in man_lrsrv.
4692 	 */
4693 	tmp = man_alloc_ubreq_dreq();
4694 	if (tmp == NULL) {
4695 		goto fail;
4696 	}
4697 	mp = tmp;
4698 	while (mp->b_next != NULL)
4699 		mp = mp->b_next;
4700 	mp->b_next = rmp;
4701 	rmp = tmp;
4702 
4703 	man_dlpi_replay(mdp, rmp);
4704 
4705 	return;
4706 
4707 fail:
4708 
4709 	while (rmp) {
4710 		mp = rmp;
4711 		rmp = rmp->b_next;
4712 		mp->b_next = mp->b_prev = NULL;
4713 		freemsg(mp);
4714 	}
4715 
4716 	ASSERT(mdp->md_lc_timer_id == 0);
4717 	ASSERT(mdp->md_bc_id == 0);
4718 
4719 	/*
4720 	 * If low on memory, try again later. I Could use qbufcall, but that
4721 	 * could fail and I would have to try and recover from that w/
4722 	 * qtimeout anyway.
4723 	 */
4724 	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_reset_dlpi,
4725 	    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4726 }
4727 
4728 /*
4729  * Once we receive acknowledgement that DL_ATTACH_REQ was successful,
4730  * we can send down the DL_* related IOCTLs (e.g. DL_IOC_HDR). If we
4731  * try and send them downsteam w/o waiting, the ioctl's get processed before
4732  * the ATTACH_REQ and they are rejected. TBD - could just do the lower
4733  * dlpi state change in lock step. TBD
4734  */
4735 static int
4736 man_dlioc_replay(man_dest_t *mdp)
4737 {
4738 	mblk_t		*rmp;
4739 	int		status = 1;
4740 
4741 	if (mdp->md_msp->ms_dlioc_mp == NULL)
4742 		goto exit;
4743 
4744 	rmp = man_dup_mplist(mdp->md_msp->ms_dlioc_mp);
4745 	if (rmp == NULL) {
4746 		status = 0;
4747 		goto exit;
4748 	}
4749 
4750 	man_dlpi_replay(mdp, rmp);
4751 exit:
4752 	return (status);
4753 }
4754 
4755 static mblk_t *
4756 man_alloc_ubreq_dreq()
4757 {
4758 	mblk_t			*dreq;
4759 	mblk_t			*ubreq = NULL;
4760 	union DL_primitives	*dlp;
4761 
4762 	dreq = allocb(DL_DETACH_REQ_SIZE, BPRI_MED);
4763 	if (dreq == NULL)
4764 		goto exit;
4765 
4766 	dreq->b_datap->db_type = M_PROTO;
4767 	dlp = (union DL_primitives *)dreq->b_rptr;
4768 	dlp->dl_primitive = DL_DETACH_REQ;
4769 	dreq->b_wptr += DL_DETACH_REQ_SIZE;
4770 
4771 	ubreq = allocb(DL_UNBIND_REQ_SIZE, BPRI_MED);
4772 	if (ubreq == NULL) {
4773 		freemsg(dreq);
4774 		goto exit;
4775 	}
4776 
4777 	ubreq->b_datap->db_type = M_PROTO;
4778 	dlp = (union DL_primitives *)ubreq->b_rptr;
4779 	dlp->dl_primitive = DL_UNBIND_REQ;
4780 	ubreq->b_wptr += DL_UNBIND_REQ_SIZE;
4781 
4782 	ubreq->b_next = dreq;
4783 
4784 exit:
4785 
4786 	return (ubreq);
4787 }
4788 
4789 static mblk_t *
4790 man_dup_mplist(mblk_t *mp)
4791 {
4792 	mblk_t	*listp = NULL;
4793 	mblk_t	*tailp = NULL;
4794 
4795 	for (; mp != NULL; mp = mp->b_next) {
4796 
4797 		mblk_t	*nmp;
4798 		mblk_t	*prev;
4799 		mblk_t	*next;
4800 
4801 		prev = mp->b_prev;
4802 		next = mp->b_next;
4803 		mp->b_prev = mp->b_next = NULL;
4804 
4805 		nmp = copymsg(mp);
4806 
4807 		mp->b_prev = prev;
4808 		mp->b_next = next;
4809 
4810 		if (nmp == NULL)
4811 			goto nomem;
4812 
4813 		if (listp == NULL) {
4814 			listp = tailp = nmp;
4815 		} else {
4816 			tailp->b_next = nmp;
4817 			tailp = nmp;
4818 		}
4819 	}
4820 
4821 	return (listp);
4822 nomem:
4823 
4824 	while (listp) {
4825 		mp = listp;
4826 		listp = mp->b_next;
4827 		mp->b_next = mp->b_prev = NULL;
4828 		freemsg(mp);
4829 	}
4830 
4831 	return (NULL);
4832 
4833 }
4834 
4835 static mblk_t *
4836 man_alloc_physreq_mp(eaddr_t *man_eap)
4837 {
4838 
4839 	mblk_t			*mp;
4840 	union DL_primitives	*dlp;
4841 	t_uscalar_t		off;
4842 	eaddr_t			*eap;
4843 
4844 	mp = allocb(DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL, BPRI_MED);
4845 	if (mp == NULL)
4846 		goto exit;
4847 
4848 	mp->b_datap->db_type = M_PROTO;
4849 	dlp = (union DL_primitives *)mp->b_wptr;
4850 	dlp->set_physaddr_req.dl_primitive = DL_SET_PHYS_ADDR_REQ;
4851 	dlp->set_physaddr_req.dl_addr_length = ETHERADDRL;
4852 	off = DL_SET_PHYS_ADDR_REQ_SIZE;
4853 	dlp->set_physaddr_req.dl_addr_offset =  off;
4854 	mp->b_wptr += DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL;
4855 
4856 	eap = (eaddr_t *)(mp->b_rptr + off);
4857 	ether_copy(man_eap, eap);
4858 
4859 exit:
4860 	MAN_DBG(MAN_DLPI, ("man_alloc_physreq: physaddr %s\n",
4861 	    ether_sprintf(eap)));
4862 
4863 	return (mp);
4864 }
4865 
4866 /*
4867  * A new path in a pathgroup has become active for the first time. Setup
4868  * the lower destinations in prepartion for man_pg_activate to call
4869  * man_autoswitch.
4870  */
4871 static void
4872 man_add_dests(man_pg_t *mpg)
4873 {
4874 	manstr_t	*msp;
4875 	man_dest_t	*mdp;
4876 
4877 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4878 
4879 		if (!man_str_uses_pg(msp, mpg))
4880 			continue;
4881 
4882 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4883 
4884 /*
4885  * TBD - Take out
4886  *		ASSERT(mdp->md_device.mdev_state == MDEV_UNASSIGNED);
4887  *		ASSERT(mdp->md_state == MAN_DSTATE_NOTPRESENT);
4888  */
4889 		if (mdp->md_device.mdev_state != MDEV_UNASSIGNED) {
4890 			cmn_err(CE_NOTE, "man_add_dests mdev !unassigned");
4891 			MAN_DBGCALL(MAN_PATH, man_print_mdp(mdp));
4892 		}
4893 
4894 		man_start_dest(mdp, msp, mpg);
4895 	}
4896 
4897 }
4898 
4899 static int
4900 man_remove_dests(man_pg_t *mpg)
4901 {
4902 	manstr_t	*msp;
4903 	int		close_cnt = 0;
4904 	man_dest_t	*cdp;
4905 	man_dest_t	*mdp;
4906 	man_dest_t	*tdp;
4907 	man_work_t	*wp;
4908 	mblk_t		*mp;
4909 	int		status = 0;
4910 
4911 	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
4912 	if (wp == NULL) {
4913 		status = ENOMEM;
4914 		goto exit;
4915 	}
4916 
4917 	/*
4918 	 * Count up number of destinations we need to close.
4919 	 */
4920 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4921 		if (!man_str_uses_pg(msp, mpg))
4922 			continue;
4923 
4924 		close_cnt++;
4925 	}
4926 
4927 	if (close_cnt == 0)
4928 		goto exit;
4929 
4930 	cdp = man_kzalloc(sizeof (man_dest_t) * close_cnt, KM_NOSLEEP);
4931 	if (cdp == NULL) {
4932 		status = ENOMEM;
4933 		man_work_free(wp);
4934 		goto exit;
4935 	}
4936 
4937 	tdp = cdp;
4938 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4939 		if (!man_str_uses_pg(msp, mpg))
4940 			continue;
4941 
4942 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4943 
4944 		mdp->md_state |= MAN_DSTATE_CLOSING;
4945 		mdp->md_device.mdev_state = MDEV_UNASSIGNED;
4946 		mdp->md_msp = NULL;
4947 		mdp->md_rq = NULL;
4948 
4949 		/*
4950 		 * Clean up optimized destination pointer if we are
4951 		 * closing it.
4952 		 */
4953 		man_set_optimized_dest(msp);
4954 
4955 		if (mdp->md_lc_timer_id != 0) {
4956 			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4957 			mdp->md_lc_timer_id = 0;
4958 		}
4959 		if (mdp->md_bc_id != 0) {
4960 			qunbufcall(man_ctl_wq, mdp->md_bc_id);
4961 			mdp->md_bc_id = 0;
4962 		}
4963 
4964 		mutex_enter(&mdp->md_lock);
4965 		while ((mp = mdp->md_dmp_head) != NULL) {
4966 			mdp->md_dmp_head = mp->b_next;
4967 			mp->b_next = NULL;
4968 			freemsg(mp);
4969 		}
4970 		mdp->md_dmp_count = 0;
4971 		mdp->md_dmp_tail = NULL;
4972 		mutex_exit(&mdp->md_lock);
4973 
4974 		*tdp++ = *mdp;
4975 
4976 		mdp->md_state = MAN_DSTATE_NOTPRESENT;
4977 		mdp->md_muxid = -1;
4978 	}
4979 
4980 	wp->mw_arg.a_mdp = cdp;
4981 	wp->mw_arg.a_ndests = close_cnt;
4982 	man_work_add(man_bwork_q, wp);
4983 
4984 exit:
4985 	return (status);
4986 
4987 }
4988 
4989 /*
4990  * Returns TRUE if stream uses pathgroup, FALSE otherwise.
4991  */
4992 static int
4993 man_str_uses_pg(manstr_t *msp, man_pg_t *mpg)
4994 {
4995 	int	status;
4996 
4997 	status = ((msp->ms_flags & MAN_SFLAG_CONTROL)	||
4998 	    (msp->ms_dests == NULL)	||
4999 	    (msp->ms_manp == NULL)	||
5000 	    (msp->ms_manp->man_meta_ppa != mpg->mpg_man_ppa));
5001 
5002 	return (!status);
5003 }
5004 
5005 static int
5006 man_gettimer(int timer, man_dest_t *mdp)
5007 {
5008 
5009 	int attached = TRUE;
5010 	int time = 0;
5011 
5012 	if (mdp == NULL || mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
5013 		attached = FALSE;
5014 
5015 	switch (timer) {
5016 	case MAN_TIMER_INIT:
5017 		if (attached)
5018 			time = mdp->md_msp->ms_manp->man_init_time;
5019 		else
5020 			time = MAN_INIT_TIME;
5021 		break;
5022 
5023 	case MAN_TIMER_LINKCHECK:
5024 		if (attached) {
5025 			if (mdp->md_linkstate == MAN_LINKSTALE)
5026 				time = mdp->md_msp->ms_manp->man_linkstale_time;
5027 			else
5028 				time = mdp->md_msp->ms_manp->man_linkcheck_time;
5029 		} else
5030 			time = MAN_LINKCHECK_TIME;
5031 		break;
5032 
5033 	case MAN_TIMER_DLPIRESET:
5034 		if (attached)
5035 			time = mdp->md_msp->ms_manp->man_dlpireset_time;
5036 		else
5037 			time = MAN_DLPIRESET_TIME;
5038 		break;
5039 
5040 	default:
5041 		MAN_DBG(MAN_LINK, ("man_gettimer: unknown timer %d", timer));
5042 		time = MAN_LINKCHECK_TIME;
5043 		break;
5044 	}
5045 
5046 	return (drv_usectohz(time));
5047 }
5048 
/*
 * Check the links for each active destination. Called inside inner
 * perimeter via qtimeout. This timer only runs on the domain side of the
 * driver. It should never run on the SC side.
 *
 * On a MAN_LINKGOOD link, we check/probe the link health every
 * MAN_LINKCHECK_TIME seconds. If the link goes MAN_LINKSTALE, then we probe
 * the link every MAN_LINKSTALE_TIME seconds, and fail the link after probing
 * the link MAN_LINKSTALE_RETRIES times.
 * The man_lock is held to synchronize access to the pathgroup list (man_pg).
 *
 *	argp	- the man_dest_t whose link is being checked.
 *
 * Unless a switch was successfully requested, or the destination is not
 * present/busy, or the linkcheck turns out to be unneeded, the timer
 * re-arms itself before returning.
 */
void
man_linkcheck_timer(void *argp)
{
	man_dest_t		*mdp = (man_dest_t *)argp;
	int			restart_timer = TRUE;
	int			send_ping = TRUE;
	int			newstate;
	int			oldstate;
	man_pg_t		*mpg;
	man_path_t		*mp;

	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: mdp"));
	MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));

	/*
	 * Clear timeout id and check if someones waiting on us to
	 * complete a close.
	 */
	mdp->md_lc_timer_id = 0;

	if (mdp->md_state == MAN_DSTATE_NOTPRESENT ||
	    mdp->md_state & MAN_DSTATE_BUSY) {

		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: not ready mdp"));
		MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
		goto exit;
	}

	mutex_enter(&man_lock);
	/*
	 * If the lower stream needs initializing, just go straight to
	 * switch code. As the linkcheck timer is started for all
	 * SAPs, do not send ping packets during the initialization.
	 * Note that this jumps into the body of the MAN_LINKFAIL test
	 * below, with newstate and oldstate still unset.
	 */
	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
		send_ping = FALSE;
		goto do_switch;
	}

	newstate = oldstate = mdp->md_linkstate;

	/*
	 * Nothing to check on this destination: complain and return
	 * without re-arming the timer.
	 */
	if (!man_needs_linkcheck(mdp)) {
		cmn_err(CE_NOTE,
		    "man_linkcheck_timer: unneeded linkcheck on mdp(0x%p)",
		    (void *)mdp);
		mutex_exit(&man_lock);
		return;
	}

	/*
	 * The above call to  man_needs_linkcheck() validates
	 * mdp->md_msp and mdp->md_msp->ms_manp pointers.
	 */
	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
	ASSERT(mpg != NULL);
	mp = man_find_path_by_ppa(mpg->mpg_pathp, mdp->md_device.mdev_ppa);
	ASSERT(mp != NULL);

	/*
	 * This is the most common case, when traffic is flowing: the
	 * receive count has moved since the last check.
	 */
	if (mdp->md_rcvcnt != mdp->md_lastrcvcnt) {

		newstate = MAN_LINKGOOD;
		mdp->md_lastrcvcnt = mdp->md_rcvcnt;
		send_ping = FALSE;

		/*
		 * Clear the FAILED flag and update lru.
		 */
		mp->mp_device.mdev_state &= ~MDEV_FAILED;
		(void) drv_getparm(TIME, &mp->mp_lru);

		/*
		 * Report "Link up" once, if "Link down" was the last
		 * thing reported.
		 */
		if (mdp->md_link_updown_msg == MAN_LINK_DOWN_MSG) {
			man_t *manp = mdp->md_msp->ms_manp;

			cmn_err(CE_NOTE, "%s%d Link up",
			    ddi_major_to_name(manp->man_meta_major),
			    manp->man_meta_ppa);

			mdp->md_link_updown_msg = MAN_LINK_UP_MSG;
		}

		goto done;
	}

	/*
	 * If we're here, it means we have not seen any traffic
	 */
	switch (oldstate) {
	case MAN_LINKINIT:
	case MAN_LINKGOOD:
		/*
		 * First quiet interval: go stale and load the retry count.
		 */
		newstate = MAN_LINKSTALE;
		mdp->md_linkstales++;
		mdp->md_linkstale_retries =
		    mdp->md_msp->ms_manp->man_linkstale_retries;
		break;

	case MAN_LINKSTALE:
	case MAN_LINKFAIL:
		/*
		 * Still quiet: use up a retry, and fail the link once the
		 * retries are exhausted.
		 */
		mdp->md_linkstales++;
		mdp->md_linkstale_retries--;
		if (mdp->md_linkstale_retries < 0) {
			newstate = MAN_LINKFAIL;
			mdp->md_linkfails++;
			mdp->md_linkstale_retries =
			    mdp->md_msp->ms_manp->man_linkstale_retries;
			/*
			 * Mark the destination as FAILED and
			 * update lru.
			 */
			if (oldstate != MAN_LINKFAIL) {
				mp->mp_device.mdev_state |= MDEV_FAILED;
				(void) drv_getparm(TIME, &mp->mp_lru);
			}
		}
		break;

	default:
		cmn_err(CE_WARN, "man_linkcheck_timer: illegal link"
		    " state %d", oldstate);
		break;
	}
done:

	if (oldstate != newstate) {

		MAN_DBG(MAN_LINK, ("man_linkcheck_timer"
		    " link state %s -> %s", lss[oldstate],
		    lss[newstate]));

		mdp->md_linkstate = newstate;
	}

	/*
	 * Do any work required from state transitions above.
	 */
	if (newstate == MAN_LINKFAIL) {
do_switch:
		/*
		 * man_do_autoswitch returns 0 when the switch request was
		 * queued, an errno value otherwise.
		 */
		if (!man_do_autoswitch(mdp)) {
			/*
			 * Stop linkcheck timer until switch completes.
			 */
			restart_timer = FALSE;
			send_ping = FALSE;
		}
	}

	mutex_exit(&man_lock);
	if (send_ping)
		man_do_icmp_bcast(mdp, mdp->md_msp->ms_sap);

	if (restart_timer)
		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));

exit:
	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: returns"));

}
5220 
5221 /*
5222  * Handle linkcheck initiated autoswitching.
5223  * Called with man_lock held.
5224  */
5225 static int
5226 man_do_autoswitch(man_dest_t *mdp)
5227 {
5228 	man_pg_t	*mpg;
5229 	man_path_t	*ap;
5230 	int		status = 0;
5231 
5232 	ASSERT(MUTEX_HELD(&man_lock));
5233 	/*
5234 	 * Set flags and refcnt. Cleared in man_iswitch when SWITCH completes.
5235 	 */
5236 	mdp->md_msp->ms_manp->man_refcnt++;
5237 
5238 	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5239 	ASSERT(mpg);
5240 
5241 	if (mpg->mpg_flags & MAN_PG_SWITCHING)
5242 		return (EBUSY);
5243 
5244 	mpg->mpg_flags |= MAN_PG_SWITCHING;
5245 
5246 	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5247 		/*
5248 		 * We're initializing, ask for a switch to our currently
5249 		 * active device.
5250 		 */
5251 		status = man_autoswitch(mpg, &mdp->md_device, NULL);
5252 	} else {
5253 
5254 		if (mdp->md_msp != NULL && mdp->md_msp->ms_manp != NULL &&
5255 		    mdp->md_link_updown_msg == MAN_LINK_UP_MSG) {
5256 
5257 			man_t *manp = mdp->md_msp->ms_manp;
5258 
5259 			cmn_err(CE_NOTE, "%s%d Link down",
5260 			    ddi_major_to_name(manp->man_meta_major),
5261 			    manp->man_meta_ppa);
5262 		}
5263 		mdp->md_link_updown_msg = MAN_LINK_DOWN_MSG;
5264 
5265 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: link failure on %s%d",
5266 		    ddi_major_to_name(mdp->md_device.mdev_major),
5267 		    mdp->md_device.mdev_ppa));
5268 
5269 		ap = man_find_alternate_path(mpg->mpg_pathp);
5270 
5271 		if (ap == NULL) {
5272 			status = ENODEV;
5273 			goto exit;
5274 		}
5275 		status = man_autoswitch(mpg, &ap->mp_device, NULL);
5276 	}
5277 exit:
5278 	if (status != 0) {
5279 		/*
5280 		 * man_iswitch not going to run, clean up.
5281 		 */
5282 		mpg->mpg_flags &= ~MAN_PG_SWITCHING;
5283 		mdp->md_msp->ms_manp->man_refcnt--;
5284 	}
5285 
5286 	return (status);
5287 }
5288 
5289 /*
5290  * Gather up all lower multiplexor streams that have this link open and
5291  * try to switch them. Called from inner perimeter and holding man_lock.
5292  *
5293  *	pg_id		- Pathgroup to do switch for.
5294  *	st_devp		- New device to switch to.
5295  *	wait_for_switch	- whether or not to qwait for completion.
5296  */
5297 static int
5298 man_autoswitch(man_pg_t *mpg, man_dev_t *st_devp, man_work_t *waiter_wp)
5299 {
5300 	man_work_t	*wp;
5301 	int		sdp_cnt = 0;
5302 	man_dest_t	*sdp;
5303 	int		status = 0;
5304 
5305 	ASSERT(MUTEX_HELD(&man_lock));
5306 	if (waiter_wp == NULL) {
5307 		wp = man_work_alloc(MAN_WORK_SWITCH, KM_NOSLEEP);
5308 		if (wp == NULL) {
5309 			status = ENOMEM;
5310 			goto exit;
5311 		}
5312 	} else {
5313 		ASSERT(waiter_wp->mw_type == MAN_WORK_SWITCH);
5314 		wp = waiter_wp;
5315 	}
5316 
5317 	/*
5318 	 * Set dests as PLUMBING, cancel timers and return array of dests
5319 	 * that need a switch.
5320 	 */
5321 	status = man_prep_dests_for_switch(mpg, &sdp, &sdp_cnt);
5322 	if (status) {
5323 		if (waiter_wp == NULL)
5324 			man_work_free(wp);
5325 		goto exit;
5326 	}
5327 
5328 	/*
5329 	 * If no streams are active, there are no streams to switch.
5330 	 * Return ENODEV (see man_pg_activate).
5331 	 */
5332 	if (sdp_cnt == 0) {
5333 		if (waiter_wp == NULL)
5334 			man_work_free(wp);
5335 		status = ENODEV;
5336 		goto exit;
5337 	}
5338 
5339 	/*
5340 	 * Ask the bgthread to switch. See man_bwork.
5341 	 */
5342 	wp->mw_arg.a_sf_dev = sdp->md_device;
5343 	wp->mw_arg.a_st_dev = *st_devp;
5344 	wp->mw_arg.a_pg_id = mpg->mpg_pg_id;
5345 	wp->mw_arg.a_man_ppa = mpg->mpg_man_ppa;
5346 
5347 	wp->mw_arg.a_mdp = sdp;
5348 	wp->mw_arg.a_ndests = sdp_cnt;
5349 	man_work_add(man_bwork_q, wp);
5350 
5351 exit:
5352 
5353 	return (status);
5354 }
5355 
5356 /*
5357  * If an alternate path exists for pathgroup, arrange for switch to
5358  * happen. Note that we need to switch each of msp->dests[pg_id], for
5359  * all on man_strup. We must:
5360  *
5361  *		Cancel any timers
5362  *		Mark dests as PLUMBING
5363  *		Submit switch request to man_bwork_q->
5364  */
5365 static int
5366 man_prep_dests_for_switch(man_pg_t *mpg, man_dest_t **mdpp, int *cntp)
5367 {
5368 	manstr_t	*msp;
5369 	man_dest_t	*mdp;
5370 	int		sdp_cnt = 0;
5371 	man_dest_t	*sdp = NULL;
5372 	man_dest_t	*tdp;
5373 	int		status = 0;
5374 
5375 	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: pg_id %d",
5376 	    mpg->mpg_pg_id));
5377 
5378 	/*
5379 	 * Count up number of streams, there is one destination that needs
5380 	 * switching per stream.
5381 	 */
5382 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5383 		if (man_str_uses_pg(msp, mpg))
5384 			sdp_cnt++;
5385 	}
5386 
5387 	if (sdp_cnt == 0)
5388 		goto exit;
5389 
5390 	sdp = man_kzalloc(sizeof (man_dest_t) * sdp_cnt, KM_NOSLEEP);
5391 	if (sdp == NULL) {
5392 		status = ENOMEM;
5393 		goto exit;
5394 	}
5395 	tdp = sdp;
5396 	/*
5397 	 * Mark each destination as unusable.
5398 	 */
5399 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5400 		if (man_str_uses_pg(msp, mpg)) {
5401 
5402 			/*
5403 			 * Mark destination as plumbing and store the
5404 			 * address of sdp as a way to identify the
5405 			 * SWITCH request when it comes back (see man_iswitch).
5406 			 */
5407 			mdp = &msp->ms_dests[mpg->mpg_pg_id];
5408 			mdp->md_state |= MAN_DSTATE_PLUMBING;
5409 			mdp->md_switch_id = sdp;
5410 
5411 			/*
5412 			 * Copy destination info.
5413 			 */
5414 			bcopy(mdp, tdp, sizeof (man_dest_t));
5415 			tdp++;
5416 
5417 			/*
5418 			 * Cancel timers.
5419 			 */
5420 			if (mdp->md_lc_timer_id) {
5421 				(void) quntimeout(man_ctl_wq,
5422 				    mdp->md_lc_timer_id);
5423 				mdp->md_lc_timer_id = 0;
5424 			}
5425 			if (mdp->md_bc_id) {
5426 				qunbufcall(man_ctl_wq, mdp->md_bc_id);
5427 				mdp->md_bc_id = 0;
5428 			}
5429 		}
5430 	}
5431 
5432 	*mdpp = sdp;
5433 	*cntp = sdp_cnt;
5434 	status = 0;
5435 exit:
5436 
5437 	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: returns %d"
5438 	    " sdp(0x%p) sdp_cnt(%d)", status, (void *)sdp, sdp_cnt));
5439 
5440 	return (status);
5441 
5442 }
5443 
5444 /*
5445  * The code below generates an ICMP echo packet and sends it to the
5446  * broadcast address in the hopes that the other end will respond
5447  * and the man_linkcheck_timer logic will see the traffic.
5448  *
5449  * This assumes ethernet-like media.
5450  */
5451 /*
5452  * Generate an ICMP packet. Called exclusive inner perimeter.
5453  *
5454  *	mdp - destination to send packet to.
5455  *	sap - either ETHERTYPE_ARP or ETHERTYPE_IPV6
5456  */
5457 static void
5458 man_do_icmp_bcast(man_dest_t *mdp, t_uscalar_t sap)
5459 {
5460 	mblk_t			*mp = NULL;
5461 
5462 	/* TBD - merge pinger and this routine. */
5463 
5464 	ASSERT(sap == ETHERTYPE_IPV6 || sap == ETHERTYPE_IP);
5465 
5466 	if (sap == ETHERTYPE_IPV6) {
5467 		mdp->md_icmpv6probes++;
5468 	} else {
5469 		mdp->md_icmpv4probes++;
5470 	}
5471 	/*
5472 	 * Send the ICMP message
5473 	 */
5474 	mp = man_pinger(sap);
5475 
5476 	MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: sap=0x%x mp=0x%p",
5477 	    sap, (void *)mp));
5478 	if (mp == NULL)
5479 		return;
5480 
5481 	/*
5482 	 * Send it out.
5483 	 */
5484 	if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
5485 
5486 		MAN_DBG(MAN_LINK, ("man_do_icmp_broadcast: xmit failed"));
5487 
5488 		freemsg(mp);
5489 	}
5490 
5491 }
5492 
5493 static mblk_t *
5494 man_pinger(t_uscalar_t sap)
5495 {
5496 	mblk_t		*mp = NULL;
5497 	man_dladdr_t	dlsap;
5498 	icmph_t		*icmph;
5499 	int		ipver;
5500 	ipha_t		*ipha;
5501 	ip6_t		*ip6h;
5502 	int		iph_hdr_len;
5503 	int		datalen = 64;
5504 	uchar_t		*datap;
5505 	uint16_t	size;
5506 	uchar_t		i;
5507 
5508 	dlsap.dl_sap = htons(sap);
5509 	bcopy(&etherbroadcast, &dlsap.dl_phys, sizeof (dlsap.dl_phys));
5510 
5511 	if (sap == ETHERTYPE_IPV6) {
5512 		ipver = IPV6_VERSION;
5513 		iph_hdr_len = sizeof (ip6_t);
5514 		size = ICMP6_MINLEN;
5515 	} else {
5516 		ipver = IPV4_VERSION;
5517 		iph_hdr_len = sizeof (ipha_t);
5518 		size = ICMPH_SIZE;
5519 	}
5520 	size += (uint16_t)iph_hdr_len;
5521 	size += datalen;
5522 
5523 	mp = man_alloc_udreq(size, &dlsap);
5524 	if (mp == NULL)
5525 		goto exit;
5526 
5527 	/*
5528 	 * fill out the ICMP echo packet headers
5529 	 */
5530 	mp->b_cont->b_wptr += iph_hdr_len;
5531 	if (ipver == IPV4_VERSION) {
5532 		ipha = (ipha_t *)mp->b_cont->b_rptr;
5533 		ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
5534 		    | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
5535 		ipha->ipha_type_of_service = 0;
5536 		ipha->ipha_length = size;
5537 		ipha->ipha_fragment_offset_and_flags = IPH_DF;
5538 		ipha->ipha_ttl = 1;
5539 		ipha->ipha_protocol = IPPROTO_ICMP;
5540 		if (man_is_on_domain) {
5541 			manc_t		manc;
5542 
5543 			if (man_get_iosram(&manc)) {
5544 				freemsg(mp);
5545 				mp = NULL;
5546 				goto exit;
5547 			}
5548 
5549 			/*
5550 			 * Domain generates ping packets for domain to
5551 			 * SC network (dman0 <--> scman0).
5552 			 */
5553 			ipha->ipha_dst = manc.manc_sc_ipaddr;
5554 			ipha->ipha_src = manc.manc_dom_ipaddr;
5555 		} else {
5556 			/*
5557 			 * Note that ping packets are only generated
5558 			 * by the SC across scman1 (SC to SC network).
5559 			 */
5560 			ipha->ipha_dst = man_sc_ipaddrs.ip_other_sc_ipaddr;
5561 			ipha->ipha_src = man_sc_ipaddrs.ip_my_sc_ipaddr;
5562 		}
5563 
5564 		ipha->ipha_ident = 0;
5565 
5566 		ipha->ipha_hdr_checksum = 0;
5567 		ipha->ipha_hdr_checksum = IP_CSUM(mp->b_cont, 0, 0);
5568 
5569 	} else {
5570 		ip6h = (ip6_t *)mp->b_cont->b_rptr;
5571 		/*
5572 		 * IP version = 6, priority = 0, flow = 0
5573 		 */
5574 		ip6h->ip6_flow = (IPV6_VERSION << 28);
5575 		ip6h->ip6_plen =
5576 		    htons((short)(size - iph_hdr_len));
5577 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
5578 		ip6h->ip6_hlim = 1;	/* stay on link */
5579 
5580 		if (man_is_on_domain) {
5581 			manc_t		manc;
5582 
5583 			if (man_get_iosram(&manc)) {
5584 				freemsg(mp);
5585 				mp = NULL;
5586 				goto exit;
5587 			}
5588 
5589 			/*
5590 			 * Domain generates ping packets for domain to
5591 			 * SC network (dman0 <--> scman0).
5592 			 */
5593 			ip6h->ip6_src = manc.manc_dom_ipv6addr;
5594 			ip6h->ip6_dst = manc.manc_sc_ipv6addr;
5595 		} else {
5596 			/*
5597 			 * Note that ping packets are only generated
5598 			 * by the SC across scman1 (SC to SC network).
5599 			 */
5600 			ip6h->ip6_src = man_sc_ip6addrs.ip6_my_sc_ipaddr;
5601 			ip6h->ip6_dst = man_sc_ip6addrs.ip6_other_sc_ipaddr;
5602 		}
5603 	}
5604 
5605 	/*
5606 	 * IPv6 and IP are the same for ICMP as far as I'm concerned.
5607 	 */
5608 	icmph = (icmph_t *)mp->b_cont->b_wptr;
5609 	if (ipver == IPV4_VERSION) {
5610 		mp->b_cont->b_wptr += ICMPH_SIZE;
5611 		icmph->icmph_type = ICMP_ECHO_REQUEST;
5612 		icmph->icmph_code = 0;
5613 	} else {
5614 		mp->b_cont->b_wptr += ICMP6_MINLEN;
5615 		icmph->icmph_type = ICMP6_ECHO_REQUEST;
5616 		icmph->icmph_code = 0;
5617 	}
5618 
5619 	datap = mp->b_cont->b_wptr;
5620 	mp->b_cont->b_wptr += datalen;
5621 
5622 	for (i = 0; i < datalen; i++)
5623 		*datap++ = i;
5624 
5625 	if (ipver == IPV4_VERSION) {
5626 		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len, 0);
5627 	} else {
5628 		uint32_t	sum;
5629 
5630 		sum = htons(IPPROTO_ICMPV6) + ip6h->ip6_plen;
5631 		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len - 32,
5632 		    (sum & 0xffff) + (sum >> 16));
5633 	}
5634 
5635 /*
5636  * TBD
5637  *	icp->icmp_time =  ???;
5638  */
5639 
5640 exit:
5641 	return (mp);
5642 }
5643 
5644 static mblk_t *
5645 man_alloc_udreq(int size, man_dladdr_t *dlsap)
5646 {
5647 	dl_unitdata_req_t	*udreq;
5648 	mblk_t			*bp;
5649 	mblk_t			*mp;
5650 
5651 	mp = allocb(sizeof (dl_unitdata_req_t) + sizeof (*dlsap), BPRI_MED);
5652 
5653 	if (mp == NULL) {
5654 		cmn_err(CE_NOTE, "man_preparepkt: allocb failed");
5655 		return (NULL);
5656 	}
5657 
5658 	if ((bp = allocb(size, BPRI_MED)) == NULL) {
5659 		freemsg(mp);
5660 		cmn_err(CE_NOTE, "man_preparepkts: allocb failed");
5661 		return (NULL);
5662 	}
5663 	bzero(bp->b_rptr, size);
5664 
5665 	mp->b_cont = bp;
5666 	mp->b_datap->db_type = M_PROTO;
5667 	udreq = (dl_unitdata_req_t *)mp->b_wptr;
5668 	mp->b_wptr += sizeof (dl_unitdata_req_t);
5669 
5670 	/*
5671 	 * phys addr first - TBD
5672 	 */
5673 	bcopy((char *)dlsap, mp->b_wptr, sizeof (*dlsap));
5674 	mp->b_wptr += sizeof (*dlsap);
5675 
5676 	udreq->dl_primitive = DL_UNITDATA_REQ;
5677 	udreq->dl_dest_addr_length = sizeof (*dlsap);
5678 	udreq->dl_dest_addr_offset = sizeof (*udreq);
5679 	udreq->dl_priority.dl_min = 0;
5680 	udreq->dl_priority.dl_max = 0;
5681 
5682 	return (mp);
5683 }
5684 
5685 
5686 /*
5687  * The routines in this file are executed by the MAN background thread,
5688  * which executes outside of the STREAMS framework (see man_str.c). It is
5689  * allowed to do the things required to modify the STREAMS driver (things
5690  * that are normally done from a user process). These routines do things like
5691  * open and close drivers, PLINK and PUNLINK streams to/from the multiplexor,
5692  * etc.
5693  *
5694  * The mechanism of communication between the STREAMS portion of the driver
5695  * and the background thread portion are two work queues, man_bwork_q
5696  * and man_iwork_q (background work q and streams work q).  Work
5697  * requests are placed on those queues when one half of the driver wants
5698  * the other half to do some work for it.
5699  *
5700  * The MAN background thread executes the man_bwork routine. Its sole
5701  * job is to process work requests placed on this work q. The MAN upper
5702  * write service routine is responsible for processing work requests posted
5703  * to the man_iwork_q->
5704  *
 * Both work queues are protected by the global mutex man_lock. The
 * man_bwork thread is signaled via the condvar man_bwork_q->q_cv. The
 * man_uwsrv routine is signaled by calling qenable (forcing man_uwsrv
 * to run).
 */
5709 
/*
 * man_bwork - Work thread for this device.  It is responsible for
 * performing operations which can't occur within the STREAMS framework.
 *
 * The thread loops taking one request at a time off the head of
 * man_bwork_q and dispatching on its mw_type, until a MAN_WORK_STOP
 * request is seen. It then clears man_bwork_id and exits.
 *
 * Locking:
 *	- Called holding no locks
 *	- Obtains the global mutex man_lock to remove work from
 *	  man_bwork_q, and post work to man_iwork_q.
 *	- Note that we do not want to hold any locks when making
 *	  any ldi_ calls.
 */
void
man_bwork()
{
	man_work_t	*wp;
	int		done = 0;
	callb_cpr_t	cprinfo;
	int		wp_finished;

	CALLB_CPR_INIT(&cprinfo, &man_lock, callb_generic_cpr,
	    "mn_work_thrd");

	MAN_DBG(MAN_CONFIG, ("man_bwork: enter"));

	while (done == 0) {

		mutex_enter(&man_lock);
		/*
		 * While there is nothing to do, sit in cv_wait.  If work
		 * request is made, requester will signal.
		 */
		while (man_bwork_q->q_work == NULL) {

			CALLB_CPR_SAFE_BEGIN(&cprinfo);

			cv_wait(&man_bwork_q->q_cv, &man_lock);

			CALLB_CPR_SAFE_END(&cprinfo, &man_lock);
		}

		/*
		 * Unlink the request at the head of the queue, then drop
		 * the lock while it is being processed.
		 */
		wp = man_bwork_q->q_work;
		man_bwork_q->q_work = wp->mw_next;
		wp->mw_next = NULL;
		mutex_exit(&man_lock);

		/*
		 * Assume the request is completed here unless the case
		 * below says otherwise.
		 */
		wp_finished = TRUE;

		MAN_DBG(MAN_SWITCH, ("man_bwork: type %s",
		    _mw_type[wp->mw_type]));

		switch (wp->mw_type) {
		case MAN_WORK_OPEN_CTL:
			wp->mw_status = man_open_ctl();
			break;

		case MAN_WORK_CLOSE_CTL:
			man_close_ctl();
			break;

		case MAN_WORK_CLOSE:
		case MAN_WORK_CLOSE_STREAM:
			man_bclose(&wp->mw_arg);
			break;

		case MAN_WORK_SWITCH:
			/*
			 * The request is passed on to man_bswitch, so it is
			 * not marked done or freed below.
			 */
			man_bswitch(&wp->mw_arg, wp);
			wp_finished = FALSE;
			break;

		case MAN_WORK_STOP:		/* man_bwork_stop() */
			done = 1;
			mutex_enter(&man_lock);
			CALLB_CPR_EXIT(&cprinfo); /* Unlocks man_lock */
			break;

		default:
			cmn_err(CE_WARN, "man_bwork: "
			    "illegal work type(%d)", wp->mw_type);
			break;
		}

		mutex_enter(&man_lock);

		/*
		 * Mark the request done and wake whoever is waiting on it:
		 * a cv waiter is signaled, a queue waiter has its queue
		 * enabled. With no waiter, the request is freed here.
		 */
		if (wp_finished) {
			wp->mw_flags |= MAN_WFLAGS_DONE;
			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
				cv_signal(&wp->mw_cv);
			else if (wp->mw_flags & MAN_WFLAGS_QWAITER)
				qenable(wp->mw_q);
			else
				man_work_free(wp);
		}

		mutex_exit(&man_lock);
	}

	MAN_DBG(MAN_CONFIG, ("man_bwork: thread_exit"));

	/*
	 * Record that the thread is gone before exiting.
	 */
	mutex_enter(&man_lock);
	man_bwork_id = NULL;
	mutex_exit(&man_lock);

	thread_exit();
}
5814 
5815 /*
5816  * man_open_ctl - Open the control stream.
5817  *
5818  *	returns	- success - 0
5819  *		- failure - errno code
5820  *
5821  * Mutex Locking Notes:
5822  *	We need a way to keep the CLONE_OPEN qwaiters in man_open from
5823  *	checking the man_config variables after the ldi_open call below
5824  *	returns from man_open, leaving the inner perimeter. So, we use the
5825  *	man_lock to synchronize the threads in man_open_ctl and man_open.  We
5826  *	hold man_lock across this call into man_open, which in general is a
5827  *	no-no. But, the STREAMs portion of the driver (other than open)
5828  *	doesn't use it. So, if ldi_open gets hijacked to run any part of
5829  *	the MAN streams driver, it wont end up recursively trying to acquire
5830  *	man_lock. Note that the non-CLONE_OPEN portion of man_open doesnt
5831  *	acquire it either, so again no recursive mutex.
5832  */
5833 static int
5834 man_open_ctl()
5835 {
5836 	int		status = 0;
5837 	ldi_handle_t	ctl_lh = NULL;
5838 	ldi_ident_t	li = NULL;
5839 
5840 	MAN_DBG(MAN_CONFIG, ("man_open_ctl: plumbing control stream\n"));
5841 
5842 	/*
5843 	 * Get eri driver loaded and kstats initialized. Is there a better
5844 	 * way to do this? - TBD.
5845 	 */
5846 	status = ldi_ident_from_mod(&modlinkage, &li);
5847 	if (status) {
5848 		cmn_err(CE_WARN,
5849 		    "man_open_ctl: ident alloc failed, error %d", status);
5850 		goto exit;
5851 	}
5852 
5853 	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
5854 	    kcred, &ctl_lh, li);
5855 	if (status) {
5856 		cmn_err(CE_WARN,
5857 		    "man_open_ctl: eri open failed, error %d", status);
5858 		ctl_lh = NULL;
5859 		goto exit;
5860 	}
5861 	(void) ldi_close(ctl_lh, NULL, kcred);
5862 	ctl_lh = NULL;
5863 
5864 	mutex_enter(&man_lock);
5865 
5866 	if (man_ctl_lh != NULL) {
5867 		mutex_exit(&man_lock);
5868 		goto exit;
5869 	}
5870 
5871 	ASSERT(man_ctl_wq == NULL);
5872 	mutex_exit(&man_lock);
5873 
5874 	status = ldi_open_by_name(DMAN_INT_PATH, FREAD | FWRITE | FNOCTTY,
5875 	    kcred, &ctl_lh, li);
5876 	if (status) {
5877 		cmn_err(CE_WARN,
5878 		    "man_open_ctl: man control dev open failed, "
5879 		    "error %d", status);
5880 		goto exit;
5881 	}
5882 
5883 	/*
5884 	 * Update global config state. TBD - dont need lock here, since
5885 	 * everyone is stuck in open until we finish. Only other modifier
5886 	 * is man_deconfigure via _fini, which returns EBUSY if there is
5887 	 * any open streams (other than control). Do need to signal qwaiters
5888 	 * on error.
5889 	 */
5890 	mutex_enter(&man_lock);
5891 	ASSERT(man_config_state == MAN_CONFIGURING);
5892 	ASSERT(man_ctl_lh == NULL);
5893 	man_ctl_lh = ctl_lh;
5894 	mutex_exit(&man_lock);
5895 
5896 exit:
5897 	if (li)
5898 		ldi_ident_release(li);
5899 
5900 	MAN_DBG(MAN_CONFIG, ("man_open_ctl: man_ctl_lh(0x%p) errno = %d\n",
5901 	    (void *)man_ctl_lh, status));
5902 
5903 	return (status);
5904 }
5905 
5906 /*
5907  * man_close_ctl - Close control stream, we are about to unload driver.
5908  *
5909  * Locking:
5910  *	- Called holding no locks.
5911  */
5912 static void
5913 man_close_ctl()
5914 {
5915 	ldi_handle_t tlh;
5916 
5917 	MAN_DBG(MAN_CONFIG, ("man_close_ctl: unplumbing control stream\n"));
5918 
5919 	mutex_enter(&man_lock);
5920 	if ((tlh = man_ctl_lh) != NULL)
5921 		man_ctl_lh = NULL;
5922 	mutex_exit(&man_lock);
5923 
5924 	if (tlh != NULL) {
5925 		(void) ldi_close(tlh, NULL, kcred);
5926 	}
5927 
5928 }
5929 
5930 /*
5931  * Close the lower streams. Get all the timers canceled, close the lower
5932  * stream and delete the dest array.
5933  *
5934  * Returns:
5935  *	0	Closed all streams.
5936  *	1	Couldn't close one or more streams, timers still running.
5937  *
5938  * Locking:
5939  *	- Called holding no locks.
5940  */
5941 static void
5942 man_bclose(man_adest_t *adp)
5943 {
5944 	int		i;
5945 	man_dest_t	*mdp;
5946 
5947 	man_cancel_timers(adp);
5948 
5949 	for (i = 0; i < adp->a_ndests; i++) {
5950 		mdp = &adp->a_mdp[i];
5951 
5952 		if (mdp->md_muxid != -1)
5953 			man_unplumb(mdp);
5954 	}
5955 
5956 	mutex_destroy(&mdp->md_lock);
5957 	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
5958 	adp->a_mdp = NULL;
5959 }
5960 
5961 /*
5962  * We want to close down all lower streams. Need to wait until all
5963  * timers and work related to these lower streams is quiesced.
5964  *
5965  * Returns 1 if lower streams are quiesced, 0 if we need to wait
5966  * a bit longer.
5967  */
5968 static void
5969 man_cancel_timers(man_adest_t *adp)
5970 {
5971 	man_dest_t	*mdp;
5972 	int		cnt;
5973 	int		i;
5974 
5975 	mdp = adp->a_mdp;
5976 	cnt = adp->a_ndests;
5977 
5978 	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: mdp(0x%p) cnt %d",
5979 	    (void *)mdp, cnt));
5980 
5981 	for (i = 0; i < cnt; i++) {
5982 
5983 		if (mdp[i].md_lc_timer_id != 0) {
5984 			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
5985 			mdp[i].md_lc_timer_id = 0;
5986 		}
5987 
5988 		if (mdp[i].md_bc_id != 0) {
5989 			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
5990 			mdp[i].md_bc_id = 0;
5991 		}
5992 	}
5993 
5994 	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: returns"));
5995 }
5996 
5997 /*
5998  * A failover is started at start of day, when the driver detects a
5999  * link failure (see man_linkcheck_timer), or when DR detaches
6000  * the IO board containing the current active link between SC and
6001  * domain (see man_dr_detach, man_iwork, and man_do_dr_detach). A
6002  * MAN_WORK_SWITCH work request containing all the lower streams that
6003  * should be switched is posted on the man_bwork_q-> This work request is
6004  * processed here. Once all lower streams have been switched to an
6005  * alternate path, the MAN_WORK_SWITCH work request is passed back to
6006  * man_iwork_q where it is processed within the inner perimeter of the
6007  * STREAMS framework (see man_iswitch).
6008  *
6009  * Note that when the switch fails for whatever reason, we just hand
6010  * back the lower streams untouched and let another failover happen.
6011  * Hopefully we will sooner or later succeed at the failover.
6012  */
6013 static void
6014 man_bswitch(man_adest_t *adp, man_work_t *wp)
6015 {
6016 	man_dest_t	*tdp;
6017 	man_t		*manp;
6018 	int		i;
6019 	int		status = 0;
6020 
6021 	/*
6022 	 * Make a temporary copy of dest array, updating device to the
6023 	 * alternate and try to open all lower streams. bgthread can sleep.
6024 	 */
6025 
6026 	tdp = man_kzalloc(sizeof (man_dest_t) * adp->a_ndests,
6027 	    KM_SLEEP);
6028 	bcopy(adp->a_mdp, tdp, sizeof (man_dest_t) * adp->a_ndests);
6029 
6030 	/*
6031 	 * Before we switch to the new path, lets sync the kstats.
6032 	 */
6033 	mutex_enter(&man_lock);
6034 
6035 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
6036 	if (manp != NULL) {
6037 		man_update_path_kstats(manp);
6038 	} else
6039 		status = ENODEV;
6040 
6041 	mutex_exit(&man_lock);
6042 
6043 	if (status != 0)
6044 		goto exit;
6045 
6046 	for (i = 0; i < adp->a_ndests; i++) {
6047 
6048 		tdp[i].md_device = adp->a_st_dev;
6049 		tdp[i].md_muxid = -1;
6050 
6051 		if (man_plumb(&tdp[i]))
6052 			break;
6053 	}
6054 
6055 	/*
6056 	 * Didn't plumb everyone, unplumb new lower stuff and return.
6057 	 */
6058 	if (i < adp->a_ndests) {
6059 		int	j;
6060 
6061 		for (j = 0; j <= i; j++)
6062 			man_unplumb(&tdp[j]);
6063 		status = EAGAIN;
6064 		goto exit;
6065 	}
6066 
6067 	if (man_is_on_domain && man_dossc_switch(adp->a_st_dev.mdev_exp_id)) {
6068 		/*
6069 		 * If we cant set new path on the SSC, then fail the
6070 		 * failover.
6071 		 */
6072 		for (i = 0; i < adp->a_ndests; i++)
6073 			man_unplumb(&tdp[i]);
6074 		status = EAGAIN;
6075 		goto exit;
6076 	}
6077 
6078 	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
6079 	adp->a_mdp = tdp;
6080 
6081 exit:
6082 	if (status)
6083 		man_kfree(tdp, sizeof (man_dest_t) * adp->a_ndests);
6084 
6085 
6086 	MAN_DBG(MAN_SWITCH, ("man_bswitch: returns %d", status));
6087 
6088 	/*
6089 	 * Hand processed switch request back to man_iwork for
6090 	 * processing in man_iswitch.
6091 	 */
6092 	wp->mw_status = status;
6093 
6094 	mutex_enter(&man_lock);
6095 	man_work_add(man_iwork_q, wp);
6096 	mutex_exit(&man_lock);
6097 
6098 }
6099 
6100 /*
6101  * man_plumb - Configure a lower stream for this destination.
6102  *
6103  * Locking:
6104  * 	- Called holding no locks.
6105  *
6106  * Returns:
6107  *	- success - 0
6108  *	- failure - error code of failure
6109  */
6110 static int
6111 man_plumb(man_dest_t *mdp)
6112 {
6113 	int		status;
6114 	int		muxid;
6115 	ldi_handle_t	lh;
6116 	ldi_ident_t	li = NULL;
6117 
6118 	MAN_DBG(MAN_SWITCH, ("man_plumb: mdp(0x%p) %s%d exp(%d)",
6119 	    (void *)mdp, ddi_major_to_name(mdp->md_device.mdev_major),
6120 	    mdp->md_device.mdev_ppa, mdp->md_device.mdev_exp_id));
6121 
6122 	/*
6123 	 * Control stream should already be open.
6124 	 */
6125 	if (man_ctl_lh == NULL) {
6126 		status = EAGAIN;
6127 		goto exit;
6128 	}
6129 
6130 	mutex_enter(&man_lock);
6131 	ASSERT(man_ctl_wq != NULL);
6132 	status = ldi_ident_from_stream(man_ctl_wq, &li);
6133 	if (status != 0) {
6134 		cmn_err(CE_WARN,
6135 		    "man_plumb: ident alloc failed, error %d", status);
6136 		goto exit;
6137 	}
6138 	mutex_exit(&man_lock);
6139 
6140 	/*
6141 	 * previously opens were done by a dev_t of makedev(clone_major,
6142 	 * mdev_major) which should always map to /devices/pseudo/clone@0:eri
6143 	 */
6144 	ASSERT(strcmp(ERI_IDNAME,
6145 	    ddi_major_to_name(mdp->md_device.mdev_major)) == 0);
6146 
6147 	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
6148 	    kcred, &lh, li);
6149 	if (status) {
6150 		cmn_err(CE_WARN,
6151 		    "man_plumb: eri open failed, error %d", status);
6152 		goto exit;
6153 	}
6154 
6155 	/*
6156 	 * Link netdev under MAN.
6157 	 */
6158 	ASSERT(mdp->md_muxid == -1);
6159 
6160 	status = ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lh,
6161 	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
6162 	if (status) {
6163 		cmn_err(CE_WARN,
6164 		    "man_plumb: ldi_ioctl(I_PLINK) failed, error %d", status);
6165 		(void) ldi_close(lh, NULL, kcred);
6166 		goto exit;
6167 
6168 	}
6169 	mdp->md_muxid = muxid;
6170 	mdp->md_wq = man_linkrec_find(muxid);
6171 	/*
6172 	 * If we can't find the linkrec then return an
6173 	 * error. It will be automatically unplumbed on failure.
6174 	 */
6175 	if (mdp->md_wq == NULL)
6176 		status = EAGAIN;
6177 
6178 	(void) ldi_close(lh, NULL, kcred);
6179 exit:
6180 	if (li)
6181 		ldi_ident_release(li);
6182 
6183 	MAN_DBG(MAN_SWITCH, ("man_plumb: exit\n"));
6184 
6185 	return (status);
6186 }
6187 
6188 /*
6189  * man_unplumb - tear down the STREAMs framework for the lower multiplexor.
6190  *
6191  *	mdp - destination struct of interest
6192  *
6193  *	returns	- success - 0
6194  *		- failure - return error from ldi_ioctl
6195  */
6196 static void
6197 man_unplumb(man_dest_t *mdp)
6198 {
6199 	int	status, rval;
6200 
6201 	MAN_DBG(MAN_SWITCH, ("man_unplumb: mdp"));
6202 	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
6203 
6204 	if (mdp->md_muxid == -1)
6205 		return;
6206 
6207 	ASSERT(man_ctl_lh != NULL);
6208 
6209 	/*
6210 	 * I_PUNLINK causes the multiplexor resources to be freed.
6211 	 */
6212 	status = ldi_ioctl(man_ctl_lh, I_PUNLINK, (intptr_t)mdp->md_muxid,
6213 	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &rval);
6214 	if (status) {
6215 		cmn_err(CE_WARN, "man_unplumb: ldi_ioctl(I_PUNLINK) failed"
6216 		    " errno %d\n", status);
6217 	}
6218 	/*
6219 	 * Delete linkrec if it exists.
6220 	 */
6221 	(void) man_linkrec_find(mdp->md_muxid);
6222 	mdp->md_muxid = -1;
6223 
6224 }
6225 
6226 /*
6227  * The routines below deal with paths and pathgroups. These data structures
6228  * are used to track the physical devices connecting the domain and SSC.
6229  * These devices make up the lower streams of the MAN multiplexor. The
6230  * routines all expect the man_lock to be held.
6231  *
6232  * A pathgroup consists of all paths that connect a particular domain and the
6233  * SSC. The concept of a pathgroup id (pg_id) is used to uniquely identify
6234  * a pathgroup.  For Domains, there is just one pathgroup, that connecting
6235  * the domain to the SSC (pg_id == 0). On the SSC, there is one pathgroup per
6236  * domain. The pg_id field corresponds to the domain tags A-R. A pg_id of
6237  * 0 means domain tag A, a pg_id of 1 means domain B, etc.
6238  *
6239  * The path data structure identifies one path between the SSC and a domain.
6240  * It describes the information for the path: the major and minor number of
6241  * the physical device; kstat pointers; and ethernet address of the
6242  * other end of the path.
6243  *
 * The pathgroups are anchored at man_pg_head and are protected by the
 * inner perimeter. The routines are only called by the STREAMs
 * portion of the driver.
6247  */
6248 
6249 /*
6250  * Update man instance pathgroup info. Exclusive inner perimeter assures
6251  * this code is single threaded. man_refcnt assures man_t wont detach
6252  * while we are playing with man_pg stuff.
6253  *
6254  * Returns 0 on success, errno on failure.
6255  */
6256 int
6257 man_pg_cmd(mi_path_t *mip, man_work_t *waiter_wp)
6258 {
6259 	int		status = 0;
6260 	man_t		*manp;
6261 
6262 	if (mip->mip_ndevs < 0) {
6263 		status = EINVAL;
6264 		cmn_err(CE_WARN, "man_pg_cmd: EINVAL: mip_ndevs %d",
6265 		    mip->mip_ndevs);
6266 		goto exit;
6267 	}
6268 
6269 	ASSERT(MUTEX_HELD(&man_lock));
6270 	manp = ddi_get_soft_state(man_softstate, mip->mip_man_ppa);
6271 	if (manp == NULL) {
6272 		status = ENODEV;
6273 		goto exit;
6274 	}
6275 
6276 	MAN_DBG(MAN_PATH, ("man_pg_cmd: mip"));
6277 	MAN_DBGCALL(MAN_PATH, man_print_mip(mip));
6278 
6279 	MAN_DBG(MAN_PATH, ("\tman_t"));
6280 	MAN_DBGCALL(MAN_PATH, man_print_man(manp));
6281 
6282 	switch (mip->mip_cmd) {
6283 	case MI_PATH_ASSIGN:
6284 		status = man_pg_assign(&manp->man_pg, mip, FALSE);
6285 		break;
6286 
6287 	case MI_PATH_ADD:
6288 		status = man_pg_assign(&manp->man_pg, mip, TRUE);
6289 		break;
6290 
6291 	case MI_PATH_UNASSIGN:
6292 		status = man_pg_unassign(&manp->man_pg, mip);
6293 		break;
6294 
6295 	case MI_PATH_ACTIVATE:
6296 		status = man_pg_activate(manp, mip, waiter_wp);
6297 		break;
6298 
6299 	case MI_PATH_READ:
6300 		status = man_pg_read(manp->man_pg, mip);
6301 		break;
6302 
6303 	default:
6304 		status = EINVAL;
6305 		cmn_err(CE_NOTE, "man_pg_cmd: invalid command");
6306 		break;
6307 	}
6308 
6309 exit:
6310 	MAN_DBG(MAN_PATH, ("man_pg_cmd: returns %d", status));
6311 
6312 	return (status);
6313 }
6314 
6315 /*
6316  * Assign paths to a pathgroup. If pathgroup doesnt exists, create it.
6317  * If path doesnt exist, create it. If ethernet address of existing
6318  * pathgroup different, change it. If an existing path is not in the new
6319  * list, remove it.  If anything changed, send PATH_UPDATE request to
6320  * man_iwork to update all man_dest_t's.
6321  *
6322  * 	mplpp	- man pathgroup list point to point.
6323  *	mip	- new/updated pathgroup info to assign.
6324  */
6325 static int
6326 man_pg_assign(man_pg_t **mplpp, mi_path_t *mip, int add_only)
6327 {
6328 	man_pg_t	*mpg;
6329 	man_path_t	*mp;
6330 	man_path_t	*add_paths = NULL;
6331 	int		cnt;
6332 	int		i;
6333 	int		first_pass = TRUE;
6334 	int		status = 0;
6335 
6336 	ASSERT(MUTEX_HELD(&man_lock));
6337 
6338 	cnt = mip->mip_ndevs;
6339 	if (cnt == 0) {
6340 		status = EINVAL;
6341 		cmn_err(CE_NOTE, "man_pg_assign: mip_ndevs == 0");
6342 		goto exit;
6343 	}
6344 
6345 	/*
6346 	 * Assure the devices to be assigned are not assigned to some other
6347 	 * pathgroup.
6348 	 */
6349 	for (i = 0; i < cnt; i++) {
6350 		mpg = man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL);
6351 
6352 		if (mpg == NULL)
6353 			continue;
6354 
6355 		if ((mpg->mpg_man_ppa != mip->mip_man_ppa) ||
6356 		    (mpg->mpg_pg_id != mip->mip_pg_id)) {
6357 			/*
6358 			 * Already assigned to some other man instance
6359 			 * or pathgroup.
6360 			 */
6361 			status = EEXIST;
6362 			goto exit;
6363 		}
6364 	}
6365 
6366 	/*
6367 	 * Find pathgroup, or allocate new one if it doesnt exist and
6368 	 * add it to list at mplpp. Result is that mpg points to
6369 	 * pathgroup to modify.
6370 	 */
6371 	mpg = man_find_pg_by_id(*mplpp, mip->mip_pg_id);
6372 	if (mpg == NULL) {
6373 
6374 		status = man_pg_create(mplpp, &mpg, mip);
6375 		if (status)
6376 			goto exit;
6377 
6378 	} else if (ether_cmp(&mip->mip_eaddr, &mpg->mpg_dst_eaddr) != 0) {
6379 
6380 		cmn_err(CE_WARN, "man_pg_assign: ethernet address mismatch");
6381 		cmn_err(CE_CONT, "existing %s",
6382 		    ether_sprintf(&mpg->mpg_dst_eaddr));
6383 		cmn_err(CE_CONT, "new %s",
6384 		    ether_sprintf(&mip->mip_eaddr));
6385 
6386 		status = EINVAL;
6387 		goto exit;
6388 	}
6389 
6390 	/*
6391 	 * Create list of new paths to add to pathgroup.
6392 	 */
6393 	for (i = 0; i < cnt; i++) {
6394 
6395 		if (man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL))
6396 			continue;	/* Already exists in this pathgroup */
6397 
6398 		mp = man_kzalloc(sizeof (man_path_t), KM_NOSLEEP);
6399 		if (mp == NULL) {
6400 			status = ENOMEM;
6401 			goto exit;
6402 		}
6403 
6404 		mp->mp_device = mip->mip_devs[i];
6405 		mp->mp_device.mdev_state = MDEV_ASSIGNED;
6406 
6407 		MAN_DBG(MAN_PATH, ("man_pg_assign: assigning mdp"));
6408 		MAN_DBGCALL(MAN_PATH, man_print_dev(&mp->mp_device));
6409 
6410 		status = man_path_kstat_init(mp);
6411 		if (status) {
6412 			man_kfree(mp, sizeof (man_path_t));
6413 			goto exit;
6414 		}
6415 
6416 		man_path_insert(&add_paths, mp);
6417 	}
6418 
6419 	/*
6420 	 * man_dr_attach passes only the path which is being DRd in.
6421 	 * So just add the path and don't worry about removing paths.
6422 	 */
6423 	if (add_only == TRUE)
6424 		goto exit;
6425 
6426 
6427 	/*
6428 	 * Check if any paths we want to remove are ACTIVE. If not,
6429 	 * do a second pass and remove them.
6430 	 */
6431 again:
6432 	mp = mpg->mpg_pathp;
6433 	while (mp != NULL) {
6434 		int		in_new_list;
6435 		man_path_t	*rp;
6436 
6437 		rp = NULL;
6438 		in_new_list = FALSE;
6439 
6440 		for (i = 0; i < cnt; i++) {
6441 			if (mp->mp_device.mdev_ppa ==
6442 			    mip->mip_devs[i].mdev_ppa) {
6443 
6444 				in_new_list = TRUE;
6445 				break;
6446 			}
6447 		}
6448 
6449 		if (!in_new_list) {
6450 			if (first_pass) {
6451 				if (mp->mp_device.mdev_state & MDEV_ACTIVE) {
6452 					status = EBUSY;
6453 					goto exit;
6454 				}
6455 			} else {
6456 				rp = mp;
6457 			}
6458 		}
6459 		mp = mp->mp_next;
6460 
6461 		if (rp != NULL)
6462 			man_path_remove(&mpg->mpg_pathp, rp);
6463 	}
6464 
6465 	if (first_pass == TRUE) {
6466 		first_pass = FALSE;
6467 		goto again;
6468 	}
6469 
6470 exit:
6471 	if (status == 0) {
6472 		if (add_paths)
6473 			man_path_merge(&mpg->mpg_pathp, add_paths);
6474 	} else {
6475 		while (add_paths != NULL) {
6476 			mp = add_paths;
6477 			add_paths = mp->mp_next;
6478 			mp->mp_next = NULL;
6479 
6480 			man_path_kstat_uninit(mp);
6481 			man_kfree(mp, sizeof (man_path_t));
6482 		}
6483 	}
6484 
6485 	return (status);
6486 }
6487 
6488 /*
6489  * Remove all paths from a pathgroup (domain shutdown). If there is an
6490  * active path in the group, shut down all destinations referencing it
6491  * first.
6492  */
6493 static int
6494 man_pg_unassign(man_pg_t **plpp, mi_path_t *mip)
6495 {
6496 	man_pg_t	*mpg;
6497 	man_pg_t	*tpg;
6498 	man_pg_t	*tppg;
6499 	man_path_t	*mp = NULL;
6500 	int		status = 0;
6501 
6502 	ASSERT(MUTEX_HELD(&man_lock));
6503 
6504 	/*
6505 	 * Check for existence of pathgroup.
6506 	 */
6507 	if ((mpg = man_find_pg_by_id(*plpp, mip->mip_pg_id)) == NULL)
6508 		goto exit;
6509 
6510 	if (man_find_active_path(mpg->mpg_pathp) != NULL) {
6511 		status = man_remove_dests(mpg);
6512 		if (status)
6513 			goto exit;
6514 	}
6515 
6516 	/*
6517 	 * Free all the paths for this pathgroup.
6518 	 */
6519 	while (mpg->mpg_pathp) {
6520 		mp = mpg->mpg_pathp;
6521 		mpg->mpg_pathp = mp->mp_next;
6522 		mp->mp_next = NULL;
6523 
6524 		man_path_kstat_uninit(mp);
6525 		man_kfree(mp, sizeof (man_path_t));
6526 	}
6527 
6528 	/*
6529 	 * Remove this pathgroup from the list, and free it.
6530 	 */
6531 	tpg = tppg = *plpp;
6532 	if (tpg == mpg) {
6533 		*plpp = tpg->mpg_next;
6534 		goto free_pg;
6535 	}
6536 
6537 	for (tpg = tpg->mpg_next; tpg != NULL; tpg = tpg->mpg_next) {
6538 		if (tpg == mpg)
6539 			break;
6540 		tppg = tpg;
6541 	}
6542 
6543 	ASSERT(tpg != NULL);
6544 
6545 	tppg->mpg_next = tpg->mpg_next;
6546 	tpg->mpg_next = NULL;
6547 
6548 free_pg:
6549 	man_kfree(tpg, sizeof (man_pg_t));
6550 
6551 exit:
6552 	return (status);
6553 
6554 }
6555 
6556 /*
6557  * Set a new active path. This is done via man_ioctl so we are
6558  * exclusive in the inner perimeter.
6559  */
6560 static int
6561 man_pg_activate(man_t *manp, mi_path_t *mip, man_work_t *waiter_wp)
6562 {
6563 	man_pg_t	*mpg1;
6564 	man_pg_t	*mpg2;
6565 	man_pg_t	*plp;
6566 	man_path_t	*mp;
6567 	man_path_t	*ap;
6568 	int		status = 0;
6569 
6570 	ASSERT(MUTEX_HELD(&man_lock));
6571 	MAN_DBG(MAN_PATH, ("man_pg_activate: dev"));
6572 	MAN_DBGCALL(MAN_PATH, man_print_dev(mip->mip_devs));
6573 
6574 	if (mip->mip_ndevs != 1) {
6575 		status = EINVAL;
6576 		goto exit;
6577 	}
6578 
6579 	plp = manp->man_pg;
6580 	mpg1 = man_find_pg_by_id(plp, mip->mip_pg_id);
6581 	if (mpg1 == NULL) {
6582 		status = EINVAL;
6583 		goto exit;
6584 	}
6585 
6586 	mpg2 = man_find_path_by_dev(plp, mip->mip_devs, &mp);
6587 	if (mpg2 == NULL) {
6588 		status = ENODEV;
6589 		goto exit;
6590 	}
6591 
6592 	if (mpg1 != mpg2) {
6593 		status = EINVAL;
6594 		goto exit;
6595 	}
6596 
6597 	ASSERT(mp->mp_device.mdev_ppa == mip->mip_devs->mdev_ppa);
6598 
6599 	if (mpg1->mpg_flags & MAN_PG_SWITCHING) {
6600 		status = EAGAIN;
6601 		goto exit;
6602 	}
6603 
6604 	ap = man_find_active_path(mpg1->mpg_pathp);
6605 	if (ap == NULL) {
6606 		/*
6607 		 * This is the first time a path has been activated for
6608 		 * this pathgroup. Initialize all upper streams dest
6609 		 * structure for this pathgroup so autoswitch will find
6610 		 * them.
6611 		 */
6612 		mp->mp_device.mdev_state |= MDEV_ACTIVE;
6613 		man_add_dests(mpg1);
6614 		goto exit;
6615 	}
6616 
6617 	/*
6618 	 * Path already active, nothing to do.
6619 	 */
6620 	if (ap == mp)
6621 		goto exit;
6622 
6623 	/*
6624 	 * Try to autoswitch to requested device. Set flags and refcnt.
6625 	 * Cleared in man_iswitch when SWITCH completes.
6626 	 */
6627 	manp->man_refcnt++;
6628 	mpg1->mpg_flags |= MAN_PG_SWITCHING;
6629 
6630 	/*
6631 	 * Switch to path specified.
6632 	 */
6633 	status = man_autoswitch(mpg1, mip->mip_devs, waiter_wp);
6634 
6635 	if (status != 0) {
6636 		/*
6637 		 * man_iswitch not going to run, clean up.
6638 		 */
6639 		manp->man_refcnt--;
6640 		mpg1->mpg_flags &= ~MAN_PG_SWITCHING;
6641 
6642 		if (status == ENODEV) {
6643 			/*
6644 			 * Device not plumbed isn't really an error. Change
6645 			 * active device setting here, since man_iswitch isn't
6646 			 * going to be run to do it.
6647 			 */
6648 			status = 0;
6649 			ap->mp_device.mdev_state &= ~MDEV_ACTIVE;
6650 			mp->mp_device.mdev_state |= MDEV_ACTIVE;
6651 		}
6652 	}
6653 
6654 exit:
6655 	MAN_DBG(MAN_PATH, ("man_pg_activate: returns %d", status));
6656 
6657 	return (status);
6658 }
6659 
6660 static int
6661 man_pg_read(man_pg_t *plp, mi_path_t *mip)
6662 {
6663 	man_pg_t	*mpg;
6664 	man_path_t	*mp;
6665 	int		cnt;
6666 	int		status = 0;
6667 
6668 	ASSERT(MUTEX_HELD(&man_lock));
6669 
6670 	if ((mpg = man_find_pg_by_id(plp, mip->mip_pg_id)) == NULL) {
6671 		status = ENODEV;
6672 		goto exit;
6673 	}
6674 
6675 	cnt = 0;
6676 	for (mp = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6677 		bcopy(&mp->mp_device, &mip->mip_devs[cnt], sizeof (man_dev_t));
6678 		if (cnt == mip->mip_ndevs)
6679 			break;
6680 		cnt++;
6681 	}
6682 
6683 	MAN_DBG(MAN_PATH, ("man_pg_read: pg(0x%p) id(%d) found %d paths",
6684 	    (void *)mpg, mpg->mpg_pg_id, cnt));
6685 
6686 	mip->mip_ndevs = cnt;
6687 
6688 	/*
6689 	 * TBD - What should errno be if user buffer too small ?
6690 	 */
6691 	if (mp != NULL) {
6692 		status = ENOMEM;
6693 	}
6694 
6695 exit:
6696 
6697 	return (status);
6698 }
6699 
6700 /*
6701  * return existing pathgroup, or create it. TBD - Need to update
6702  * all of destinations if we added a pathgroup. Also, need to update
6703  * all of man_strup if we add a path.
6704  *
6705  * 	mplpp	- man pathgroup list point to pointer.
6706  * 	mpgp	- returns newly created man pathgroup.
6707  *	mip	- info to fill in mpgp.
6708  */
6709 static int
6710 man_pg_create(man_pg_t **mplpp, man_pg_t **mpgp, mi_path_t *mip)
6711 {
6712 	man_pg_t	*mpg;
6713 	man_pg_t	*tpg;
6714 	int		status = 0;
6715 
6716 	ASSERT(MUTEX_HELD(&man_lock));
6717 
6718 	if (ether_cmp(&mip->mip_eaddr, &zero_ether_addr) == 0) {
6719 		cmn_err(CE_NOTE, "man_ioctl: man_pg_create: ether"
6720 		    " addresss not set!");
6721 		status = EINVAL;
6722 		goto exit;
6723 	}
6724 
6725 	mpg = man_kzalloc(sizeof (man_pg_t), KM_NOSLEEP);
6726 	if (mpg == NULL) {
6727 		status = ENOMEM;
6728 		goto exit;
6729 	}
6730 
6731 	mpg->mpg_flags = MAN_PG_IDLE;
6732 	mpg->mpg_pg_id = mip->mip_pg_id;
6733 	mpg->mpg_man_ppa = mip->mip_man_ppa;
6734 	ether_copy(&mip->mip_eaddr, &mpg->mpg_dst_eaddr);
6735 
6736 	MAN_DBG(MAN_PATH, ("man_pg_create: new mpg"));
6737 	MAN_DBGCALL(MAN_PATH, man_print_mpg(mpg));
6738 
6739 	tpg = *mplpp;
6740 	if (tpg == NULL) {
6741 		*mplpp = mpg;
6742 	} else {
6743 		while (tpg->mpg_next != NULL)
6744 			tpg = tpg->mpg_next;
6745 		tpg->mpg_next = mpg;
6746 	}
6747 
6748 exit:
6749 	*mpgp = mpg;
6750 
6751 	return (status);
6752 }
6753 
6754 /*
6755  * Return pointer to pathgroup containing mdevp, null otherwise. Also,
6756  * if a path pointer is passed in, set it to matching path in pathgroup.
6757  *
6758  * Called holding man_lock.
6759  */
6760 static man_pg_t *
6761 man_find_path_by_dev(man_pg_t *plp, man_dev_t *mdevp, man_path_t **mpp)
6762 {
6763 	man_pg_t	*mpg;
6764 	man_path_t	*mp;
6765 
6766 	ASSERT(MUTEX_HELD(&man_lock));
6767 	for (mpg = plp; mpg != NULL; mpg = mpg->mpg_next) {
6768 		for (mp  = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6769 			if (mp->mp_device.mdev_major == mdevp->mdev_major &&
6770 			    mp->mp_device.mdev_ppa == mdevp->mdev_ppa) {
6771 
6772 				if (mpp != NULL)
6773 					*mpp = mp;
6774 				return (mpg);
6775 			}
6776 		}
6777 	}
6778 
6779 	return (NULL);
6780 }
6781 
6782 /*
6783  * Return pointer to pathgroup assigned to destination, null if not found.
6784  *
6785  * Called holding man_lock.
6786  */
6787 static man_pg_t *
6788 man_find_pg_by_id(man_pg_t *mpg, int pg_id)
6789 {
6790 	ASSERT(MUTEX_HELD(&man_lock));
6791 	for (; mpg != NULL; mpg = mpg->mpg_next) {
6792 		if (mpg->mpg_pg_id == pg_id)
6793 			return (mpg);
6794 	}
6795 
6796 	return (NULL);
6797 }
6798 
6799 static man_path_t *
6800 man_find_path_by_ppa(man_path_t *mplist, int ppa)
6801 {
6802 	man_path_t	*mp;
6803 
6804 	ASSERT(MUTEX_HELD(&man_lock));
6805 	for (mp = mplist; mp != NULL; mp = mp->mp_next) {
6806 		if (mp->mp_device.mdev_ppa == ppa)
6807 			return (mp);
6808 	}
6809 
6810 	return (NULL);
6811 }
6812 
6813 static man_path_t *
6814 man_find_active_path(man_path_t *mplist)
6815 {
6816 	man_path_t	*mp;
6817 
6818 	ASSERT(MUTEX_HELD(&man_lock));
6819 	for (mp = mplist; mp != NULL; mp = mp->mp_next)
6820 		if (mp->mp_device.mdev_state & MDEV_ACTIVE)
6821 			return (mp);
6822 
6823 	return (NULL);
6824 }
6825 
6826 /*
6827  * Try and find an alternate path.
6828  */
6829 static man_path_t *
6830 man_find_alternate_path(man_path_t *mlp)
6831 {
6832 	man_path_t	*ap;		/* Active path */
6833 	man_path_t	*np;		/* New alternate path */
6834 	man_path_t	*fp = NULL;	/* LRU failed path */
6835 
6836 	ASSERT(MUTEX_HELD(&man_lock));
6837 	ap = man_find_active_path(mlp);
6838 
6839 	/*
6840 	 * Find a non-failed path, or the lru failed path and switch to it.
6841 	 */
6842 	for (np = mlp; np != NULL; np = np->mp_next) {
6843 		if (np == ap)
6844 			continue;
6845 
6846 		if (np->mp_device.mdev_state == MDEV_ASSIGNED)
6847 			goto exit;
6848 
6849 		if (np->mp_device.mdev_state & MDEV_FAILED) {
6850 			if (fp == NULL)
6851 				fp = np;
6852 			else
6853 				if (fp->mp_lru > np->mp_lru)
6854 						fp = np;
6855 		}
6856 	}
6857 
6858 	/*
6859 	 * Nowhere to switch to.
6860 	 */
6861 	if (np == NULL && (np =  fp) == NULL)
6862 		goto exit;
6863 
6864 exit:
6865 	return (np);
6866 }
6867 
6868 /*
6869  * Assumes caller has verified existence.
6870  */
6871 static void
6872 man_path_remove(man_path_t **lpp, man_path_t *mp)
6873 {
6874 	man_path_t	*tp;
6875 	man_path_t	*tpp;
6876 
6877 	ASSERT(MUTEX_HELD(&man_lock));
6878 	MAN_DBG(MAN_PATH, ("man_path_remove: removing path"));
6879 	MAN_DBGCALL(MAN_PATH, man_print_path(mp));
6880 
6881 	tp = tpp = *lpp;
6882 	if (tp == mp) {
6883 		*lpp = tp->mp_next;
6884 		goto exit;
6885 	}
6886 
6887 	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
6888 		if (tp == mp)
6889 			break;
6890 		tpp = tp;
6891 	}
6892 
6893 	ASSERT(tp != NULL);
6894 
6895 	tpp->mp_next = tp->mp_next;
6896 	tp->mp_next = NULL;
6897 
6898 exit:
6899 	man_path_kstat_uninit(tp);
6900 	man_kfree(tp, sizeof (man_path_t));
6901 
6902 }
6903 
6904 /*
6905  * Insert path into list, ascending order by ppa.
6906  */
6907 static void
6908 man_path_insert(man_path_t **lpp, man_path_t *mp)
6909 {
6910 	man_path_t	*tp;
6911 	man_path_t	*tpp;
6912 
6913 	ASSERT(MUTEX_HELD(&man_lock));
6914 	if (*lpp == NULL) {
6915 		*lpp = mp;
6916 		return;
6917 	}
6918 
6919 	tp = tpp = *lpp;
6920 	if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa) {
6921 		mp->mp_next = tp;
6922 		*lpp = mp;
6923 		return;
6924 	}
6925 
6926 	for (tp = tp->mp_next; tp != NULL; tp =  tp->mp_next) {
6927 		if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa)
6928 			break;
6929 		tpp = tp;
6930 	}
6931 
6932 	if (tp == NULL) {
6933 		tpp->mp_next = mp;
6934 	} else {
6935 		tpp->mp_next = mp;
6936 		mp->mp_next = tp;
6937 	}
6938 }
6939 
6940 /*
6941  * Merge npp into lpp, ascending order by ppa. Assumes no
6942  * duplicates in either list.
6943  */
6944 static void
6945 man_path_merge(man_path_t **lpp, man_path_t *np)
6946 {
6947 	man_path_t	*tmp;
6948 
6949 	ASSERT(MUTEX_HELD(&man_lock));
6950 	while (np != NULL) {
6951 		tmp = np;
6952 		np = np->mp_next;
6953 		tmp->mp_next = NULL;
6954 
6955 		man_path_insert(lpp, tmp);
6956 	}
6957 
6958 }
6959 
6960 static int
6961 man_path_kstat_init(man_path_t *mpp)
6962 {
6963 
6964 	kstat_named_t	*dev_knp;
6965 	int		status = 0;
6966 
6967 	ASSERT(MUTEX_HELD(&man_lock));
6968 	MAN_DBG(MAN_PATH, ("man_path_kstat_init: mpp(0x%p)\n", (void *)mpp));
6969 
6970 	/*
6971 	 * Create named kstats for accounting purposes.
6972 	 */
6973 	dev_knp = man_kzalloc(MAN_NUMSTATS * sizeof (kstat_named_t),
6974 	    KM_NOSLEEP);
6975 	if (dev_knp == NULL) {
6976 		status = ENOMEM;
6977 		goto exit;
6978 	}
6979 	man_kstat_named_init(dev_knp, MAN_NUMSTATS);
6980 	mpp->mp_last_knp = dev_knp;
6981 
6982 exit:
6983 
6984 	MAN_DBG(MAN_PATH, ("man_path_kstat_init: returns %d\n", status));
6985 
6986 	return (status);
6987 }
6988 
6989 static void
6990 man_path_kstat_uninit(man_path_t *mp)
6991 {
6992 	ASSERT(MUTEX_HELD(&man_lock));
6993 	man_kfree(mp->mp_last_knp, MAN_NUMSTATS * sizeof (kstat_named_t));
6994 }
6995 
6996 /*
6997  * man_work_alloc - allocate and initiate a work request structure
6998  *
6999  *	type - type of request to allocate
7000  *	returns	- success - ptr to an initialized work structure
7001  *		- failure - NULL
7002  */
7003 man_work_t *
7004 man_work_alloc(int type, int kmflag)
7005 {
7006 	man_work_t	*wp;
7007 
7008 	wp = man_kzalloc(sizeof (man_work_t), kmflag);
7009 	if (wp == NULL)
7010 		goto exit;
7011 
7012 	cv_init(&wp->mw_cv, NULL, CV_DRIVER, NULL); \
7013 	wp->mw_type = type;
7014 
7015 exit:
7016 	return (wp);
7017 }
7018 
7019 /*
7020  * man_work_free - deallocate a work request structure
7021  *
7022  *	wp - ptr to work structure to be freed
7023  */
7024 void
7025 man_work_free(man_work_t *wp)
7026 {
7027 	cv_destroy(&wp->mw_cv);
7028 	man_kfree((void *)wp, sizeof (man_work_t));
7029 }
7030 
7031 /*
7032  * Post work to a work queue.  The man_bwork sleeps on
7033  * man_bwork_q->q_cv, and work requesters may sleep on mw_cv.
7034  * The man_lock is used to protect both cv's.
7035  */
7036 void
7037 man_work_add(man_workq_t *q, man_work_t *wp)
7038 {
7039 	man_work_t	*lp = q->q_work;
7040 
7041 	if (lp) {
7042 		while (lp->mw_next != NULL)
7043 			lp = lp->mw_next;
7044 
7045 		lp->mw_next = wp;
7046 
7047 	} else {
7048 		q->q_work = wp;
7049 	}
7050 
7051 	/*
7052 	 * cv_signal for man_bwork_q, qenable for man_iwork_q
7053 	 */
7054 	if (q == man_bwork_q) {
7055 		cv_signal(&q->q_cv);
7056 
7057 	} else {	/* q == man_iwork_q */
7058 
7059 		if (man_ctl_wq != NULL)
7060 			qenable(man_ctl_wq);
7061 	}
7062 
7063 }
7064 
7065 /* <<<<<<<<<<<<<<<<<<<<<<< NDD SUPPORT FUNCTIONS	>>>>>>>>>>>>>>>>>>> */
7066 /*
7067  * ndd support functions to get/set parameters
7068  */
7069 
7070 /*
7071  * Register each element of the parameter array with the
7072  * named dispatch handler. Each element is loaded using
7073  * nd_load()
7074  *
7075  * 	cnt	- the number of elements present in the parameter array
7076  */
7077 static int
7078 man_param_register(param_t *manpa, int cnt)
7079 {
7080 	int	i;
7081 	ndgetf_t getp;
7082 	ndsetf_t setp;
7083 	int	status = B_TRUE;
7084 
7085 	MAN_DBG(MAN_CONFIG, ("man_param_register: manpa(0x%p) cnt %d\n",
7086 	    (void *)manpa, cnt));
7087 
7088 	getp = man_param_get;
7089 
7090 	for (i = 0; i < cnt; i++, manpa++) {
7091 		switch (man_param_display[i]) {
7092 		case MAN_NDD_GETABLE:
7093 			setp = NULL;
7094 			break;
7095 
7096 		case MAN_NDD_SETABLE:
7097 			setp = man_param_set;
7098 			break;
7099 
7100 		default:
7101 			continue;
7102 		}
7103 
7104 		if (!nd_load(&man_ndlist, manpa->param_name, getp,
7105 		    setp, (caddr_t)manpa)) {
7106 
7107 			(void) man_nd_free(&man_ndlist);
7108 			status = B_FALSE;
7109 			goto exit;
7110 		}
7111 	}
7112 
7113 	if (!nd_load(&man_ndlist, "man_pathgroups_report",
7114 	    man_pathgroups_report, NULL, NULL)) {
7115 
7116 		(void) man_nd_free(&man_ndlist);
7117 		status = B_FALSE;
7118 		goto exit;
7119 	}
7120 
7121 	if (!nd_load(&man_ndlist, "man_set_active_path",
7122 	    NULL, man_set_active_path, NULL)) {
7123 
7124 		(void) man_nd_free(&man_ndlist);
7125 		status = B_FALSE;
7126 		goto exit;
7127 	}
7128 
7129 	if (!nd_load(&man_ndlist, "man_get_hostinfo",
7130 	    man_get_hostinfo, NULL, NULL)) {
7131 
7132 		(void) man_nd_free(&man_ndlist);
7133 		status = B_FALSE;
7134 		goto exit;
7135 	}
7136 
7137 exit:
7138 
7139 	MAN_DBG(MAN_CONFIG, ("man_param_register: returns %d\n", status));
7140 
7141 	return (status);
7142 }
7143 
7144 static void
7145 man_nd_getset(queue_t *wq, mblk_t *mp)
7146 {
7147 
7148 	if (!nd_getset(wq, man_ndlist, mp))
7149 		miocnak(wq, mp, 0, ENOENT);
7150 	else
7151 		qreply(wq, mp);
7152 }
7153 
7154 /*ARGSUSED*/
7155 static int
7156 man_pathgroups_report(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7157 {
7158 
7159 	man_t		*manp;
7160 	man_pg_t	*mpg;
7161 	int		i;
7162 	char		pad[] = "                 "; /* 17 spaces */
7163 	int		pad_end;
7164 
7165 
7166 	MAN_DBG(MAN_PATH, ("man_pathgroups_report: wq(0x%p) mp(0x%p)"
7167 	    " caddr 0x%p", (void *)wq, (void *)mp, (void *)cp));
7168 
7169 	(void) mi_mpprintf(mp, "MAN Pathgroup report: (* == failed)");
7170 	(void) mi_mpprintf(mp, "====================================="
7171 	    "==========================================");
7172 
7173 	mutex_enter(&man_lock);
7174 
7175 	for (i = 0; i < 2; i++) {
7176 		manp = ddi_get_soft_state(man_softstate, i);
7177 		if (manp == NULL)
7178 			continue;
7179 
7180 	(void) mi_mpprintf(mp,
7181 	    "Interface\tDestination\t\tActive Path\tAlternate Paths");
7182 	(void) mi_mpprintf(mp, "---------------------------------------"
7183 	    "----------------------------------------");
7184 
7185 		for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7186 
7187 			(void) mi_mpprintf(mp, "%s%d\t\t",
7188 			    ddi_major_to_name(manp->man_meta_major),
7189 			    manp->man_meta_ppa);
7190 
7191 			if (man_is_on_domain) {
7192 				(void) mi_mpprintf_nr(mp, "Master SSC\t");
7193 				man_preport(mpg->mpg_pathp, mp);
7194 			} else {
7195 				if (i == 0) {
7196 					pad_end = 17 - strlen(ether_sprintf(
7197 					    &mpg->mpg_dst_eaddr));
7198 					if (pad_end < 0 || pad_end > 16)
7199 					pad_end = 0;
7200 					pad[pad_end] = '\0';
7201 
7202 					(void) mi_mpprintf_nr(mp, "%c %s%s",
7203 					    mpg->mpg_pg_id + 'A',
7204 					    ether_sprintf(&mpg->mpg_dst_eaddr),
7205 					    pad);
7206 
7207 					pad[pad_end] = ' ';
7208 				} else {
7209 					(void) mi_mpprintf_nr(mp,
7210 					    "Other SSC\t");
7211 				}
7212 				man_preport(mpg->mpg_pathp, mp);
7213 			}
7214 			(void) mi_mpprintf_nr(mp, "\n");
7215 		}
7216 	}
7217 
7218 	mutex_exit(&man_lock);
7219 	MAN_DBG(MAN_PATH, ("man_pathgroups_report: returns"));
7220 
7221 	return (0);
7222 }
7223 
7224 static void
7225 man_preport(man_path_t *plist, mblk_t *mp)
7226 {
7227 	man_path_t	*ap;
7228 
7229 	ap = man_find_active_path(plist);
7230 	/*
7231 	 * Active path
7232 	 */
7233 	if (ap != NULL) {
7234 		(void) mi_mpprintf_nr(mp, "\t%s%d\t\t",
7235 		    ddi_major_to_name(ap->mp_device.mdev_major),
7236 		    ap->mp_device.mdev_ppa);
7237 	} else {
7238 		(void) mi_mpprintf_nr(mp, "None \t");
7239 	}
7240 
7241 	/*
7242 	 * Alternate Paths.
7243 	 */
7244 	while (plist != NULL) {
7245 		(void) mi_mpprintf_nr(mp, "%s%d exp %d",
7246 		    ddi_major_to_name(plist->mp_device.mdev_major),
7247 		    plist->mp_device.mdev_ppa,
7248 		    plist->mp_device.mdev_exp_id);
7249 		if (plist->mp_device.mdev_state & MDEV_FAILED)
7250 			(void) mi_mpprintf_nr(mp, "*");
7251 		plist = plist->mp_next;
7252 		if (plist)
7253 			(void) mi_mpprintf_nr(mp, ", ");
7254 	}
7255 }
7256 
7257 /*
7258  * NDD request to set active path. Calling context is man_ioctl, so we are
7259  * exclusive in the inner perimeter.
7260  *
7261  *	Syntax is "ndd -set /dev/dman <man ppa> <pg_id> <phys ppa>"
7262  */
7263 /* ARGSUSED3 */
7264 static int
7265 man_set_active_path(queue_t *wq, mblk_t *mp, char *value, caddr_t cp,
7266     cred_t *cr)
7267 {
7268 	char		*end, *meta_ppap, *phys_ppap, *pg_idp;
7269 	int		meta_ppa;
7270 	int		phys_ppa;
7271 	int		pg_id;
7272 	man_t		*manp;
7273 	man_pg_t	*mpg;
7274 	man_path_t	*np;
7275 	mi_path_t	mpath;
7276 	int		status = 0;
7277 
7278 	MAN_DBG(MAN_PATH, ("man_set_active_path: wq(0x%p) mp(0x%p)"
7279 	    " args %s", (void *)wq, (void *)mp, value));
7280 
7281 	meta_ppap = value;
7282 
7283 	if ((pg_idp = strchr(value, ' ')) == NULL) {
7284 		status = EINVAL;
7285 		goto exit;
7286 	}
7287 
7288 	*pg_idp++ = '\0';
7289 
7290 	if ((phys_ppap = strchr(pg_idp, ' ')) == NULL) {
7291 		status = EINVAL;
7292 		goto exit;
7293 	}
7294 
7295 	*phys_ppap++ = '\0';
7296 
7297 	meta_ppa = (int)mi_strtol(meta_ppap, &end, 10);
7298 	pg_id = (int)mi_strtol(pg_idp, &end, 10);
7299 	phys_ppa = (int)mi_strtol(phys_ppap, &end, 10);
7300 
7301 	mutex_enter(&man_lock);
7302 	manp = ddi_get_soft_state(man_softstate, meta_ppa);
7303 	if (manp == NULL || manp->man_pg == NULL) {
7304 		status = EINVAL;
7305 		mutex_exit(&man_lock);
7306 		goto exit;
7307 	}
7308 
7309 	mpg = man_find_pg_by_id(manp->man_pg, pg_id);
7310 	if (mpg == NULL) {
7311 		status = EINVAL;
7312 		mutex_exit(&man_lock);
7313 		goto exit;
7314 	}
7315 
7316 	np = man_find_path_by_ppa(mpg->mpg_pathp, phys_ppa);
7317 
7318 	if (np == NULL) {
7319 		status = EINVAL;
7320 		mutex_exit(&man_lock);
7321 		goto exit;
7322 	}
7323 
7324 	mpath.mip_cmd = MI_PATH_ACTIVATE;
7325 	mpath.mip_pg_id = pg_id;
7326 	mpath.mip_man_ppa = meta_ppa;
7327 	mpath.mip_devs[0] = np->mp_device;
7328 	mpath.mip_ndevs = 1;
7329 
7330 	status = man_pg_cmd(&mpath, NULL);
7331 	mutex_exit(&man_lock);
7332 
7333 exit:
7334 
7335 	MAN_DBG(MAN_PATH, ("man_set_active_path: returns %d", status));
7336 
7337 	return (status);
7338 }
7339 
7340 /*
7341  * Dump out the contents of the IOSRAM handoff structure. Note that if
7342  * anything changes here, you must make sure that the sysinit script
7343  * stays in sync with this output.
7344  */
7345 /* ARGSUSED */
7346 static int
7347 man_get_hostinfo(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7348 {
7349 	manc_t	manc;
7350 	char	*ipaddr;
7351 	char	ipv6addr[INET6_ADDRSTRLEN];
7352 	int	i;
7353 	int	status;
7354 
7355 	if (!man_is_on_domain)
7356 		return (0);
7357 
7358 	if (status = man_get_iosram(&manc)) {
7359 		return (status);
7360 	}
7361 
7362 	(void) mi_mpprintf(mp, "manc_magic = 0x%x", manc.manc_magic);
7363 	(void) mi_mpprintf(mp, "manc_version = 0%d", manc.manc_version);
7364 	(void) mi_mpprintf(mp, "manc_csum = 0x%x", manc.manc_csum);
7365 
7366 	if (manc.manc_ip_type == AF_INET) {
7367 		in_addr_t	netnum;
7368 
7369 		(void) mi_mpprintf(mp, "manc_ip_type = AF_INET");
7370 
7371 		ipaddr = man_inet_ntoa(manc.manc_dom_ipaddr);
7372 		(void) mi_mpprintf(mp, "manc_dom_ipaddr = %s", ipaddr);
7373 
7374 		ipaddr = man_inet_ntoa(manc.manc_dom_ip_netmask);
7375 		(void) mi_mpprintf(mp, "manc_dom_ip_netmask = %s", ipaddr);
7376 
7377 		netnum = manc.manc_dom_ipaddr & manc.manc_dom_ip_netmask;
7378 		ipaddr = man_inet_ntoa(netnum);
7379 		(void) mi_mpprintf(mp, "manc_dom_ip_netnum = %s", ipaddr);
7380 
7381 		ipaddr = man_inet_ntoa(manc.manc_sc_ipaddr);
7382 		(void) mi_mpprintf(mp, "manc_sc_ipaddr = %s", ipaddr);
7383 
7384 	} else if (manc.manc_ip_type == AF_INET6) {
7385 
7386 		(void) mi_mpprintf(mp, "manc_ip_type = AF_INET6");
7387 
7388 		(void) inet_ntop(AF_INET6, (void *)&manc.manc_dom_ipv6addr,
7389 		    ipv6addr, INET6_ADDRSTRLEN);
7390 		(void) mi_mpprintf(mp, "manc_dom_ipv6addr = %s", ipv6addr);
7391 
7392 		(void) mi_mpprintf(mp, "manc_dom_ipv6_netmask = %d",
7393 		    manc.manc_dom_ipv6_netmask.s6_addr[0]);
7394 
7395 		(void) inet_ntop(AF_INET6, (void *)&manc.manc_sc_ipv6addr,
7396 		    ipv6addr, INET6_ADDRSTRLEN);
7397 		(void) mi_mpprintf(mp, "manc_sc_ipv6addr = %s", ipv6addr);
7398 
7399 	} else {
7400 
7401 		(void) mi_mpprintf(mp, "manc_ip_type = NONE");
7402 	}
7403 
7404 	(void) mi_mpprintf(mp, "manc_dom_eaddr = %s",
7405 	    ether_sprintf(&manc.manc_dom_eaddr));
7406 	(void) mi_mpprintf(mp, "manc_sc_eaddr = %s",
7407 	    ether_sprintf(&manc.manc_sc_eaddr));
7408 
7409 	(void) mi_mpprintf(mp, "manc_iob_bitmap = 0x%x\tio boards = ",
7410 	    manc.manc_iob_bitmap);
7411 	for (i = 0; i < MAN_MAX_EXPANDERS; i++) {
7412 		if ((manc.manc_iob_bitmap >> i) & 0x1) {
7413 			(void) mi_mpprintf_nr(mp, "%d.1, ", i);
7414 		}
7415 	}
7416 	(void) mi_mpprintf(mp, "manc_golden_iob = %d", manc.manc_golden_iob);
7417 
7418 	return (0);
7419 }
7420 
/*
 * Format an IPv4 address as dotted decimal.  The bytes of `in' are
 * printed in the order they sit in memory, so the value is expected to
 * be in network byte order.
 *
 * The result lives in a static buffer: it is overwritten by the next
 * call and the function is not reentrant.
 */
static char *
man_inet_ntoa(in_addr_t in)
{
	static char		addrbuf[18];
	const unsigned char	*octet = (const unsigned char *)&in;

	(void) sprintf(addrbuf, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);

	return (addrbuf);
}
7431 
7432 /*
7433  * parameter value. cp points to the required parameter.
7434  */
7435 /* ARGSUSED */
7436 static int
7437 man_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
7438 {
7439 	param_t	*manpa = (param_t *)cp;
7440 
7441 	(void) mi_mpprintf(mp, "%u", manpa->param_val);
7442 	return (0);
7443 }
7444 
7445 /*
7446  * Sets the man parameter to the value in the param_register using
7447  * nd_load().
7448  */
7449 /* ARGSUSED */
7450 static int
7451 man_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
7452 {
7453 	char *end;
7454 	size_t new_value;
7455 	param_t	*manpa = (param_t *)cp;
7456 
7457 	new_value = mi_strtol(value, &end, 10);
7458 
7459 	if (end == value || new_value < manpa->param_min ||
7460 	    new_value > manpa->param_max) {
7461 			return (EINVAL);
7462 	}
7463 
7464 	manpa->param_val = new_value;
7465 
7466 	return (0);
7467 
7468 }
7469 
7470 /*
7471  * Free the Named Dispatch Table by calling man_nd_free
7472  */
7473 static void
7474 man_param_cleanup()
7475 {
7476 	if (man_ndlist != NULL)
7477 		nd_free(&man_ndlist);
7478 }
7479 
7480 /*
7481  * Free the table pointed to by 'ndp'
7482  */
7483 static void
7484 man_nd_free(caddr_t *nd_pparam)
7485 {
7486 	ND	*nd;
7487 
7488 	if ((nd = (ND *)(*nd_pparam)) != NULL) {
7489 		if (nd->nd_tbl)
7490 			mi_free((char *)nd->nd_tbl);
7491 		mi_free((char *)nd);
7492 		*nd_pparam = NULL;
7493 	}
7494 }
7495 
7496 
7497 /*
7498  * man_kstat_update - update the statistics for a meta-interface.
7499  *
7500  *	ksp - kstats struct
7501  *	rw - flag indicating whether stats are to be read or written.
7502  *
7503  *	returns	0
7504  *
7505  * The destination specific kstat information is protected by the
7506  * perimeter lock, so we submit a work request to get the stats
7507  * updated (see man_do_kstats()), and then collect the results
7508  * when cv_signal'd. Note that we are doing cv_timedwait_sig()
7509  * as a precautionary measure only.
7510  */
7511 static int
7512 man_kstat_update(kstat_t *ksp, int rw)
7513 {
7514 	man_t			*manp;		/* per instance data */
7515 	man_work_t		*wp;
7516 	int			status = 0;
7517 	kstat_named_t		*knp;
7518 	kstat_named_t		*man_knp;
7519 	int			i;
7520 
7521 	MAN_DBG(MAN_KSTAT, ("man_kstat_update: %s\n", rw ? "KSTAT_WRITE" :
7522 	    "KSTAT_READ"));
7523 
7524 	mutex_enter(&man_lock);
7525 	manp = (man_t *)ksp->ks_private;
7526 	manp->man_refcnt++;
7527 
7528 	/*
7529 	 * If the driver has been configured, get kstats updated by inner
7530 	 * perimeter prior to retrieving.
7531 	 */
7532 	if (man_config_state == MAN_CONFIGURED) {
7533 		clock_t wait_status;
7534 
7535 		man_update_path_kstats(manp);
7536 		wp = man_work_alloc(MAN_WORK_KSTAT_UPDATE, KM_SLEEP);
7537 		wp->mw_arg.a_man_ppa = manp->man_meta_ppa;
7538 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
7539 		man_work_add(man_iwork_q, wp);
7540 
7541 		wait_status = cv_reltimedwait_sig(&wp->mw_cv, &man_lock,
7542 		    drv_usectohz(manp->man_kstat_waittime), TR_CLOCK_TICK);
7543 
7544 		if (wp->mw_flags & MAN_WFLAGS_DONE) {
7545 			status = wp->mw_status;
7546 			man_work_free(wp);
7547 		} else {
7548 			ASSERT(wait_status <= 0);
7549 			wp->mw_flags &= ~MAN_WFLAGS_CVWAITER;
7550 			if (wait_status == 0)
7551 				status = EINTR;
7552 			else {
7553 				MAN_DBG(MAN_KSTAT, ("man_kstat_update: "
7554 				    "timedout, returning stale stats."));
7555 				status = 0;
7556 			}
7557 		}
7558 		if (status)
7559 			goto exit;
7560 	}
7561 
7562 	knp = (kstat_named_t *)ksp->ks_data;
7563 	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7564 
7565 	if (rw == KSTAT_READ) {
7566 		for (i = 0; i < MAN_NUMSTATS; i++) {
7567 			knp[i].value.ui64 = man_knp[i].value.ui64;
7568 		}
7569 	} else {
7570 		for (i = 0; i < MAN_NUMSTATS; i++) {
7571 			man_knp[i].value.ui64 = knp[i].value.ui64;
7572 		}
7573 	}
7574 
7575 exit:
7576 	manp->man_refcnt--;
7577 	mutex_exit(&man_lock);
7578 
7579 	MAN_DBG(MAN_KSTAT, ("man_kstat_update: returns %d", status));
7580 
7581 	return (status);
7582 }
7583 
7584 /*
7585  * Sum destination kstats for all active paths for a given instance of the
7586  * MAN driver. Called with perimeter lock.
7587  */
7588 static void
7589 man_do_kstats(man_work_t *wp)
7590 {
7591 	man_t		*manp;
7592 	man_pg_t	*mpg;
7593 	man_path_t	*mp;
7594 
7595 	MAN_DBG(MAN_KSTAT, ("man_do_kstats:"));
7596 
7597 	mutex_enter(&man_lock);
7598 	/*
7599 	 * Sync mp_last_knp for each path associated with the MAN instance.
7600 	 */
7601 	manp = (man_t *)ddi_get_soft_state(man_softstate,
7602 	    wp->mw_arg.a_man_ppa);
7603 	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7604 
7605 		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7606 
7607 		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7608 
7609 			MAN_DBG(MAN_KSTAT, ("\tkstat: path"));
7610 			MAN_DBGCALL(MAN_KSTAT, man_print_path(mp));
7611 
7612 			/*
7613 			 * We just to update the destination statistics here.
7614 			 */
7615 			man_sum_dests_kstats(mp->mp_last_knp, mpg);
7616 		}
7617 	}
7618 	mutex_exit(&man_lock);
7619 	MAN_DBG(MAN_KSTAT, ("man_do_kstats: returns"));
7620 }
7621 
7622 /*
7623  * Sum device kstats for all active paths for a given instance of the
7624  * MAN driver. Called with man_lock.
7625  */
7626 static void
7627 man_update_path_kstats(man_t *manp)
7628 {
7629 	kstat_named_t	*man_knp;
7630 	man_pg_t	*mpg;
7631 	man_path_t	*mp;
7632 
7633 	ASSERT(MUTEX_HELD(&man_lock));
7634 	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats:"));
7635 
7636 	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7637 
7638 	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7639 
7640 		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7641 
7642 		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7643 
7644 			man_update_dev_kstats(man_knp, mp);
7645 
7646 		}
7647 	}
7648 	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats: returns"));
7649 }
7650 
7651 /*
7652  * Update the device kstats.
7653  * As man_kstat_update() is called with kstat_chain_lock held,
7654  * we can safely update the statistics from the underlying driver here.
7655  */
7656 static void
7657 man_update_dev_kstats(kstat_named_t *man_knp, man_path_t *mp)
7658 {
7659 	kstat_t		*dev_ksp;
7660 	major_t		major;
7661 	int		instance;
7662 	char		buf[KSTAT_STRLEN];
7663 
7664 
7665 	major = mp->mp_device.mdev_major;
7666 	instance = mp->mp_device.mdev_ppa;
7667 	(void) sprintf(buf, "%s%d", ddi_major_to_name(major), instance);
7668 
7669 	dev_ksp = kstat_hold_byname(ddi_major_to_name(major), instance, buf,
7670 	    ALL_ZONES);
7671 	if (dev_ksp != NULL) {
7672 
7673 		KSTAT_ENTER(dev_ksp);
7674 		KSTAT_UPDATE(dev_ksp, KSTAT_READ);
7675 		man_sum_kstats(man_knp, dev_ksp, mp->mp_last_knp);
7676 		KSTAT_EXIT(dev_ksp);
7677 		kstat_rele(dev_ksp);
7678 
7679 	} else {
7680 		MAN_DBG(MAN_KSTAT,
7681 		    ("man_update_dev_kstats: no kstat data found for %s(%d,%d)",
7682 		    buf, major, instance));
7683 	}
7684 }
7685 
7686 static void
7687 man_sum_dests_kstats(kstat_named_t *knp, man_pg_t *mpg)
7688 {
7689 	int		i;
7690 	int		flags;
7691 	char		*statname;
7692 	manstr_t	*msp;
7693 	man_dest_t	*mdp;
7694 	uint64_t	switches = 0;
7695 	uint64_t	linkfails = 0;
7696 	uint64_t	linkstales = 0;
7697 	uint64_t	icmpv4probes = 0;
7698 	uint64_t	icmpv6probes = 0;
7699 
7700 	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: mpg 0x%p", (void *)mpg));
7701 
7702 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
7703 
7704 		if (!man_str_uses_pg(msp, mpg))
7705 			continue;
7706 
7707 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
7708 
7709 		switches += mdp->md_switches;
7710 		linkfails += mdp->md_linkfails;
7711 		linkstales += mdp->md_linkstales;
7712 		icmpv4probes += mdp->md_icmpv4probes;
7713 		icmpv6probes += mdp->md_icmpv6probes;
7714 	}
7715 
7716 	for (i = 0; i < MAN_NUMSTATS; i++) {
7717 
7718 		statname = man_kstat_info[i].mk_name;
7719 		flags = man_kstat_info[i].mk_flags;
7720 
7721 		if (!(flags & MK_NOT_PHYSICAL))
7722 			continue;
7723 
7724 		if (strcmp(statname, "man_switches") == 0) {
7725 			knp[i].value.ui64 = switches;
7726 		} else if (strcmp(statname, "man_link_fails") == 0) {
7727 			knp[i].value.ui64 = linkfails;
7728 		} else if (strcmp(statname, "man_link_stales") == 0) {
7729 			knp[i].value.ui64 = linkstales;
7730 		} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7731 			knp[i].value.ui64 = icmpv4probes;
7732 		} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7733 			knp[i].value.ui64 = icmpv6probes;
7734 		}
7735 	}
7736 
7737 	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: returns"));
7738 }
7739 
7740 /*
7741  * Initialize MAN named kstats in the space provided.
7742  */
7743 static void
7744 man_kstat_named_init(kstat_named_t *knp, int num_stats)
7745 {
7746 	int	i;
7747 
7748 	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: knp(0x%p) num_stats = %d",
7749 	    (void *)knp, num_stats));
7750 
7751 	for (i = 0; i < num_stats; i++) {
7752 		kstat_named_init(&knp[i], man_kstat_info[i].mk_name,
7753 		    man_kstat_info[i].mk_type);
7754 	}
7755 
7756 	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: returns"));
7757 
7758 }
7759 
7760 /*
7761  * man_kstat_byname - get a kernel stat value from its structure
7762  *
7763  *	ksp - kstat_t structure to play with
7764  *	s   - string to match names with
7765  *	res - in/out result data pointer
7766  *
7767  *	returns	- success - 1 (found)
7768  *		- failure - 0 (not found)
7769  */
7770 static int
7771 man_kstat_byname(kstat_t *ksp, char *s, kstat_named_t *res)
7772 {
7773 	int		found = 0;
7774 
7775 	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: GETTING %s\n", s));
7776 
7777 	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
7778 		kstat_named_t *knp;
7779 
7780 		for (knp = KSTAT_NAMED_PTR(ksp);
7781 		    (caddr_t)knp < ((caddr_t)ksp->ks_data+ksp->ks_data_size);
7782 		    knp++) {
7783 
7784 			if (strcmp(s, knp->name) == NULL) {
7785 
7786 				res->data_type = knp->data_type;
7787 				res->value = knp->value;
7788 				found++;
7789 
7790 				MAN_DBG(MAN_KSTAT2, ("\t%s: %d\n", knp->name,
7791 				    (int)knp->value.ul));
7792 			}
7793 		}
7794 	} else {
7795 		MAN_DBG(MAN_KSTAT2, ("\tbad kstats type %d\n", ksp->ks_type));
7796 	}
7797 
7798 	/*
7799 	 * if getting a value but couldn't find the namestring, result = 0.
7800 	 */
7801 	if (!found) {
7802 		/*
7803 		 * a reasonable default
7804 		 */
7805 		res->data_type = KSTAT_DATA_ULONG;
7806 		res->value.l = 0;
7807 		MAN_DBG(MAN_KSTAT2, ("\tcouldn't find, using defaults\n"));
7808 	}
7809 
7810 	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: returns\n"));
7811 
7812 	return (found);
7813 }
7814 
7815 
7816 /*
7817  *
7818  * Accumulate MAN driver kstats from the incremental values of the underlying
7819  * physical interfaces.
7820  *
7821  * Parameters:
7822  *	sum_knp		- The named kstat area to put cumulative value,
7823  *			  NULL if we just want to sync next two params.
7824  *	phys_ksp	- Physical interface kstat_t pointer. Contains
7825  *			  more current counts.
7826  * 	phys_last_knp	- counts from the last time we were called for this
7827  *			  physical interface. Note that the name kstats
7828  *			  pointed to are actually in MAN format, but they
7829  *			  hold the mirrored physical devices last read
7830  *			  kstats.
7831  * Basic algorithm is:
7832  *
7833  * 	for each named kstat variable {
7834  *	    sum_knp[i] += (phys_ksp->ksp_data[i] - phys_last_knp[i]);
7835  *	    phys_last_knp[i] = phys_ksp->ksp_data[i];
7836  *	}
7837  *
7838  */
7839 static void
7840 man_sum_kstats(kstat_named_t *sum_knp, kstat_t *phys_ksp,
7841 	kstat_named_t *phys_last_knp)
7842 {
7843 	char		*physname;
7844 	char		*physalias;
7845 	char		*statname;
7846 	kstat_named_t	phys_kn_entry;
7847 	uint64_t	delta64;
7848 	int		i;
7849 
7850 	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: sum_knp(0x%p) phys_ksp(0x%p)"
7851 	    " phys_last_knp(0x%p)\n", (void *)sum_knp, (void *)phys_ksp,
7852 	    (void *)phys_last_knp));
7853 
7854 	/*
7855 	 * Now for each entry in man_kstat_info, sum the named kstat.
7856 	 * Not that all MAN specific kstats will end up !found.
7857 	 */
7858 	for (i = 0; i < MAN_NUMSTATS; i++) {
7859 		int	found = 0;
7860 		int	flags = 0;
7861 
7862 		delta64 = 0;
7863 
7864 		statname = man_kstat_info[i].mk_name;
7865 		physname = man_kstat_info[i].mk_physname;
7866 		physalias = man_kstat_info[i].mk_physalias;
7867 		flags = man_kstat_info[i].mk_flags;
7868 
7869 		/*
7870 		 * Update MAN private kstats.
7871 		 */
7872 		if (flags & MK_NOT_PHYSICAL) {
7873 
7874 			kstat_named_t	*knp = phys_last_knp;
7875 
7876 			if (sum_knp == NULL)
7877 				continue;
7878 
7879 			if (strcmp(statname, "man_switches") == 0) {
7880 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7881 			} else if (strcmp(statname, "man_link_fails") == 0) {
7882 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7883 			} else if (strcmp(statname, "man_link_stales") == 0) {
7884 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7885 			} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7886 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7887 			} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7888 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7889 			}
7890 
7891 			continue;	/* phys_ksp doesnt have this stat */
7892 		}
7893 
7894 		/*
7895 		 * first try it by the "official" name
7896 		 */
7897 		if (phys_ksp) {
7898 			if (man_kstat_byname(phys_ksp, physname,
7899 			    &phys_kn_entry)) {
7900 
7901 				found = 1;
7902 
7903 			} else if ((physalias) && (man_kstat_byname(phys_ksp,
7904 			    physalias, &phys_kn_entry))) {
7905 
7906 				found = 1;
7907 			}
7908 		}
7909 
7910 		if (!found) {
7911 			/*
7912 			 * clear up the "last" value, no change to the sum
7913 			 */
7914 			phys_last_knp[i].value.ui64 = 0;
7915 			continue;
7916 		}
7917 
7918 		/*
7919 		 * at this point, we should have the good underlying
7920 		 * kstat value stored in phys_kn_entry
7921 		 */
7922 		if (flags & MK_NOT_COUNTER) {
7923 			/*
7924 			 * it isn't a counter, so store the value and
7925 			 * move on (e.g. ifspeed)
7926 			 */
7927 			phys_last_knp[i].value = phys_kn_entry.value;
7928 			continue;
7929 		}
7930 
7931 		switch (phys_kn_entry.data_type) {
7932 		case KSTAT_DATA_UINT32:
7933 
7934 			/*
7935 			 * this handles 32-bit wrapping
7936 			 */
7937 			if (phys_kn_entry.value.ui32 <
7938 			    phys_last_knp[i].value.ui32) {
7939 
7940 				/*
7941 				 * we've wrapped!
7942 				 */
7943 				delta64 += (UINT_MAX -
7944 				    phys_last_knp[i].value.ui32);
7945 				phys_last_knp[i].value.ui32 = 0;
7946 			}
7947 
7948 			delta64 += phys_kn_entry.value.ui32 -
7949 			    phys_last_knp[i].value.ui32;
7950 			phys_last_knp[i].value.ui32 = phys_kn_entry.value.ui32;
7951 			break;
7952 
7953 		default:
7954 			/*
7955 			 * must be a 64-bit value, we ignore 64-bit
7956 			 * wraps, since they shouldn't ever happen
7957 			 * within the life of a machine (if we assume
7958 			 * machines don't stay up for more than a few
7959 			 * hundred years without a reboot...)
7960 			 */
7961 			delta64 = phys_kn_entry.value.ui64 -
7962 			    phys_last_knp[i].value.ui64;
7963 			phys_last_knp[i].value.ui64 = phys_kn_entry.value.ui64;
7964 		}
7965 
7966 		if (sum_knp != NULL) {
7967 			/*
7968 			 * now we need to save the value
7969 			 */
7970 			switch (sum_knp[i].data_type) {
7971 			case KSTAT_DATA_UINT32:
7972 				/* trunk down to 32 bits, possibly lossy */
7973 				sum_knp[i].value.ui32 += (uint32_t)delta64;
7974 				break;
7975 
7976 			default:
7977 				sum_knp[i].value.ui64 += delta64;
7978 				break;
7979 			}
7980 		}
7981 	}
7982 
7983 	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: returns\n"));
7984 }
7985 
7986 
7987 #if defined(DEBUG)
7988 
7989 
7990 static char *_ms_flags[] = {
7991 	"NONE",
7992 	"FAST", 	/* 0x1 */
7993 	"RAW",		/* 0x2 */
7994 	"ALLPHYS",	/* 0x4 */
7995 	"ALLMULTI",	/* 0x8 */
7996 	"ALLSAP",	/* 0x10 */
7997 	"CKSUM",	/* 0x20 */
7998 	"MULTI",	/* 0x40 */
7999 	"SERLPBK",	/* 0x80 */
8000 	"MACLPBK",	/* 0x100 */
8001 	"CLOSING",	/* 0x200 */
8002 	"CLOSE_DONE",	/* 0x400 */
8003 	"CONTROL"	/* 0x800 */
8004 };
8005 
8006 static void
8007 man_print_msp(manstr_t *msp)
8008 {
8009 	char	buf[512];
8010 	char	prbuf[512];
8011 	uint_t	flags;
8012 	int	i;
8013 
8014 	cmn_err(CE_CONT, "\tmsp(0x%p)\n", (void *)msp);
8015 
8016 	if (msp == NULL)
8017 		return;
8018 
8019 	cmn_err(CE_CONT, "\t%s%d SAP(0x%x):\n",
8020 	    ddi_major_to_name(msp->ms_meta_maj), msp->ms_meta_ppa,
8021 	    msp->ms_sap);
8022 
8023 	buf[0] = '\0';
8024 	prbuf[0] = '\0';
8025 	flags = msp->ms_flags;
8026 	for (i = 0; i < A_CNT(_ms_flags); i++) {
8027 		if ((flags >> i) & 0x1) {
8028 			(void) sprintf(buf, " %s |", _ms_flags[i+1]);
8029 			(void) strcat(prbuf, buf);
8030 		}
8031 	}
8032 	prbuf[strlen(prbuf) - 1] = '\0';
8033 	cmn_err(CE_CONT, "\tms_flags: %s\n", prbuf);
8034 
8035 	cmn_err(CE_CONT, "\tms_dlpistate: %s\n", dss[msp->ms_dlpistate]);
8036 
8037 	cmn_err(CE_CONT, "\tms_dl_mp: 0x%p\n", (void *)msp->ms_dl_mp);
8038 
8039 	cmn_err(CE_CONT, "\tms_manp: 0x%p\n", (void *)msp->ms_manp);
8040 
8041 	cmn_err(CE_CONT, "\tms_dests: 0x%p\n", (void *)msp->ms_dests);
8042 
8043 }
8044 
8045 static char *_md_state[] = {
8046 	"NOTPRESENT",		/* 0x0 */
8047 	"INITIALIZING",		/* 0x1 */
8048 	"READY",		/* 0x2 */
8049 	"PLUMBING",		/* 0x4 */
8050 	"CLOSING"		/* 0x8 */
8051 };
8052 
8053 static void
8054 man_print_mdp(man_dest_t *mdp)
8055 {
8056 	uint_t		state;
8057 	int		i;
8058 	char		buf[64];
8059 	char		prbuf[512];
8060 
8061 	buf[0] = '\0';
8062 	prbuf[0] = '\0';
8063 
8064 	cmn_err(CE_CONT, "\tmdp(0x%p)\n", (void *)mdp);
8065 
8066 	if (mdp == NULL)
8067 		return;
8068 
8069 	cmn_err(CE_CONT, "\tmd_pg_id: %d\n", mdp->md_pg_id);
8070 	cmn_err(CE_CONT, "\tmd_dst_eaddr: %s\n",
8071 	    ether_sprintf(&mdp->md_dst_eaddr));
8072 	cmn_err(CE_CONT, "\tmd_src_eaddr: %s\n",
8073 	    ether_sprintf(&mdp->md_src_eaddr));
8074 	cmn_err(CE_CONT, "\tmd_dlpistate: %s", dss[mdp->md_dlpistate]);
8075 	cmn_err(CE_CONT, "\tmd_muxid: 0x%u", mdp->md_muxid);
8076 	cmn_err(CE_CONT, "\tmd_rcvcnt %lu md_lastrcvcnt %lu", mdp->md_rcvcnt,
8077 	    mdp->md_lastrcvcnt);
8078 
8079 	/*
8080 	 * Print out state as text.
8081 	 */
8082 	state = mdp->md_state;
8083 
8084 	if (state == 0) {
8085 		(void) strcat(prbuf, _md_state[0]);
8086 	} else {
8087 
8088 		for (i = 0; i < A_CNT(_md_state); i++) {
8089 			if ((state >> i) & 0x1)  {
8090 				(void) sprintf(buf, " %s |", _md_state[i+1]);
8091 				(void) strcat(prbuf, buf);
8092 			}
8093 		}
8094 		prbuf[strlen(prbuf) -1] = '\0';
8095 	}
8096 	cmn_err(CE_CONT, "\tmd_state: %s", prbuf);
8097 
8098 	cmn_err(CE_CONT, "\tmd_device:\n");
8099 	man_print_dev(&mdp->md_device);
8100 
8101 }
8102 
8103 static void
8104 man_print_man(man_t *manp)
8105 {
8106 	char	buf[512];
8107 	char	prbuf[512];
8108 
8109 	buf[0] = '\0';
8110 	prbuf[0] = '\0';
8111 
8112 	if (manp == NULL)
8113 		return;
8114 
8115 	if (ddi_major_to_name(manp->man_meta_major)) {
8116 		(void) sprintf(buf, "\t man_device: %s%d\n",
8117 		    ddi_major_to_name(manp->man_meta_major),
8118 		    manp->man_meta_ppa);
8119 	} else {
8120 		(void) sprintf(buf, "\t major: %d", manp->man_meta_major);
8121 		(void) sprintf(buf, "\t ppa: %d", manp->man_meta_ppa);
8122 	}
8123 
8124 	cmn_err(CE_CONT, "%s", buf);
8125 
8126 }
8127 
8128 static char *_mdev_state[] = {
8129 	"UNASSIGNED  ",
8130 	"ASSIGNED",
8131 	"ACTIVE",
8132 	"FAILED"
8133 };
8134 
8135 static void
8136 man_print_dev(man_dev_t *mdevp)
8137 {
8138 	char	buf[512];
8139 	char	prbuf[512];
8140 	int	i;
8141 	uint_t	state;
8142 
8143 	buf[0] = '\0';
8144 	prbuf[0] = '\0';
8145 
8146 	if (mdevp == NULL)
8147 		return;
8148 
8149 	if (mdevp->mdev_major == 0) {
8150 number:
8151 		(void) sprintf(buf, "\t mdev_major: %d\n", mdevp->mdev_major);
8152 	} else if (ddi_major_to_name(mdevp->mdev_major)) {
8153 		(void) sprintf(buf, "\t mdev_device: %s%d\n",
8154 		    ddi_major_to_name(mdevp->mdev_major),
8155 		    mdevp->mdev_ppa);
8156 	} else
8157 		goto number;
8158 
8159 	cmn_err(CE_CONT, "%s", buf);
8160 
8161 	cmn_err(CE_CONT, "\t mdev_exp_id: %d\n", mdevp->mdev_exp_id);
8162 
8163 	buf[0] = '\0';
8164 	prbuf[0] = '\0';
8165 	state = mdevp->mdev_state;
8166 
8167 	if (state == 0) {
8168 		(void) strcat(prbuf, _mdev_state[0]);
8169 	} else {
8170 		for (i = 0; i < A_CNT(_mdev_state); i++) {
8171 			if ((state >> i) & 0x1) {
8172 				(void) sprintf(buf, " %s |", _mdev_state[i+1]);
8173 				(void) strcat(prbuf, buf);
8174 			}
8175 		}
8176 	}
8177 
8178 	prbuf[strlen(prbuf) - 2] = '\0';
8179 
8180 	cmn_err(CE_CONT, "\t mdev_state: %s\n", prbuf);
8181 
8182 }
8183 
/*
 * Printable names for the mip_cmd field of an mi_path_t.
 * man_print_mip() indexes this table directly with mip_cmd.
 */
static char *_mip_cmd[] = {
	"MI_PATH_READ",		/* [0] */
	"MI_PATH_ASSIGN",	/* [1] */
	"MI_PATH_ACTIVATE",	/* [2] */
	"MI_PATH_DEACTIVATE",	/* [3] */
	"MI_PATH_UNASSIGN"	/* [4] */
};
8191 
8192 static void
8193 man_print_mtp(mi_time_t *mtp)
8194 {
8195 	cmn_err(CE_CONT, "\tmtp(0x%p)\n", (void *)mtp);
8196 
8197 	if (mtp == NULL)
8198 		return;
8199 
8200 	cmn_err(CE_CONT, "\tmtp_instance: %d\n", mtp->mtp_man_ppa);
8201 
8202 	cmn_err(CE_CONT, "\tmtp_time: %d\n", mtp->mtp_time);
8203 
8204 }
8205 
8206 static void
8207 man_print_mip(mi_path_t *mip)
8208 {
8209 	cmn_err(CE_CONT, "\tmip(0x%p)\n", (void *)mip);
8210 
8211 	if (mip == NULL)
8212 		return;
8213 
8214 	cmn_err(CE_CONT, "\tmip_pg_id: %d\n", mip->mip_pg_id);
8215 
8216 	cmn_err(CE_CONT, "\tmip_cmd: %s\n", _mip_cmd[mip->mip_cmd]);
8217 
8218 	cmn_err(CE_CONT, "\tmip_eaddr: %s\n", ether_sprintf(&mip->mip_eaddr));
8219 
8220 	cmn_err(CE_CONT, "\tmip_devs: 0x%p\n", (void *)mip->mip_devs);
8221 
8222 	cmn_err(CE_CONT, "\tmip_ndevs: %d\n", mip->mip_ndevs);
8223 
8224 }
8225 
8226 static void
8227 man_print_mpg(man_pg_t *mpg)
8228 {
8229 	cmn_err(CE_CONT, "\tmpg(0x%p)\n", (void *)mpg);
8230 
8231 	if (mpg == NULL)
8232 		return;
8233 
8234 	cmn_err(CE_CONT, "\tmpg_next: 0x%p\n", (void *)mpg->mpg_next);
8235 
8236 	cmn_err(CE_CONT, "\tmpg_pg_id: %d\n", mpg->mpg_pg_id);
8237 
8238 	cmn_err(CE_CONT, "\tmpg_man_ppa: %d\n", mpg->mpg_man_ppa);
8239 
8240 	cmn_err(CE_CONT, "\tmpg_dst_eaddr: %s\n",
8241 	    ether_sprintf(&mpg->mpg_dst_eaddr));
8242 
8243 	cmn_err(CE_CONT, "\tmpg_pathp: 0x%p\n", (void *)mpg->mpg_pathp);
8244 
8245 }
8246 
/*
 * Printable names for the mw_flags field of a man_work_t, used by
 * man_print_work().
 *
 * NOTE(review): the per-entry values below are the annotations this
 * table has always carried; confirm them against the mw_flags
 * definitions in the driver header.
 */
static char *_mw_flags[] = {
	"NOWAITER",	/* 0x0 */
	"CVWAITER",	/* 0x1 */
	"QWAITER",	/* 0x2 */
	"DONE"		/* 0x3 */
};
8253 
8254 static void
8255 man_print_work(man_work_t *wp)
8256 {
8257 	int 	i;
8258 
8259 	cmn_err(CE_CONT, "\twp(0x%p)\n\n", (void *)wp);
8260 
8261 	if (wp == NULL)
8262 		return;
8263 
8264 	cmn_err(CE_CONT, "\tmw_type: %s\n", _mw_type[wp->mw_type]);
8265 
8266 	cmn_err(CE_CONT, "\tmw_flags: ");
8267 	for (i = 0; i < A_CNT(_mw_flags); i++) {
8268 		if ((wp->mw_flags >> i) & 0x1)
8269 			cmn_err(CE_CONT, "%s", _mw_flags[i]);
8270 	}
8271 	cmn_err(CE_CONT, "\n");
8272 
8273 	cmn_err(CE_CONT, "\twp_status: %d\n", wp->mw_status);
8274 
8275 	cmn_err(CE_CONT, "\twp_arg: 0x%p\n", (void *)&wp->mw_arg);
8276 
8277 	cmn_err(CE_CONT, "\tmw_next: 0x%p\n", (void *)wp->mw_next);
8278 
8279 	cmn_err(CE_CONT, "\twp_q: 0x%p", (void *)wp->mw_q);
8280 
8281 }
8282 
8283 static void
8284 man_print_path(man_path_t *mp)
8285 {
8286 	cmn_err(CE_CONT, "\tmp(0x%p)\n\n", (void *)mp);
8287 
8288 	if (mp == NULL)
8289 		return;
8290 
8291 	cmn_err(CE_CONT, "\tmp_device:");
8292 	man_print_dev(&mp->mp_device);
8293 
8294 	cmn_err(CE_CONT, "\tmp_next: 0x%p\n", (void *)mp->mp_next);
8295 
8296 	cmn_err(CE_CONT, "\tmp_last_knp: 0x%p\n", (void *)mp->mp_last_knp);
8297 
8298 	cmn_err(CE_CONT, "\tmp_lru: 0x%lx", mp->mp_lru);
8299 
8300 }
8301 
8302 void *
8303 man_dbg_kzalloc(int line, size_t size, int kmflags)
8304 {
8305 	void *tmp;
8306 
8307 	tmp = kmem_zalloc(size, kmflags);
8308 	MAN_DBG(MAN_KMEM, ("0x%p %lu\tzalloc'd @ %d\n", (void *)tmp,
8309 	    size, line));
8310 
8311 	return (tmp);
8312 
8313 }
8314 
8315 void
8316 man_dbg_kfree(int line, void *buf, size_t size)
8317 {
8318 
8319 	MAN_DBG(MAN_KMEM, ("0x%p %lu\tfree'd @ %d\n", (void *)buf, size, line));
8320 
8321 	kmem_free(buf, size);
8322 
8323 }
8324 
8325 #endif  /* DEBUG */
8326