xref: /titanic_50/usr/src/uts/sun4u/starcat/io/dman.c (revision 64d1d4ab72834b7483c7962efc738b568ca8792e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 /*
29  * Starcat Management Network Driver
30  *
31  * ****** NOTICE **** This file also resides in the SSC gate as
32  * ****** NOTICE **** usr/src/uts/sun4u/scman/scman.c. Any changes
33  * ****** NOTICE **** made here must be propagated there as well.
34  *
35  */
36 
37 #include <sys/types.h>
38 #include <sys/proc.h>
39 #include <sys/disp.h>
40 #include <sys/kmem.h>
41 #include <sys/stat.h>
42 #include <sys/kstat.h>
43 #include <sys/ksynch.h>
44 #include <sys/stream.h>
45 #include <sys/dlpi.h>
46 #include <sys/stropts.h>
47 #include <sys/strsubr.h>
48 #include <sys/debug.h>
49 #include <sys/conf.h>
50 #include <sys/kstr.h>
51 #include <sys/errno.h>
52 #include <sys/ethernet.h>
53 #include <sys/byteorder.h>
54 #include <sys/ddi.h>
55 #include <sys/sunddi.h>
56 #include <sys/sunldi.h>
57 #include <sys/modctl.h>
58 #include <sys/strsun.h>
59 #include <sys/callb.h>
60 #include <sys/pci.h>
61 #include <netinet/in.h>
62 #include <inet/common.h>
63 #include <inet/mi.h>
64 #include <inet/nd.h>
65 #include <sys/socket.h>
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <inet/ip.h>
70 #include <inet/ip6.h>
71 #include <sys/file.h>
72 #include <sys/dman.h>
73 #include <sys/autoconf.h>
74 #include <sys/zone.h>
75 
76 extern int ddi_create_internal_pathname(dev_info_t *, char *, int, minor_t);
77 
/* Module id names and /devices pathnames for the dman and eri drivers. */
#define	MAN_IDNAME	"dman"
#define	ERI_IDNAME	"eri"

#define	DMAN_INT_PATH	"/devices/pseudo/dman@0:dman"
#define	DMAN_PATH	"/devices/pseudo/clone@0:dman"
#define	ERI_PATH	"/devices/pseudo/clone@0:eri"
83 
84 #if defined(DEBUG)
85 
86 static void man_print_msp(manstr_t *);
87 static void man_print_man(man_t *);
88 static void man_print_mdp(man_dest_t *);
89 static void man_print_dev(man_dev_t *);
90 static void man_print_mip(mi_path_t *);
91 static void man_print_mtp(mi_time_t *);
92 static void man_print_mpg(man_pg_t *);
93 static void man_print_path(man_path_t *);
94 static void man_print_work(man_work_t *);
95 
/*
 * Set manstr_t dlpistate (upper half of multiplexor), tracing the
 * old and new state names through MAN_DBG first.
 *
 * The trace and the assignment are wrapped in a single do/while
 * statement so that both are governed by the same condition when the
 * macro is the body of an unbraced "if" or loop; previously only the
 * trace was, and the state was changed unconditionally.  The arguments
 * are parenthesized so that any pointer expression may be passed.
 * Note that both arguments are evaluated more than once.
 *
 * The trailing semicolon is kept on purpose so that the macro expands
 * to a complete statement exactly as it did before.
 */
#define	SETSTATE(msp, state) \
	do {								\
		MAN_DBG(MAN_DLPI, ("msp=0x%p @ %d state %s=>%s\n",	\
		    (void *)(msp), __LINE__,				\
		    dss[(msp)->ms_dlpistate], dss[(state)]));		\
		(msp)->ms_dlpistate = (state);				\
	} while (0);
/*
 * Set man_dest_t dlpistate (lower half of multiplexor), tracing the
 * old and new state names through MAN_DBG first.
 *
 * The trace and the assignment are wrapped in a single do/while
 * statement so that both are governed by the same condition when the
 * macro is the body of an unbraced "if" or loop; previously only the
 * trace was, and the state was changed unconditionally.  The arguments
 * are parenthesized so that any pointer expression may be passed.
 * Note that both arguments are evaluated more than once.
 *
 * The trailing semicolon is kept on purpose so that the macro expands
 * to a complete statement exactly as it did before.
 */
#define	D_SETSTATE(mdp, state) \
	do {								\
		MAN_DBG(MAN_DLPI, ("dst=0x%p @ %d state %s=>%s\n",	\
		    (void *)(mdp), __LINE__,				\
		    dss[(mdp)->md_dlpistate], dss[(state)]));		\
		(mdp)->md_dlpistate = (state);				\
	} while (0);
112 
/*
 * DLPI promiscuous-level names for debug output.  Slot 0 is a
 * placeholder; slots 1-3 hold the DL_PROMISC_* names.
 */
static char *promisc[] = {
	"not used",		/* 0 */
	"DL_PROMISC_PHYS",	/* 1 */
	"DL_PROMISC_SAP",	/* 2 */
	"DL_PROMISC_MULTI"	/* 3 */
};
119 
/*
 * DLPI primitive names for debug output, one slot per primitive value
 * from 0x00 through MAN_DLPI_MAX_PRIM.  Slot 0x16 is a filler string.
 */
static char *dps[] = {
	/* 0x00 - 0x07 */
	"DL_INFO_REQ",
	"DL_BIND_REQ",
	"DL_UNBIND_REQ",
	"DL_INFO_ACK",
	"DL_BIND_ACK",
	"DL_ERROR_ACK",
	"DL_OK_ACK",
	"DL_UNITDATA_REQ",
	/* 0x08 - 0x0f */
	"DL_UNITDATA_IND",
	"DL_UDERROR_IND",
	"DL_UDQOS_REQ",
	"DL_ATTACH_REQ",
	"DL_DETACH_REQ",
	"DL_CONNECT_REQ",
	"DL_CONNECT_IND",
	"DL_CONNECT_RES",
	/* 0x10 - 0x17 */
	"DL_CONNECT_CON",
	"DL_TOKEN_REQ",
	"DL_TOKEN_ACK",
	"DL_DISCONNECT_REQ",
	"DL_DISCONNECT_IND",
	"DL_SUBS_UNBIND_REQ",
	"DL_LIARLIARPANTSONFIRE",
	"DL_RESET_REQ",
	/* 0x18 - 0x1f */
	"DL_RESET_IND",
	"DL_RESET_RES",
	"DL_RESET_CON",
	"DL_SUBS_BIND_REQ",
	"DL_SUBS_BIND_ACK",
	"DL_ENABMULTI_REQ",
	"DL_DISABMULTI_REQ",
	"DL_PROMISCON_REQ",
	/* 0x20 - 0x27 */
	"DL_PROMISCOFF_REQ",
	"DL_DATA_ACK_REQ",
	"DL_DATA_ACK_IND",
	"DL_DATA_ACK_STATUS_IND",
	"DL_REPLY_REQ",
	"DL_REPLY_IND",
	"DL_REPLY_STATUS_IND",
	"DL_REPLY_UPDATE_REQ",
	/* 0x28 - 0x2f */
	"DL_REPLY_UPDATE_STATUS_IND",
	"DL_XID_REQ",
	"DL_XID_IND",
	"DL_XID_RES",
	"DL_XID_CON",
	"DL_TEST_REQ",
	"DL_TEST_IND",
	"DL_TEST_RES",
	/* 0x30 - 0x35 */
	"DL_TEST_CON",
	"DL_PHYS_ADDR_REQ",
	"DL_PHYS_ADDR_ACK",
	"DL_SET_PHYS_ADDR_REQ",
	"DL_GET_STATISTICS_REQ",
	"DL_GET_STATISTICS_ACK",
};

/* Index of the last entry in dps[]. */
#define	MAN_DLPI_MAX_PRIM	0x35
178 
/*
 * DLPI state names for debug output; SETSTATE and D_SETSTATE index
 * this table with the old and new dlpistate values.
 */
static char *dss[] = {
	/* 0x00 - 0x07 */
	"DL_UNBOUND",
	"DL_BIND_PENDING",
	"DL_UNBIND_PENDING",
	"DL_IDLE",
	"DL_UNATTACHED",
	"DL_ATTACH_PENDING",
	"DL_DETACH_PENDING",
	"DL_UDQOS_PENDING",
	/* 0x08 - 0x0f */
	"DL_OUTCON_PENDING",
	"DL_INCON_PENDING",
	"DL_CONN_RES_PENDING",
	"DL_DATAXFER",
	"DL_USER_RESET_PENDING",
	"DL_PROV_RESET_PENDING",
	"DL_RESET_RES_PENDING",
	"DL_DISCON8_PENDING",
	/* 0x10 - 0x15 */
	"DL_DISCON9_PENDING",
	"DL_DISCON11_PENDING",
	"DL_DISCON12_PENDING",
	"DL_DISCON13_PENDING",
	"DL_SUBS_BIND_PND",
	"DL_SUBS_UNBIND_PND",
};
203 
/* Link state names for debug output (slots 0 through 4). */
static const char *lss[] = {
	"UNKNOWN",	/* 0 */
	"INIT",		/* 1 */
	"GOOD",		/* 2 */
	"STALE",	/* 3 */
	"FAIL",		/* 4 */
};
211 
/* Work request type names for debug output (slots 0 through 10). */
static char *_mw_type[] = {
	"OPEN_CTL",		/* 0 */
	"CLOSE_CTL",		/* 1 */
	"SWITCH",		/* 2 */
	"PATH_UPDATE",		/* 3 */
	"CLOSE",		/* 4 */
	"CLOSE_STREAM",		/* 5 */
	"DRATTACH",		/* 6 */
	"DRDETACH",		/* 7 */
	"STOP",			/* 8 */
	"DRSWITCH",		/* 9 */
	"KSTAT_UPDATE"		/* 10 */
};
225 
226 uint32_t		man_debug = MAN_WARN;
227 
228 #define	man_kzalloc(a, b)	man_dbg_kzalloc(__LINE__, a, b)
229 #define	man_kfree(a, b)		man_dbg_kfree(__LINE__, a, b)
230 void	*man_dbg_kzalloc(int line, size_t size, int kmflags);
231 void	man_dbg_kfree(int line, void *buf, size_t size);
232 
233 #else	/* DEBUG */
234 
/* Non-DEBUG builds start with all debug output disabled. */
uint32_t		man_debug = 0;

/*
 * Set manstr_t dlpistate (upper half of multiplexor).
 *
 * The pointer argument is parenthesized so that any pointer expression
 * (for example a conditional expression) may be passed.  The trailing
 * semicolon is kept so that the macro still expands to a complete
 * statement, as it always has.
 */
#define	SETSTATE(msp, state) (msp)->ms_dlpistate = (state);

/*
 * Set man_dest_t dlpistate (lower half of multiplexor).
 * Same argument and semicolon conventions as SETSTATE.
 */
#define	D_SETSTATE(mdp, state) (mdp)->md_dlpistate = (state);

/* Non-DEBUG allocation wrappers map straight onto kmem_zalloc/kmem_free. */
#define	man_kzalloc(a, b)	kmem_zalloc(a, b)
#define	man_kfree(a, b)		kmem_free(a, b)
247 
248 #endif	/* DEBUG */
249 
/*
 * Accessors that view the data at an mblk's b_rptr as a DLPI primitive
 * or as an iocblk.
 *
 *	DL_PRIM			dl_primitive of the message
 *	DL_PROMISCON_TYPE	dl_level of a promiscon_req
 *	IOC_CMD			ioc_cmd of the iocblk
 */
#define	DL_PRIM(mp) \
	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
#define	DL_PROMISCON_TYPE(mp) \
	(((union DL_primitives *)(mp)->b_rptr)->promiscon_req.dl_level)
#define	IOC_CMD(mp) \
	(((struct iocblk *)(mp)->b_rptr)->ioc_cmd)
254 
/*
 * Start of kstat-related declarations
 *
 * Flag bits stored in man_kstat_info_t.mk_flags.
 */
#define	MK_NOT_COUNTER		(1 << 0)	/* is it a counter? */
#define	MK_ERROR		(1 << 2)	/* for error statistics */
#define	MK_NOT_PHYSICAL		(1 << 3)	/* no matching physical stat */
261 
262 typedef struct man_kstat_info_s {
263 	char		*mk_name;	/* e.g. align_errors */
264 	char		*mk_physname;	/* e.g. framing (NULL for same) */
265 	char		*mk_physalias;	/* e.g. framing (NULL for same) */
266 	uchar_t		mk_type;	/* e.g. KSTAT_DATA_UINT32 */
267 	int		mk_flags;
268 } man_kstat_info_t;
269 
/*
 * Master declaration macro: expands to one man_kstat_info_t
 * initializer.  "bits" is token-pasted onto KSTAT_DATA_UINT, so it
 * must be passed as a literal 32 or 64.
 */
#define	MK_DECLARE(name, pname, palias, bits, flags) \
	{ name, pname, palias, KSTAT_DATA_UINT ## bits, flags }

/*
 * Normal counter forms
 */
#define	MK_DECLARE32(name, alias)	MK_DECLARE(name, name, alias, 32, 0)
#define	MK_DECLARE64(name, alias)	MK_DECLARE(name, name, alias, 64, 0)

/*
 * Error counters need special MK_ERROR flag only for the non-AP form
 */
#define	MK_ERROR32(name, alias) \
	MK_DECLARE(name, name, alias, 32, MK_ERROR)
#define	MK_ERROR64(name, alias) \
	MK_DECLARE(name, name, alias, 64, MK_ERROR)

/*
 * Obsolete forms don't have the _sinceswitch forms, they are all errors.
 * Note the swap: the entry is named after "alias" and "name" becomes
 * its physical name.
 */
#define	MK_OBSOLETE32(name, alias) \
	MK_DECLARE(alias, name, alias, 32, MK_ERROR)
#define	MK_OBSOLETE64(name, alias) \
	MK_DECLARE(alias, name, alias, 64, MK_ERROR)

/*
 * The only non-counters don't have any other aliases
 */
#define	MK_NOTCOUNTER32(name) \
	MK_DECLARE(name, name, NULL, 32, MK_NOT_COUNTER)
#define	MK_NOTCOUNTER64(name) \
	MK_DECLARE(name, name, NULL, 64, MK_NOT_COUNTER)

/*
 * These AP-specific stats are not backed by physical statistics
 */
#define	MK_NOTPHYS32(name) \
	MK_DECLARE(name, NULL, NULL, 32, MK_NOT_PHYSICAL)
#define	MK_NOTPHYS64(name) \
	MK_DECLARE(name, NULL, NULL, 64, MK_NOT_PHYSICAL)
309 
310 /*
311  * START of the actual man_kstat_info declaration using above macros
312  */
313 static man_kstat_info_t man_kstat_info[] = {
314 	/*
315 	 * Link Input/Output stats
316 	 */
317 	MK_DECLARE32("ipackets", NULL),
318 	MK_ERROR32("ierrors", NULL),
319 	MK_DECLARE32("opackets", NULL),
320 	MK_ERROR32("oerrors", NULL),
321 	MK_ERROR32("collisions", NULL),
322 	MK_NOTCOUNTER64("ifspeed"),
323 	/*
324 	 * These are new MIB-II stats, per PSARC 1997/198
325 	 */
326 	MK_DECLARE32("rbytes", NULL),
327 	MK_DECLARE32("obytes", NULL),
328 	MK_DECLARE32("multircv", NULL),
329 	MK_DECLARE32("multixmt", NULL),
330 	MK_DECLARE32("brdcstrcv", NULL),
331 	MK_DECLARE32("brdcstxmt", NULL),
332 	/*
333 	 * Error values
334 	 */
335 	MK_ERROR32("norcvbuf", NULL),
336 	MK_ERROR32("noxmtbuf", NULL),
337 	MK_ERROR32("unknowns", NULL),
338 	/*
339 	 * These are the 64-bit values, they fallback to 32-bit values
340 	 */
341 	MK_DECLARE64("ipackets64", "ipackets"),
342 	MK_DECLARE64("opackets64", "opackets"),
343 	MK_DECLARE64("rbytes64", "rbytes"),
344 	MK_DECLARE64("obytes64", "obytes"),
345 
346 	/* New AP switching statistics */
347 	MK_NOTPHYS64("man_switches"),
348 	MK_NOTPHYS64("man_link_fails"),
349 	MK_NOTPHYS64("man_link_stales"),
350 	MK_NOTPHYS64("man_icmpv4_probes"),
351 	MK_NOTPHYS64("man_icmpv6_probes"),
352 
353 	MK_ERROR32("align_errors", "framing"),
354 	MK_ERROR32("fcs_errors", "crc"),
355 	MK_ERROR32("first_collisions", NULL),
356 	MK_ERROR32("multi_collisions", NULL),
357 	MK_ERROR32("sqe_errors", "sqe"),
358 
359 	MK_ERROR32("tx_late_collisions", NULL),
360 	MK_ERROR32("ex_collisions", "excollisions"),
361 	MK_ERROR32("macxmt_errors", NULL),
362 	MK_ERROR32("carrier_errors", "nocarrier"),
363 	MK_ERROR32("toolong_errors", "buff"),
364 	MK_ERROR32("macrcv_errors", NULL),
365 
366 	MK_OBSOLETE32("framing", "align_errors"),
367 	MK_OBSOLETE32("crc", "fcs_errors"),
368 	MK_OBSOLETE32("sqe", "sqe_errors"),
369 	MK_OBSOLETE32("excollisions", "ex_collisions"),
370 	MK_OBSOLETE32("nocarrier", "carrier_errors"),
371 	MK_OBSOLETE32("buff", "toolong_errors"),
372 };
373 
374 #define	MAN_NUMSTATS (sizeof (man_kstat_info) / sizeof (man_kstat_info_t))
375 
376 /*
377  * Miscellaneous ethernet stuff.
378  *
379  * MANs DL_INFO_ACK template.
380  */
381 static	dl_info_ack_t man_infoack = {
382 	DL_INFO_ACK,				/* dl_primitive */
383 	ETHERMTU,				/* dl_max_sdu */
384 	0,					/* dl_min_sdu */
385 	MAN_ADDRL,				/* dl_addr_length */
386 	DL_ETHER,				/* dl_mac_type */
387 	0,					/* dl_reserved */
388 	0,					/* dl_current_state */
389 	-2,					/* dl_sap_length */
390 	DL_CLDLS,				/* dl_service_mode */
391 	0,					/* dl_qos_length */
392 	0,					/* dl_qos_offset */
393 	0,					/* dl_range_length */
394 	0,					/* dl_range_offset */
395 	DL_STYLE2,				/* dl_provider_style */
396 	sizeof (dl_info_ack_t),			/* dl_addr_offset */
397 	DL_VERSION_2,				/* dl_version */
398 	ETHERADDRL,				/* dl_brdcst_addr_length */
399 	sizeof (dl_info_ack_t) + MAN_ADDRL,	/* dl_brdcst_addr_offset */
400 	0					/* dl_growth */
401 };
402 
403 /*
404  * Ethernet broadcast address definition.
405  */
406 static	struct ether_addr	etherbroadcast = {
407 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
408 };
409 
410 static struct ether_addr zero_ether_addr = {
411 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
412 };
413 
414 /*
415  * Set via MAN_SET_SC_IPADDRS ioctl.
416  */
417 man_sc_ipaddrs_t	man_sc_ipaddrs = { 0xffffffffU, 0xffffffffU };
418 
419 /*
420  * Set via MAN_SET_SC_IP6ADDRS ioctl.
421  */
422 man_sc_ip6addrs_t	man_sc_ip6addrs = { 0, 0, 0, 0, 0, 0, 0, 0 };
423 
/*
 * IP & ICMP constants
 *
 * Supply ETHERTYPE_IPV6 when the included headers have not defined it.
 */
#if !defined(ETHERTYPE_IPV6)
#define	ETHERTYPE_IPV6 0x86DD
#endif	/* !ETHERTYPE_IPV6 */
430 
431 /*
432  * Function prototypes.
433  *
434  * Upper multiplexor functions.
435  */
436 static int	man_attach(dev_info_t *, ddi_attach_cmd_t);
437 static int	man_detach(dev_info_t *, ddi_detach_cmd_t);
438 static int	man_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
439 static int	man_open(register queue_t *, dev_t *, int, int, cred_t *);
440 static int	man_configure(queue_t *);
441 static int	man_deconfigure(void);
442 static int	man_init_dests(man_t *, manstr_t *);
443 static void	man_start_dest(man_dest_t *, manstr_t *, man_pg_t *);
444 static void	man_set_optimized_dest(manstr_t *);
445 static int	man_close(queue_t *);
446 static void	man_cancel_timers(man_adest_t *);
447 static int	man_uwput(queue_t *, mblk_t *);
448 static int	man_start(queue_t *, mblk_t *, eaddr_t *);
449 static void	man_ioctl(queue_t *, mblk_t *);
450 static void	man_set_linkcheck_time(queue_t *, mblk_t *);
451 static void	man_setpath(queue_t *, mblk_t *);
452 static void	man_geteaddr(queue_t *, mblk_t *);
453 static void	man_set_sc_ipaddrs(queue_t *, mblk_t *);
454 static void	man_set_sc_ip6addrs(queue_t *, mblk_t *);
455 static int	man_get_our_etheraddr(eaddr_t *eap);
456 static void	man_nd_getset(queue_t *, mblk_t *);
457 static void	man_dl_ioc_hdr_info(queue_t *, mblk_t *);
458 static int	man_uwsrv(queue_t *);
459 static int	man_proto(queue_t *, mblk_t *);
460 static int	man_udreq(queue_t *, mblk_t *);
461 static void	man_areq(queue_t *, mblk_t *);
462 static mblk_t	*man_alloc_physreq_mp(eaddr_t *);
463 static void	man_dreq(queue_t *, mblk_t *);
464 static void	man_dodetach(manstr_t *, man_work_t *);
465 static void	man_dl_clean(mblk_t **);
466 static void	man_breq(queue_t *, mblk_t *);
467 static void	man_ubreq(queue_t *, mblk_t *);
468 static void	man_ireq(queue_t *, mblk_t *);
469 static void	man_ponreq(queue_t *, mblk_t *);
470 static void	man_poffreq(queue_t *, mblk_t *);
471 static void	man_emreq(queue_t *, mblk_t *);
472 static void	man_dmreq(queue_t *, mblk_t *);
473 static void	man_pareq(queue_t *, mblk_t *);
474 static void	man_spareq(queue_t *, mblk_t *);
475 static int	man_dlpi(manstr_t *, mblk_t *);
476 static int	man_dlioc(manstr_t *, mblk_t *);
477 static int	man_dl_catch(mblk_t **, mblk_t *);
478 static void	man_dl_release(mblk_t **, mblk_t *);
479 static int	man_match_proto(mblk_t *, mblk_t *);
480 static int	man_open_ctl();
481 static void	man_close_ctl();
482 /*
483  * upper/lower multiplexor functions.
484  */
485 static int	man_dlpi_senddown(manstr_t *, mblk_t *);
486 static int	man_start_lower(man_dest_t *, mblk_t *, queue_t *, int caller);
487 static int	man_lrput(queue_t *, mblk_t *);
488 /*
489  * Lower multiplexor functions.
490  */
491 static int	man_lwsrv(queue_t *);
492 static int	man_lrsrv(queue_t *);
493 static void	man_dlpi_replay(man_dest_t *, mblk_t *);
494 static int	man_dlioc_replay(man_dest_t *);
495 /*
496  * Link failover routines.
497  */
498 static int	man_gettimer(int, man_dest_t *);
499 static void	man_linkcheck_timer(void *);
500 static int	man_needs_linkcheck(man_dest_t *);
501 static int	man_do_autoswitch(man_dest_t *);
502 static int	man_autoswitch(man_pg_t *, man_dev_t *, man_work_t *);
503 static int	man_prep_dests_for_switch(man_pg_t *, man_dest_t **, int *);
504 static int	man_str_uses_pg(manstr_t *, man_pg_t *);
505 static void	man_do_icmp_bcast(man_dest_t *, t_uscalar_t);
506 static mblk_t	*man_alloc_udreq(int, man_dladdr_t *);
507 static mblk_t	*man_pinger(t_uscalar_t);
508 /*
509  * Functions normally executing outside of the STREAMs perimeter.
510  */
511 /*
512  * Functions supporting/processing work requests.
513  */
514 static void	man_bwork(void);
515 static void	man_iwork(void);		/* inside perimeter */
516 void		man_work_add(man_workq_t *, man_work_t *);
517 man_work_t	*man_work_alloc(int, int);
518 void		man_work_free(man_work_t *);
519 /*
520  * Functions implementing/supporting failover.
521  *
522  * Executed inside perimeter.
523  */
524 static int	man_do_dr_attach(man_work_t *);
525 static int	man_do_dr_switch(man_work_t *);
526 static void	man_do_dr_detach(man_work_t *);
527 static int	man_iswitch(man_work_t *);
528 static void	man_ifail_dest(man_dest_t *);
529 static man_dest_t *man_switch_match(man_dest_t *, int, void *);
530 static void	man_add_dests(man_pg_t *);
531 static void	man_reset_dlpi(void *);
532 static mblk_t	*man_dup_mplist(mblk_t *);
533 static mblk_t	*man_alloc_ubreq_dreq();
534 /*
535  * Executed outside perimeter (use man_lock for synchronization).
536  */
537 static void	man_bclose(man_adest_t *);
538 static void	man_bswitch(man_adest_t *, man_work_t *);
539 static int	man_plumb(man_dest_t *);
540 static void	man_unplumb(man_dest_t *);
541 static void	man_plink(queue_t *, mblk_t *);
542 static void	man_unplink(queue_t *, mblk_t *);
543 static void	man_linkrec_insert(man_linkrec_t *);
544 static queue_t	*man_linkrec_find(int);
545 /*
546  * Functions supporting pathgroups
547  */
548 int	man_pg_cmd(mi_path_t *, man_work_t *);
549 static int	man_pg_assign(man_pg_t **, mi_path_t *, int);
550 static int	man_pg_create(man_pg_t **, man_pg_t **, mi_path_t *);
551 static int	man_pg_unassign(man_pg_t **, mi_path_t *);
552 static int	man_pg_activate(man_t *, mi_path_t *, man_work_t *);
553 static int	man_pg_read(man_pg_t *, mi_path_t *);
554 static man_pg_t	*man_find_path_by_dev(man_pg_t *, man_dev_t *, man_path_t **);
555 static man_pg_t	*man_find_pg_by_id(man_pg_t *, int);
556 static man_path_t	*man_find_path_by_ppa(man_path_t *, int);
557 static man_path_t	*man_find_active_path(man_path_t *);
558 static man_path_t	*man_find_alternate_path(man_path_t *);
559 static void	man_path_remove(man_path_t **, man_path_t *);
560 static void	man_path_insert(man_path_t **, man_path_t *);
561 static void	man_path_merge(man_path_t **, man_path_t *);
562 static int	man_path_kstat_init(man_path_t *);
563 static void	man_path_kstat_uninit(man_path_t *);
564 /*
565  * Functions supporting kstat reporting.
566  */
567 static int	man_kstat_update(kstat_t *, int);
568 static void	man_do_kstats(man_work_t *);
569 static void	man_update_path_kstats(man_t *);
570 static void 	man_update_dev_kstats(kstat_named_t *, man_path_t *);
571 static void	man_sum_dests_kstats(kstat_named_t *, man_pg_t *);
572 static void	man_kstat_named_init(kstat_named_t *, int);
573 static int	man_kstat_byname(kstat_t *, char *, kstat_named_t *);
574 static void	man_sum_kstats(kstat_named_t *, kstat_t *, kstat_named_t *);
575 /*
576  * Functions supporting ndd.
577  */
578 static int	man_param_register(param_t *, int);
579 static int	man_pathgroups_report(queue_t *, mblk_t *, caddr_t, cred_t *);
580 static void	man_preport(man_path_t *, mblk_t *);
581 static int	man_set_active_path(queue_t *, mblk_t *, char *, caddr_t,
582 			cred_t *);
583 static int	man_get_hostinfo(queue_t *, mblk_t *, caddr_t, cred_t *);
584 static char	*man_inet_ntoa(in_addr_t);
585 static int	man_param_get(queue_t *, mblk_t *, caddr_t, cred_t *);
586 static int	man_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *);
587 static  void    man_param_cleanup(void);
588 static  void    man_nd_free(caddr_t *nd_pparam);
589 /*
590  * MAN SSC/Domain specific externs.
591  */
592 extern int	man_get_iosram(manc_t *);
593 extern int	man_domain_configure(void);
594 extern int	man_domain_deconfigure(void);
595 extern int	man_dossc_switch(uint32_t);
596 extern int	man_is_on_domain;
597 
598 /*
599  * Driver Globals protected by inner perimeter.
600  */
601 static manstr_t	*man_strup = NULL;	/* list of MAN STREAMS */
602 static caddr_t	man_ndlist = NULL;	/* head of ndd var list */
603 void		*man_softstate = NULL;
604 
605 /*
606  * Driver globals protected by man_lock.
607  */
608 kmutex_t		man_lock;		/* lock protecting vars below */
609 static kthread_id_t	man_bwork_id = NULL;	/* background thread ID */
610 man_workq_t		*man_bwork_q;		/* bgthread work q */
611 man_workq_t		*man_iwork_q;		/* inner perim (uwsrv) work q */
612 static man_linkrec_t	*man_linkrec_head = NULL;	/* list of linkblks */
613 ldi_handle_t		man_ctl_lh = NULL;	/* MAN control handle */
614 queue_t			*man_ctl_wq = NULL;	/* MAN control rq */
615 static int		man_config_state = MAN_UNCONFIGURED;
616 static int		man_config_error = ENODEV;
617 
618 /*
619  * These parameters are accessed via ndd to report the link configuration
620  * for the MAN driver. They can also be used to force configuration changes.
621  */
622 #define	MAN_NOTUSR	0x0f000000
623 
624 /* ------------------------------------------------------------------------- */
625 
626 static  param_t	man_param_arr[] = {
627 	/* min		max		value		name */
628 	{  0,		0xFFFF,		0,		"man_debug_level"},
629 };
630 
631 #define	MAN_NDD_GETABLE	1
632 #define	MAN_NDD_SETABLE	2
633 
634 static  uint32_t	man_param_display[] = {
635 /* DISPLAY */
636 MAN_NDD_SETABLE,	/* man_debug_level */
637 };
638 
639 /*
640  * STREAMs information.
641  */
642 static struct module_info man_m_info = {
643 	MAN_IDNUM,			/* mi_idnum */
644 	MAN_IDNAME,			/* mi_idname */
645 	MAN_MINPSZ,			/* mi_minpsz */
646 	MAN_MAXPSZ,			/* mi_maxpsz */
647 	MAN_HIWAT,			/* mi_hiwat */
648 	MAN_LOWAT			/* mi_lowat */
649 };
650 
651 /*
652  * Upper read queue does not do anything.
653  */
654 static struct qinit man_urinit = {
655 	NULL,				/* qi_putp */
656 	NULL,				/* qi_srvp */
657 	man_open,			/* qi_qopen */
658 	man_close,			/* qi_qclose */
659 	NULL,				/* qi_qadmin */
660 	&man_m_info,			/* qi_minfo */
661 	NULL				/* qi_mstat */
662 };
663 
664 static struct qinit man_lrinit = {
665 	man_lrput,			/* qi_putp */
666 	man_lrsrv,			/* qi_srvp */
667 	man_open,			/* qi_qopen */
668 	man_close,			/* qi_qclose */
669 	NULL,				/* qi_qadmin */
670 	&man_m_info,			/* qi_minfo */
671 	NULL				/* qi_mstat */
672 };
673 
674 static struct qinit man_uwinit = {
675 	man_uwput,			/* qi_putp */
676 	man_uwsrv,			/* qi_srvp */
677 	man_open,			/* qi_qopen */
678 	man_close,			/* qi_qclose */
679 	NULL,				/* qi_qadmin */
680 	&man_m_info,			/* qi_minfo */
681 	NULL				/* qi_mstat */
682 };
683 
684 static struct qinit man_lwinit = {
685 	NULL,				/* qi_putp */
686 	man_lwsrv,			/* qi_srvp */
687 	man_open,			/* qi_qopen */
688 	man_close,			/* qi_qclose */
689 	NULL,				/* qi_qadmin */
690 	&man_m_info,			/* qi_minfo */
691 	NULL				/* qi_mstat */
692 };
693 
694 static struct streamtab man_maninfo = {
695 	&man_urinit,			/* st_rdinit */
696 	&man_uwinit,			/* st_wrinit */
697 	&man_lrinit,			/* st_muxrinit */
698 	&man_lwinit			/* st_muxwrinit */
699 };
700 
701 
702 /*
703  * Module linkage information for the kernel.
704  *
705  * Locking Theory:
706  * 	D_MTPERMOD -	Only an inner perimeter: All routines single
707  * 			threaded (except put, see below).
708  *	D_MTPUTSHARED -	Put routines enter inner perimeter shared (not
709  *			exclusive) for concurrency/performance reasons.
710  *
711  *	Anyone who needs exclusive outer perimeter permission (changing
712  *	global data structures) does so via qwriter() calls. The
713  *	background thread does all his work outside of perimeter and
714  *	submits work via qtimeout() when data structures need to be
715  *	modified.
716  */
717 
718 #define	MAN_MDEV_FLAGS	(D_MP|D_MTPERMOD|D_MTPUTSHARED)
719 
720 DDI_DEFINE_STREAM_OPS(man_ops, nulldev, nulldev, man_attach,
721     man_detach, nodev, man_info, MAN_MDEV_FLAGS, &man_maninfo,
722     ddi_quiesce_not_supported);
723 
724 extern int nodev(), nulldev();
725 
726 static struct modldrv modldrv = {
727 	&mod_driverops, 	/* Module type.  This one is a pseudo driver */
728 	"MAN MetaDriver",
729 	&man_ops,		/* driver ops */
730 };
731 
732 static struct modlinkage modlinkage = {
733 	MODREV_1,
734 	(void *) &modldrv,
735 	NULL
736 };
737 
738 
739 /* Virtual Driver loader entry points */
740 
741 int
742 _init(void)
743 {
744 	int		status = DDI_FAILURE;
745 
746 	MAN_DBG(MAN_INIT, ("_init:"));
747 
748 	status = mod_install(&modlinkage);
749 	if (status != 0) {
750 		cmn_err(CE_WARN, "man_init: mod_install failed"
751 		    " error = %d", status);
752 		return (status);
753 	}
754 
755 	status = ddi_soft_state_init(&man_softstate, sizeof (man_t), 4);
756 	if (status != 0) {
757 		cmn_err(CE_WARN, "man_init: ddi_soft_state_init failed"
758 		    " error = %d", status);
759 		mod_remove(&modlinkage);
760 		return (status);
761 	}
762 
763 	man_bwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
764 	man_iwork_q = man_kzalloc(sizeof (man_workq_t), KM_SLEEP);
765 
766 	mutex_init(&man_lock, NULL, MUTEX_DRIVER, NULL);
767 	cv_init(&man_bwork_q->q_cv, NULL, CV_DRIVER, NULL);
768 	cv_init(&man_iwork_q->q_cv, NULL, CV_DRIVER, NULL);
769 
770 	return (0);
771 }
772 
773 /*
774  * _info is called by modinfo().
775  */
776 int
777 _info(struct modinfo *modinfop)
778 {
779 	int	status;
780 
781 	MAN_DBG(MAN_INIT, ("_info:"));
782 
783 	status = mod_info(&modlinkage, modinfop);
784 
785 	MAN_DBG(MAN_INIT, ("_info: returns %d", status));
786 
787 	return (status);
788 }
789 
790 /*
791  * _fini called by modunload() just before driver is unloaded from memory.
792  */
793 int
794 _fini(void)
795 {
796 	int status = 0;
797 
798 	MAN_DBG(MAN_INIT, ("_fini:"));
799 
800 
801 	/*
802 	 * The only upper stream left should be man_ctl_lh. Note that
803 	 * man_close (upper stream) is synchronous (i.e. it waits for
804 	 * all STREAMS framework associated with the upper stream to be
805 	 * torn down). This guarantees that man_ctl_lh will never become
806 	 * NULL until noone is around to notice. This assumption is made
807 	 * in a few places like man_plumb, man_unplumb, etc.
808 	 */
809 	if (man_strup && (man_strup->ms_next != NULL))
810 		return (EBUSY);
811 
812 	/*
813 	 * Deconfigure the driver.
814 	 */
815 	status = man_deconfigure();
816 	if (status)
817 		goto exit;
818 
819 	/*
820 	 * need to detach every instance of the driver
821 	 */
822 	status = mod_remove(&modlinkage);
823 	if (status != 0)
824 		goto exit;
825 
826 	ddi_soft_state_fini(&man_softstate);
827 
828 	/*
829 	 * Free up locks.
830 	 */
831 	mutex_destroy(&man_lock);
832 	cv_destroy(&man_bwork_q->q_cv);
833 	cv_destroy(&man_iwork_q->q_cv);
834 
835 	man_kfree(man_bwork_q, sizeof (man_workq_t));
836 	man_kfree(man_iwork_q, sizeof (man_workq_t));
837 
838 exit:
839 
840 	MAN_DBG(MAN_INIT, ("_fini: returns %d", status));
841 
842 	return (status);
843 }
844 
845 /*
846  * Deconfigure the MAN driver.
847  */
848 static int
849 man_deconfigure()
850 {
851 	man_work_t	*wp;
852 	int		status = 0;
853 
854 	MAN_DBG(MAN_CONFIG, ("man_deconfigure:\n"));
855 
856 	mutex_enter(&man_lock);
857 
858 	if (man_is_on_domain) {
859 		status = man_domain_deconfigure();
860 		if (status != 0)
861 			goto exit;
862 	}
863 
864 	man_param_cleanup();	/* Free up NDD resources */
865 
866 	/*
867 	 * I may have to handle straggling work requests. Just qwait?
868 	 * or cvwait? Called from _fini - TBD
869 	 */
870 	ASSERT(man_bwork_q->q_work == NULL);
871 	ASSERT(man_iwork_q->q_work == NULL);
872 
873 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting CLOSE_CTL\n"));
874 
875 	if (man_ctl_lh != NULL) {
876 		wp = man_work_alloc(MAN_WORK_CLOSE_CTL, KM_SLEEP);
877 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
878 		man_work_add(man_bwork_q, wp);
879 
880 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
881 			cv_wait(&wp->mw_cv, &man_lock);
882 		}
883 		man_work_free(wp);
884 	}
885 
886 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: submitting STOP\n"));
887 	if (man_bwork_id != NULL) {
888 
889 		wp = man_work_alloc(MAN_WORK_STOP, KM_SLEEP);
890 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
891 		man_work_add(man_bwork_q, wp);
892 
893 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
894 			cv_wait(&wp->mw_cv, &man_lock);
895 		}
896 		man_work_free(wp);
897 	}
898 	man_config_state = MAN_UNCONFIGURED;
899 
900 exit:
901 	mutex_exit(&man_lock);
902 
903 	MAN_DBG(MAN_CONFIG, ("man_deconfigure: returns %d\n", status));
904 
905 	return (status);
906 }
907 
908 /*
909  * man_attach - allocate resources and attach an instance of the MAN driver
910  * The <man>.conf file controls how many instances of the MAN driver are
911  * available.
912  *
913  *	dip - devinfo of node
914  * 	cmd - one of DDI_ATTACH | DDI_RESUME
915  *
916  *	returns	- success - DDI_SUCCESS
917  *		- failure - DDI_FAILURE
918  */
919 static int
920 man_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
921 {
922 	man_t		*manp;		/* per instance data */
923 	uchar_t		flag = KSTAT_FLAG_WRITABLE; /* support netstat -kc */
924 	kstat_t		*ksp;
925 	int		minor_node_created = 0;
926 	int		instance;
927 	eaddr_t		man_eaddr;
928 
929 	MAN_DBG(MAN_INIT, ("man_attach: \n"));
930 
931 	if (cmd != DDI_ATTACH) {
932 		MAN_DBG(MAN_INIT, ("man_attach: bad command %d\n", cmd));
933 		return (DDI_FAILURE);
934 	}
935 
936 	if (man_get_our_etheraddr(&man_eaddr))
937 		return (DDI_FAILURE);
938 
939 	instance = ddi_get_instance(dip);
940 
941 	/*
942 	 * we assume that instance is always equal to zero.
943 	 * and there will always only be one instance.
944 	 * this is done because when dman opens itself via DMAN_INT_PATH,
945 	 * the path assumes that the instance number is zero.
946 	 * if we ever need to support multiple instances of the dman
947 	 * driver or non-zero instances, this will have to change.
948 	 */
949 	ASSERT(instance == 0);
950 
951 	/*
952 	 * Allocate per device info pointer and link in to global list of
953 	 * MAN devices.
954 	 */
955 	if ((ddi_soft_state_zalloc(man_softstate, instance) != DDI_SUCCESS) ||
956 	    ((manp = ddi_get_soft_state(man_softstate, instance)) == NULL)) {
957 		cmn_err(CE_WARN, "man_attach: cannot zalloc soft state!");
958 		return (DDI_FAILURE);
959 	}
960 
961 	ddi_set_driver_private(dip, manp);
962 	manp->man_dip = dip;
963 	manp->man_meta_major = ddi_name_to_major(ddi_get_name(dip));
964 	manp->man_meta_ppa = instance;
965 
966 	/*
967 	 * Set ethernet address. Note that this address is duplicated
968 	 * at md_src_eaddr.
969 	 */
970 	ether_copy(&man_eaddr, &manp->man_eaddr);
971 	manp->man_eaddr_v = 1;
972 
973 	MAN_DBG(MAN_INIT, ("man_attach: set ether to %s",
974 	    ether_sprintf(&manp->man_eaddr)));
975 
976 	/*
977 	 * Initialize failover-related fields (timers and such),
978 	 * taking values from properties if present.
979 	 */
980 	manp->man_init_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
981 	    "init_time", MAN_INIT_TIME);
982 
983 	manp->man_linkcheck_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
984 	    "linkcheck_time", MAN_LINKCHECK_TIME);
985 
986 	manp->man_linkstale_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
987 	    "man_linkstale_time", MAN_LINKSTALE_TIME);
988 
989 	manp->man_linkstale_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
990 	    "man_linkstale_retries", MAN_LINKSTALE_RETRIES);
991 
992 	manp->man_dr_delay = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
993 	    "man_dr_delay", MAN_DR_DELAY);
994 
995 	manp->man_dr_retries = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
996 	    "man_dr_retries", MAN_DR_RETRIES);
997 
998 	manp->man_kstat_waittime = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
999 	    "man_kstat_waittime", MAN_KSTAT_WAITTIME);
1000 
1001 	manp->man_dlpireset_time = ddi_getprop(DDI_DEV_T_ANY, dip, 0,
1002 	    "man_dlpireset_time", MAN_DLPIRESET_TIME);
1003 
1004 	if (ddi_create_internal_pathname(dip, MAN_IDNAME, S_IFCHR,
1005 	    ddi_get_instance(dip)) == DDI_SUCCESS) {
1006 		minor_node_created = 1;
1007 	} else {
1008 		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1009 		    ddi_get_instance(dip));
1010 		goto exit;
1011 	}
1012 
1013 	if (ddi_create_minor_node(dip, MAN_IDNAME, S_IFCHR,
1014 	    ddi_get_instance(dip), DDI_NT_NET, CLONE_DEV) == DDI_SUCCESS) {
1015 		minor_node_created = 1;
1016 	} else {
1017 		cmn_err(CE_WARN, "man_attach: failed for instance %d",
1018 		    ddi_get_instance(dip));
1019 		goto exit;
1020 	}
1021 
1022 	/*
1023 	 * Allocate meta kstat_t for this instance of the driver.
1024 	 * Note that each of man_path_t keeps track of the kstats
1025 	 * for the real devices via mp_last_knp.
1026 	 */
1027 #ifdef	kstat
1028 	flag |= KSTAT_FLAG_PERSISTENT;
1029 #endif
1030 	ksp = kstat_create(MAN_IDNAME, ddi_get_instance(dip), NULL, "net",
1031 	    KSTAT_TYPE_NAMED, MAN_NUMSTATS, flag);
1032 
1033 	if (ksp == NULL) {
1034 		cmn_err(CE_WARN, "man_attach(%d): kstat_create failed"
1035 		    " - manp(0x%p)", manp->man_meta_ppa,
1036 		    (void *)manp);
1037 		goto exit;
1038 	}
1039 
1040 	man_kstat_named_init(ksp->ks_data, MAN_NUMSTATS);
1041 	ksp->ks_update = man_kstat_update;
1042 	ksp->ks_private = (void *) manp;
1043 	manp->man_ksp = ksp;
1044 	kstat_install(manp->man_ksp);
1045 
1046 	ddi_report_dev(dip);
1047 
1048 	MAN_DBG(MAN_INIT, ("man_attach(%d) returns DDI_SUCCESS",
1049 	    ddi_get_instance(dip)));
1050 
1051 	return (DDI_SUCCESS);
1052 
1053 exit:
1054 	if (minor_node_created)
1055 		ddi_remove_minor_node(dip, NULL);
1056 	ddi_set_driver_private(dip, NULL);
1057 	ddi_soft_state_free(man_softstate, instance);
1058 
1059 	MAN_DBG(MAN_INIT, ("man_attach(%d) eaddr returns DDI_FAILIRE",
1060 	    ddi_get_instance(dip)));
1061 
1062 	return (DDI_FAILURE);
1063 
1064 }
1065 
1066 static int
1067 man_get_our_etheraddr(eaddr_t *eap)
1068 {
1069 	manc_t	manc;
1070 	int	status = 0;
1071 
1072 	if (man_is_on_domain) {
1073 		if (status = man_get_iosram(&manc))
1074 			return (status);
1075 		ether_copy(&manc.manc_dom_eaddr, eap);
1076 	} else {
1077 		(void) localetheraddr((struct ether_addr *)NULL, eap);
1078 	}
1079 
1080 	return (status);
1081 }
1082 
1083 /*
1084  * man_detach - detach an instance of a driver
1085  *
1086  *	dip - devinfo of node
1087  * 	cmd - one of DDI_DETACH | DDI_SUSPEND
1088  *
1089  *	returns	- success - DDI_SUCCESS
1090  *		- failure - DDI_FAILURE
1091  */
1092 static int
1093 man_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1094 {
1095 	register man_t	*manp;		/* per instance data */
1096 	int		instance;
1097 
1098 	MAN_DBG(MAN_INIT, ("man_detach(%d):\n", ddi_get_instance(dip)));
1099 
1100 	if (cmd != DDI_DETACH) {
1101 		MAN_DBG(MAN_INIT, ("man_detach: bad command %d\n", cmd));
1102 		return (DDI_FAILURE);
1103 	}
1104 
1105 	if (dip == NULL) {
1106 		MAN_DBG(MAN_INIT, ("man_detach: dip == NULL\n"));
1107 		return (DDI_FAILURE);
1108 	}
1109 
1110 	instance = ddi_get_instance(dip);
1111 
1112 	mutex_enter(&man_lock);
1113 
1114 	manp = (man_t *)ddi_get_soft_state(man_softstate, instance);
1115 	if (manp == NULL) {
1116 		mutex_exit(&man_lock);
1117 
1118 		cmn_err(CE_WARN, "man_detach: unable to get softstate"
1119 		    " for instance = %d, dip = 0x%p!\n", instance,
1120 		    (void *)dip);
1121 		return (DDI_FAILURE);
1122 	}
1123 
1124 	if (manp->man_refcnt != 0) {
1125 		mutex_exit(&man_lock);
1126 
1127 		cmn_err(CE_WARN, "man_detach: %s%d refcnt %d", MAN_IDNAME,
1128 		    instance, manp->man_refcnt);
1129 		MAN_DBGCALL(MAN_INIT, man_print_man(manp));
1130 
1131 		return (DDI_FAILURE);
1132 	}
1133 
1134 	ddi_remove_minor_node(dip, NULL);
1135 
1136 	mutex_exit(&man_lock);
1137 
1138 	kstat_delete(manp->man_ksp);
1139 	ddi_soft_state_free(man_softstate, instance);
1140 	ddi_set_driver_private(dip, NULL);
1141 
1142 	MAN_DBG(MAN_INIT, ("man_detach returns DDI_SUCCESS"));
1143 
1144 	return (DDI_SUCCESS);
1145 }
1146 
1147 /*
1148  * man_info:
1149  *	As a standard DLPI style-2, man_info() should always return
1150  *	DDI_FAILURE.
1151  *
1152  *	However, man_open() has special treatment for a direct open
1153  *	via kstr_open() without going through the CLONE driver.
1154  *	To make this special kstr_open() work, we need to map
1155  *	minor of 0 to instance 0.
1156  */
1157 /*ARGSUSED*/
1158 static int
1159 man_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1160 {
1161 	minor_t minor;
1162 
1163 	switch (infocmd) {
1164 	case DDI_INFO_DEVT2DEVINFO:
1165 		break;
1166 
1167 	case DDI_INFO_DEVT2INSTANCE:
1168 		minor = getminor((dev_t)arg);
1169 		if (minor == 0) {
1170 			*result = (void *)(uintptr_t)minor;
1171 			return (DDI_SUCCESS);
1172 		}
1173 		break;
1174 	default:
1175 		break;
1176 	}
1177 	return (DDI_FAILURE);
1178 }
1179 
1180 /* Standard Device Driver entry points */
1181 
1182 /*
1183  * man_open - open the device
1184  *
1185  *	rq - upper read queue of the stream
1186  *	devp - pointer to a device number
1187  *	flag - information passed from the user program open(2) system call
1188  *	sflag - stream flags
1189  *	credp - pointer to the cred(9S) user credential structure
1190  *
1191  *	returns	- success - 0
1192  *		- failure - errno value for failure
1193  */
1194 /*ARGSUSED*/
1195 static int
1196 man_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
1197 {
1198 	int			minordev = -1;
1199 	manstr_t		*msp;
1200 	manstr_t		*tsp;
1201 	manstr_t		**prevmsp;
1202 	int			status = 0;
1203 
1204 	MAN_DBG(MAN_OCLOSE, ("man_open: rq(0x%p) sflag(0x%x)\n",
1205 	    (void *)rq, sflag));
1206 
1207 	ASSERT(rq);
1208 	ASSERT(sflag != MODOPEN);
1209 
1210 	/*
1211 	 * reopen; q_ptr set to msp at open completion.
1212 	 */
1213 	if (rq->q_ptr) {
1214 		return (0);
1215 	}
1216 
1217 	/*
1218 	 * Allocate and initialize manstr_t for this device.
1219 	 */
1220 	msp = man_kzalloc(sizeof (manstr_t), KM_SLEEP);
1221 	SETSTATE(msp, DL_UNATTACHED);
1222 	msp->ms_meta_ppa = -1;
1223 	msp->ms_rq = rq;
1224 	rq->q_ptr = WR(rq)->q_ptr = msp;
1225 
1226 	/*
1227 	 * Get the MAN driver configured on 1st open.  Note that the only way
1228 	 * we get sflag != CLONEOPEN is via the call in man_plumbctl().  All
1229 	 * CLONEOPEN calls to man_open will be via the file system
1230 	 * device node /dev/man, a pseudo clone device.
1231 	 */
1232 
1233 	qprocson(rq);
1234 
1235 	if (sflag == CLONEOPEN && man_config_state != MAN_CONFIGURED) {
1236 		/*
1237 		 * First open calls man_configure. Everyone qwaits until
1238 		 * we get it open. See man_open_ctl() comments for mutex
1239 		 * lock/synchronization info.
1240 		 */
1241 
1242 		mutex_enter(&man_lock);
1243 
1244 		if (man_config_state == MAN_UNCONFIGURED) {
1245 			man_config_state = MAN_CONFIGURING;
1246 			mutex_exit(&man_lock);
1247 			status = man_configure(rq);
1248 			if (status != 0)
1249 				goto exit;
1250 		} else {
1251 			while (man_config_state == MAN_CONFIGURING) {
1252 
1253 				mutex_exit(&man_lock);
1254 				status = qwait_sig(rq);
1255 
1256 				if (status == 0) {
1257 					status = EINTR;
1258 					goto exit;
1259 				}
1260 
1261 				mutex_enter(&man_lock);
1262 			}
1263 			mutex_exit(&man_lock);
1264 
1265 			if (man_config_error) {
1266 				status = man_config_error;
1267 				goto exit;
1268 			}
1269 		}
1270 	}
1271 
1272 	/*
1273 	 * Determine minor device number. man_open serialized by
1274 	 * D_MTPERMOD.
1275 	 */
1276 	prevmsp = &man_strup;
1277 	if (sflag == CLONEOPEN) {
1278 
1279 		minordev = 0;
1280 		for (; (tsp = *prevmsp) != NULL; prevmsp = &tsp->ms_next) {
1281 			if (minordev < tsp->ms_minor)
1282 				break;
1283 			minordev++;
1284 		}
1285 		*devp = makedevice(getmajor(*devp), minordev);
1286 
1287 	} else {
1288 		/*
1289 		 * Should only get here from man_plumbctl().
1290 		 */
1291 		/*LINTED E_ASSIGN_UINT_TO_SIGNED_INT*/
1292 		minordev = getminor(*devp);
1293 
1294 		/*
1295 		 * No need to protect this here as all opens are
1296 		 * qwaiting, and the bgthread (who is doing this open)
1297 		 * is the only one who mucks with this variable.
1298 		 */
1299 		man_ctl_wq = WR(rq);
1300 
1301 		ASSERT(minordev == 0);	/* TBD delete this */
1302 	}
1303 
1304 	msp->ms_meta_maj = getmajor(*devp);
1305 	msp->ms_minor = minordev;
1306 	if (minordev == 0)
1307 		msp->ms_flags = MAN_SFLAG_CONTROL;
1308 
1309 	/*
1310 	 * Link new entry into global list of active entries.
1311 	 */
1312 	msp->ms_next = *prevmsp;
1313 	*prevmsp = msp;
1314 
1315 
1316 	/*
1317 	 * Disable automatic enabling of our write service procedure.
1318 	 * We control this explicitly.
1319 	 */
1320 	noenable(WR(rq));
1321 
1322 exit:
1323 	MAN_DBG(MAN_OCLOSE, ("man_open: exit rq(0x%p) minor %d errno %d\n",
1324 	    (void *)rq, minordev, status));
1325 
1326 	/*
1327 	 * Clean up on error.
1328 	 */
1329 	if (status) {
1330 		qprocsoff(rq);
1331 		rq->q_ptr = WR(rq)->q_ptr = NULL;
1332 		man_kfree((char *)msp, sizeof (manstr_t));
1333 	} else
1334 		(void) qassociate(rq, -1);
1335 
1336 	return (status);
1337 }
1338 
1339 /*
1340  * Get the driver configured.  Called from first man_open with exclusive
1341  * inner perimeter.
1342  */
1343 static int
1344 man_configure(queue_t *rq)
1345 {
1346 	man_work_t	*wp;
1347 	int		status = 0;
1348 
1349 	MAN_DBG(MAN_CONFIG, ("man_configure:"));
1350 
1351 	/*
1352 	 * Initialize NDD parameters.
1353 	 */
1354 	if (!man_ndlist &&
1355 	    !man_param_register(man_param_arr, A_CNT(man_param_arr))) {
1356 		cmn_err(CE_WARN, "man_configure: man_param_register failed!");
1357 		man_config_error = ENOMEM;
1358 		goto exit;
1359 	}
1360 
1361 	mutex_enter(&man_lock);
1362 
1363 	/*
1364 	 * Start up background thread.
1365 	 */
1366 	if (man_bwork_id == NULL)
1367 		man_bwork_id = thread_create(NULL, 2 * DEFAULTSTKSZ,
1368 		    man_bwork, NULL, 0, &p0, TS_RUN, minclsyspri);
1369 
1370 	/*
1371 	 * Submit work to get control stream opened. Qwait until its
1372 	 * done. See man_open_ctl for mutex lock/synchronization info.
1373 	 */
1374 
1375 	if (man_ctl_lh == NULL) {
1376 		wp = man_work_alloc(MAN_WORK_OPEN_CTL, KM_SLEEP);
1377 		wp->mw_flags |= MAN_WFLAGS_QWAITER;
1378 		wp->mw_q = WR(rq);
1379 
1380 		/*
1381 		 * Submit work and wait. When man_open_ctl exits
1382 		 * man_open, it will cause qwait below to return.
1383 		 */
1384 		man_work_add(man_bwork_q, wp);
1385 		while (!(wp->mw_flags & MAN_WFLAGS_DONE)) {
1386 			mutex_exit(&man_lock);
1387 			qwait(rq);
1388 			mutex_enter(&man_lock);
1389 		}
1390 		status = wp->mw_status;
1391 		man_work_free(wp);
1392 
1393 	}
1394 	mutex_exit(&man_lock);
1395 
1396 	/*
1397 	 * If on domain, setup IOSRAM and build the pathgroups
1398 	 * automatically.
1399 	 */
1400 	if ((status == 0) && man_is_on_domain)
1401 		status = man_domain_configure();
1402 
1403 exit:
1404 	mutex_enter(&man_lock);
1405 
1406 	man_config_error = status;
1407 	if (status != 0)
1408 		man_config_state = MAN_UNCONFIGURED;
1409 	else
1410 		man_config_state = MAN_CONFIGURED;
1411 
1412 	mutex_exit(&man_lock);
1413 
1414 	MAN_DBG(MAN_CONFIG, ("man_configure: returns %d\n", status));
1415 
1416 	return (status);
1417 }
1418 
1419 /*
1420  * man_close - close the device
1421  *
1422  *	rq - upper read queue of the stream
1423  *
1424  *	returns	- success - 0
1425  *		- failure - errno value for failure
1426  */
1427 static int
1428 man_close(queue_t *rq)
1429 {
1430 	manstr_t		*close_msp;
1431 	manstr_t		*msp;
1432 
1433 	MAN_DBG(MAN_OCLOSE, ("man_close: rq(0x%p)\n", (void *)rq));
1434 
1435 	qprocsoff(rq);
1436 	close_msp = (manstr_t *)rq->q_ptr;
1437 
1438 	/*
1439 	 * Unlink the per-Stream entry from the active list and free it.
1440 	 */
1441 	if (close_msp == man_strup)
1442 		man_strup = close_msp->ms_next;
1443 	else {
1444 		for (msp = man_strup; msp && msp->ms_next != close_msp; )
1445 			msp = msp->ms_next;
1446 
1447 		if (msp == NULL) {
1448 			cmn_err(CE_WARN, "man_close: no stream!");
1449 			return (ENODEV);
1450 		}
1451 
1452 		msp->ms_next = close_msp->ms_next;
1453 	}
1454 
1455 	if (close_msp->ms_dests != NULL) {
1456 		/*
1457 		 * Still DL_ATTACHED
1458 		 */
1459 		man_work_t *wp;
1460 
1461 		wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_SLEEP);
1462 		man_dodetach(close_msp, wp);
1463 	}
1464 
1465 	if (close_msp->ms_flags & MAN_SFLAG_CONTROL) {
1466 		/*
1467 		 * Driver about to unload.
1468 		 */
1469 		man_ctl_wq = NULL;
1470 	}
1471 
1472 	rq->q_ptr = WR(rq)->q_ptr = NULL;
1473 	man_kfree((char *)close_msp, sizeof (manstr_t));
1474 	(void) qassociate(rq, -1);
1475 
1476 	MAN_DBG(MAN_OCLOSE, ("man_close: exit\n"));
1477 
1478 	return (0);
1479 }
1480 
1481 /*
1482  * Ask bgthread to tear down lower stream and qwait
1483  * until its done.
1484  */
1485 static void
1486 man_dodetach(manstr_t *msp, man_work_t *wp)
1487 {
1488 	man_dest_t	*mdp;
1489 	int		i;
1490 	mblk_t		*mp;
1491 
1492 	mdp = msp->ms_dests;
1493 	msp->ms_dests = NULL;
1494 	msp->ms_destp = NULL;
1495 
1496 	/*
1497 	 * Excise lower dests array, set it closing and hand it to
1498 	 * background thread to dispose of.
1499 	 */
1500 	for (i = 0; i < MAN_MAX_DESTS; i++) {
1501 
1502 		mdp[i].md_state |= MAN_DSTATE_CLOSING;
1503 		mdp[i].md_msp = NULL;
1504 		mdp[i].md_rq = NULL;
1505 
1506 		if (mdp[i].md_lc_timer_id != 0) {
1507 			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
1508 			mdp[i].md_lc_timer_id = 0;
1509 		}
1510 		if (mdp[i].md_bc_id != 0) {
1511 			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
1512 			mdp[i].md_bc_id = 0;
1513 		}
1514 
1515 		mutex_enter(&mdp[i].md_lock);
1516 		while ((mp = mdp[i].md_dmp_head) != NULL) {
1517 			mdp[i].md_dmp_head = mp->b_next;
1518 			mp->b_next = NULL;
1519 			freemsg(mp);
1520 		}
1521 		mdp[i].md_dmp_count = 0;
1522 		mdp[i].md_dmp_tail = NULL;
1523 		mutex_exit(&mdp[i].md_lock);
1524 	}
1525 
1526 	/*
1527 	 * Dump any DL type messages previously caught.
1528 	 */
1529 	man_dl_clean(&msp->ms_dl_mp);
1530 	man_dl_clean(&msp->ms_dlioc_mp);
1531 
1532 	/*
1533 	 * We need to clear fast path flag when dlioc messages are cleaned.
1534 	 */
1535 	msp->ms_flags &= ~MAN_SFLAG_FAST;
1536 
1537 	/*
1538 	 * MAN_WORK_CLOSE_STREAM work request preallocated by caller.
1539 	 */
1540 	ASSERT(wp->mw_type == MAN_WORK_CLOSE_STREAM);
1541 	ASSERT(mdp != NULL);
1542 	wp->mw_arg.a_mdp = mdp;
1543 	wp->mw_arg.a_ndests = MAN_MAX_DESTS;
1544 	wp->mw_arg.a_pg_id = -1;	/* Don't care */
1545 
1546 	mutex_enter(&man_lock);
1547 	man_work_add(man_bwork_q, wp);
1548 	msp->ms_manp->man_refcnt--;
1549 	mutex_exit(&man_lock);
1550 
1551 	msp->ms_manp = NULL;
1552 
1553 }
1554 
1555 
1556 /*
1557  * man_uwput - handle DLPI messages issued from upstream, the write
1558  * side of the upper half of multiplexor. Called with shared access to
1559  * the inner perimeter.
1560  *
1561  *	wq - upper write queue of mxx
1562  *	mp - mblk ptr to DLPI request
1563  */
1564 static int
1565 man_uwput(register queue_t *wq, register mblk_t *mp)
1566 {
1567 	register manstr_t	*msp;		/* per stream data */
1568 	register man_t		*manp;		/* per instance data */
1569 
1570 	msp = (manstr_t *)wq->q_ptr;
1571 
1572 	MAN_DBG(MAN_UWPUT, ("man_uwput: wq(0x%p) mp(0x%p) db_type(0x%x)"
1573 	    " msp(0x%p)\n",
1574 	    (void *)wq, (void *)mp, DB_TYPE(mp), (void *)msp));
1575 #if DEBUG
1576 	if (man_debug & MAN_UWPUT) {
1577 		if (DB_TYPE(mp) == M_IOCTL) {
1578 			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1579 			MAN_DBG(MAN_UWPUT,
1580 			    ("man_uwput: M_IOCTL ioc_cmd(0x%x)\n",
1581 			    iocp->ioc_cmd));
1582 		} else if (DB_TYPE(mp) == M_CTL) {
1583 			struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
1584 			MAN_DBG(MAN_UWPUT,
1585 			    ("man_uwput: M_CTL ioc_cmd(0x%x)\n",
1586 			    iocp->ioc_cmd));
1587 		}
1588 	}
1589 #endif	/* DEBUG */
1590 
1591 
1592 	switch (DB_TYPE(mp)) {
1593 	case M_DATA:
1594 		manp = msp->ms_manp;
1595 
1596 		if (((msp->ms_flags & (MAN_SFLAG_FAST | MAN_SFLAG_RAW)) == 0) ||
1597 		    (msp->ms_dlpistate != DL_IDLE) ||
1598 		    (manp == NULL)) {
1599 
1600 			merror(wq, mp, EPROTO);
1601 			break;
1602 		}
1603 
1604 		if (wq->q_first) {
1605 			(void) putq(wq, mp);
1606 			qenable(wq);
1607 		} else {
1608 			ehdr_t	*ep = (ehdr_t *)mp->b_rptr;
1609 
1610 			(void) man_start(wq, mp, &ep->ether_dhost);
1611 		}
1612 		break;
1613 
1614 	case M_PROTO:
1615 	case M_PCPROTO:
1616 		if ((DL_PRIM(mp) == DL_UNITDATA_IND) && !wq->q_first) {
1617 			(void) man_udreq(wq, mp);
1618 		} else {
1619 			(void) putq(wq, mp);
1620 			qenable(wq);
1621 		}
1622 		break;
1623 
1624 	case M_IOCTL:
1625 	case M_IOCDATA:
1626 		qwriter(wq, mp, man_ioctl, PERIM_INNER);
1627 		break;
1628 
1629 	case M_CTL:
1630 		freemsg(mp);
1631 		break;
1632 
1633 	case M_FLUSH:
1634 		MAN_DBG(MAN_UWPUT, ("man_wput: M_FLUSH\n"));
1635 		if (*mp->b_rptr & FLUSHW)
1636 			flushq(wq, FLUSHDATA);
1637 		if (*mp->b_rptr & FLUSHR) {
1638 			flushq(RD(wq), FLUSHDATA);
1639 			*mp->b_rptr &= ~FLUSHW;
1640 			qreply(wq, mp);
1641 		} else {
1642 			freemsg(mp);
1643 		}
1644 		break;
1645 
1646 	default:
1647 		MAN_DBG(MAN_WARN,
1648 		    ("man_uwput: illegal mblk(0x%p) type(0x%x)\n",
1649 		    (void *)mp, DB_TYPE(mp)));
1650 		freemsg(mp);
1651 		break;
1652 	} /* End switch */
1653 
1654 	MAN_DBG(MAN_UWPUT, ("man_uwput: exit wq(0x%p) mp(0x%p)\n",
1655 	    (void *)wq, (void *)mp));
1656 
1657 	return (0);
1658 }
1659 
1660 /*
1661  * man_start - handle data messages issued from upstream.  Send down
1662  * to particular man_dest based on ether_addr, otherwise send out to all
1663  * valid man_dests.
1664  *
1665  *	wq - upper write queue of mxx
1666  *	mp - mblk ptr to DLPI request
1667  * 	caller - Caller ID for decision making on canput failure
1668  *
1669  * Returns:
1670  *	0	- Data xmitted or No flow control situation detected.
1671  *	1	- Flow control situation detected.
1672  *
1673  * STREAMS Flow Control: can be used if there is only one destination
1674  * for a stream (1 to 1 multiplexor). In this case, we will use the upper
1675  * write queue to store mblks when in flow control. If there are multiple
1676  * destinations, we cannot use the STREAMs based flow control (1 to many
1677  * multiplexor). In this case, we will use the lower write queue to store
1678  * mblks when in flow control. Since destinations come and go, we may
1679  * transition between 1-to-1 and 1-to-m. So it may be the case that we have
1680  * some mblks stored on the upper queue, and some on the lower queue. However,
1681  * we will never send mblks out of order. See man_uwput and man_start_lower().
1682  *
1683  * A simple flow control mechanism is implemented for the deferred mblk list,
1684  * as this list is expected to be used temporarily for a very short
1685  * period required for switching paths. This flow control mechanism is
1686  * used only as a defensive approach to avoid infinite growth of this list.
1687  */
1688 static int
1689 man_start(register queue_t *wq, register mblk_t *mp, eaddr_t *eap)
1690 {
1691 	register manstr_t	*msp;		/* per stream data */
1692 	register man_dest_t	*mdp = NULL;	/* destination */
1693 	mblk_t			*tmp;
1694 	int			i;
1695 	int			status = 0;
1696 
1697 	msp = (manstr_t *)wq->q_ptr;
1698 
1699 	MAN_DBG(MAN_DATA, ("man_start: msp(0x%p) ether_addr(%s)\n",
1700 	    (void *)msp, ether_sprintf(eap)));
1701 
1702 	if (msp->ms_dests == NULL) {
1703 		cmn_err(CE_WARN, "man_start: no destinations");
1704 		freemsg(mp);
1705 		return (0);
1706 	}
1707 
1708 	/*
1709 	 * Optimization if only one valid destination.
1710 	 */
1711 	mdp = msp->ms_destp;
1712 
1713 	if (IS_UNICAST(eap)) {
1714 		queue_t			*flow_wq = NULL;
1715 
1716 		if (mdp == NULL) {
1717 			/*
1718 			 * TDB - This needs to be optimized (some bits in
1719 			 * ehp->dhost will act as an index.
1720 			 */
1721 			for (i = 0; i < MAN_MAX_DESTS; i++) {
1722 
1723 				mdp = &msp->ms_dests[i];
1724 
1725 				if ((mdp->md_state == MAN_DSTATE_READY) &&
1726 				    (ether_cmp(eap, &mdp->md_dst_eaddr) == 0))
1727 					break;
1728 				mdp = NULL;
1729 			}
1730 		} else {
1731 			/*
1732 			 * 1 to 1 multiplexing, use upper wq for flow control.
1733 			 */
1734 			flow_wq = wq;
1735 		}
1736 
1737 		if (mdp != NULL) {
1738 			/*
1739 			 * Its going somewhere specific
1740 			 */
1741 			status =  man_start_lower(mdp, mp, flow_wq, MAN_UPPER);
1742 
1743 		} else {
1744 			MAN_DBG(MAN_DATA, ("man_start: no destination"
1745 			    " for eaddr %s\n", ether_sprintf(eap)));
1746 			freemsg(mp);
1747 		}
1748 	} else {
1749 		/*
1750 		 * Broadcast or multicast - send everone a copy.
1751 		 */
1752 		if (mdp == NULL) {
1753 			for (i = 0; i < MAN_MAX_DESTS; i++) {
1754 				mdp = &msp->ms_dests[i];
1755 
1756 				if (mdp->md_state != MAN_DSTATE_READY)
1757 					continue;
1758 
1759 				if ((tmp = copymsg(mp)) != NULL) {
1760 					(void) man_start_lower(mdp, tmp,
1761 					    NULL, MAN_UPPER);
1762 				} else {
1763 					MAN_DBG(MAN_DATA, ("man_start: copymsg"
1764 					    " failed!"));
1765 				}
1766 			}
1767 			freemsg(mp);
1768 		} else {
1769 			if (mdp->md_state == MAN_DSTATE_READY)
1770 				status =  man_start_lower(mdp, mp, wq,
1771 				    MAN_UPPER);
1772 			else
1773 				freemsg(mp);
1774 		}
1775 	}
1776 	return (status);
1777 }
1778 
1779 /*
1780  * Send a DL_UNITDATA or M_DATA fastpath data mblk to a particular
1781  * destination. Others mblk types sent down via * man_dlpi_senddown().
1782  *
1783  * Returns:
1784  *	0	- Data xmitted
1785  *	1	- Data not xmitted due to flow control.
1786  */
1787 static int
1788 man_start_lower(man_dest_t *mdp, mblk_t *mp, queue_t *flow_wq, int caller)
1789 {
1790 	queue_t		*wq = mdp->md_wq;
1791 	int		status = 0;
1792 
1793 	/*
1794 	 * Lower stream ready for data transmit.
1795 	 */
1796 	if (mdp->md_state == MAN_DSTATE_READY &&
1797 	    mdp->md_dlpistate == DL_IDLE) {
1798 
1799 		ASSERT(mdp->md_wq != NULL);
1800 
1801 		if (caller == MAN_UPPER) {
1802 			/*
1803 			 * Check for flow control conditions for lower
1804 			 * stream.
1805 			 */
1806 			if (mdp->md_dmp_head == NULL &&
1807 			    wq->q_first == NULL && canputnext(wq)) {
1808 
1809 				(void) putnext(wq, mp);
1810 
1811 			} else {
1812 				mutex_enter(&mdp->md_lock);
1813 				if (mdp->md_dmp_head != NULL) {
1814 					/*
1815 					 * A simple flow control mechanism.
1816 					 */
1817 					if (mdp->md_dmp_count >= MAN_HIWAT) {
1818 						freemsg(mp);
1819 					} else {
1820 						/*
1821 						 * Add 'mp' to the deferred
1822 						 * msg list.
1823 						 */
1824 						mdp->md_dmp_tail->b_next = mp;
1825 						mdp->md_dmp_tail = mp;
1826 						mdp->md_dmp_count +=
1827 						    msgsize(mp);
1828 					}
1829 					mutex_exit(&mdp->md_lock);
1830 					/*
1831 					 * Inform flow control situation
1832 					 * to the caller.
1833 					 */
1834 					status = 1;
1835 					qenable(wq);
1836 					goto exit;
1837 				}
1838 				mutex_exit(&mdp->md_lock);
1839 				/*
1840 				 * If 1 to 1 mux, use upper write queue for
1841 				 * flow control.
1842 				 */
1843 				if (flow_wq != NULL) {
1844 					/*
1845 					 * putbq() message and indicate
1846 					 * flow control situation to the
1847 					 * caller.
1848 					 */
1849 					putbq(flow_wq, mp);
1850 					qenable(flow_wq);
1851 					status = 1;
1852 					goto exit;
1853 				}
1854 				/*
1855 				 * 1 to many mux, use lower write queue for
1856 				 * flow control. Be mindful not to overflow
1857 				 * the lower MAN STREAM q.
1858 				 */
1859 				if (canput(wq)) {
1860 					(void) putq(wq, mp);
1861 					qenable(wq);
1862 				} else {
1863 					MAN_DBG(MAN_DATA, ("man_start_lower:"
1864 					    " lower q flow controlled -"
1865 					    " discarding packet"));
1866 					freemsg(mp);
1867 					goto exit;
1868 				}
1869 			}
1870 
1871 		} else {
1872 			/*
1873 			 * man_lwsrv  is draining flow controlled mblks.
1874 			 */
1875 			if (canputnext(wq))
1876 				(void) putnext(wq, mp);
1877 			else
1878 				status = 1;
1879 		}
1880 		goto exit;
1881 	}
1882 
1883 	/*
1884 	 * Lower stream in transition, do flow control.
1885 	 */
1886 	status = 1;
1887 
1888 	if (mdp->md_state == MAN_DSTATE_NOTPRESENT) {
1889 nodest:
1890 		cmn_err(CE_WARN,
1891 		    "man_start_lower: no dest for mdp(0x%p), caller(%d)!",
1892 		    (void *)mdp, caller);
1893 		if (caller == MAN_UPPER)
1894 			freemsg(mp);
1895 		goto exit;
1896 	}
1897 
1898 	if (mdp->md_state & MAN_DSTATE_CLOSING) {
1899 		MAN_DBG(MAN_DATA, ("man_start_lower: mdp(0x%p) closing",
1900 		    (void *)mdp));
1901 		if (caller == MAN_UPPER)
1902 			freemsg(mp);
1903 		goto exit;
1904 	}
1905 
1906 	if ((mdp->md_state & MAN_DSTATE_PLUMBING) ||
1907 	    (mdp->md_state == MAN_DSTATE_INITIALIZING) ||
1908 	    (mdp->md_dlpistate != DL_IDLE)) {
1909 		/*
1910 		 * Defer until PLUMBED and DL_IDLE. See man_lwsrv().
1911 		 */
1912 		if (caller == MAN_UPPER) {
1913 			/*
1914 			 * Upper stream sending data down, add to defered mblk
1915 			 * list for stream.
1916 			 */
1917 			mutex_enter(&mdp->md_lock);
1918 			if (mdp->md_dmp_count >= MAN_HIWAT) {
1919 				freemsg(mp);
1920 			} else {
1921 				if (mdp->md_dmp_head == NULL) {
1922 					ASSERT(mdp->md_dmp_tail == NULL);
1923 					mdp->md_dmp_head = mp;
1924 					mdp->md_dmp_tail = mp;
1925 				} else {
1926 					mdp->md_dmp_tail->b_next = mp;
1927 					mdp->md_dmp_tail = mp;
1928 				}
1929 				mdp->md_dmp_count += msgsize(mp);
1930 			}
1931 			mutex_exit(&mdp->md_lock);
1932 		}
1933 
1934 		goto exit;
1935 	}
1936 
1937 exit:
1938 	return (status);
1939 }
1940 
1941 /*
1942  * man_ioctl - handle ioctl requests for this driver (I_PLINK/I_PUNLINK)
1943  * or pass thru to the physical driver below.  Note that most M_IOCTLs we
1944  * care about come down the control msp, but the IOC ones come down the IP.
1945  * Called with exclusive inner perimeter.
1946  *
1947  *	wq - upper write queue of mxx
1948  *	mp - mblk ptr to DLPI ioctl request
1949  */
1950 static void
1951 man_ioctl(register queue_t *wq, register mblk_t *mp)
1952 {
1953 	manstr_t		*msp;
1954 	struct iocblk		*iocp;
1955 
1956 	iocp = (struct iocblk *)mp->b_rptr;
1957 	msp = (manstr_t *)wq->q_ptr;
1958 
1959 #ifdef DEBUG
1960 	{
1961 		char			ioc_cmd[30];
1962 
1963 		sprintf(ioc_cmd, "not handled IOCTL 0x%x", iocp->ioc_cmd);
1964 		MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI),
1965 		    ("man_ioctl: wq(0x%p) mp(0x%p) cmd(%s)\n",
1966 		    (void *)wq, (void *)mp,
1967 		    (iocp->ioc_cmd == I_PLINK) ? "I_PLINK" :
1968 		    (iocp->ioc_cmd == I_PUNLINK) ? "I_PUNLINK" :
1969 		    (iocp->ioc_cmd == MAN_SETPATH) ? "MAN_SETPATH" :
1970 		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
1971 		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
1972 	}
1973 #endif /* DEBUG */
1974 
1975 
1976 	/*
1977 	 *  Handle the requests...
1978 	 */
1979 	switch ((unsigned int)iocp->ioc_cmd) {
1980 
1981 	case I_PLINK:
1982 		man_plink(wq, mp);
1983 		break;
1984 
1985 	case I_PUNLINK:
1986 		man_unplink(wq, mp);
1987 		break;
1988 
1989 	case MAN_SETPATH:
1990 		man_setpath(wq, mp);
1991 		break;
1992 
1993 	case MAN_GETEADDR:
1994 		man_geteaddr(wq, mp);
1995 		break;
1996 
1997 	case MAN_SET_LINKCHECK_TIME:
1998 		man_set_linkcheck_time(wq, mp);
1999 		break;
2000 
2001 	case MAN_SET_SC_IPADDRS:
2002 		man_set_sc_ipaddrs(wq, mp);
2003 		break;
2004 
2005 	case MAN_SET_SC_IP6ADDRS:
2006 		man_set_sc_ip6addrs(wq, mp);
2007 		break;
2008 
2009 	case DLIOCRAW:
2010 		if (man_dlioc(msp, mp))
2011 			miocnak(wq, mp, 0, ENOMEM);
2012 		else {
2013 			msp->ms_flags |= MAN_SFLAG_RAW;
2014 			miocack(wq, mp, 0, 0);
2015 		}
2016 		break;
2017 
2018 	case DL_IOC_HDR_INFO:
2019 		man_dl_ioc_hdr_info(wq, mp);
2020 		break;
2021 
2022 	case MAN_ND_GET:
2023 	case MAN_ND_SET:
2024 		man_nd_getset(wq, mp);
2025 		break;
2026 
2027 	default:
2028 		MAN_DBG(MAN_DDI, ("man_ioctl: unknown ioc_cmd %d\n",
2029 		    (unsigned int)iocp->ioc_cmd));
2030 		miocnak(wq, mp, 0, EINVAL);
2031 		break;
2032 	}
2033 exit:
2034 	MAN_DBG((MAN_SWITCH | MAN_PATH | MAN_DLPI), ("man_ioctl: exit\n"));
2035 
2036 }
2037 
2038 /*
2039  * man_plink: handle I_PLINK requests on the control stream
2040  */
2041 void
2042 man_plink(queue_t *wq, mblk_t *mp)
2043 {
2044 	struct linkblk	*linkp;
2045 	man_linkrec_t	*lrp;
2046 	int		status = 0;
2047 
2048 	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2049 
2050 	/*
2051 	 * Create a record to hold lower stream info. man_plumb will
2052 	 * retrieve it after calling ldi_ioctl(I_PLINK)
2053 	 */
2054 	lrp = man_kzalloc(sizeof (man_linkrec_t), KM_NOSLEEP);
2055 	if (lrp == NULL) {
2056 		status = ENOMEM;
2057 		goto exit;
2058 	}
2059 
2060 	lrp->l_muxid = linkp->l_index;
2061 	lrp->l_wq = linkp->l_qbot;
2062 	lrp->l_rq = RD(linkp->l_qbot);
2063 
2064 	man_linkrec_insert(lrp);
2065 
2066 exit:
2067 	if (status)
2068 		miocnak(wq, mp, 0, status);
2069 	else
2070 		miocack(wq, mp, 0, 0);
2071 
2072 }
2073 
2074 /*
2075  * man_unplink - handle I_PUNLINK requests on the control stream
2076  */
2077 void
2078 man_unplink(queue_t *wq, mblk_t *mp)
2079 {
2080 	struct linkblk	*linkp;
2081 
2082 	linkp = (struct linkblk *)mp->b_cont->b_rptr;
2083 	RD(linkp->l_qbot)->q_ptr = NULL;
2084 	WR(linkp->l_qbot)->q_ptr = NULL;
2085 	miocack(wq, mp, 0, 0);
2086 }
2087 
2088 void
2089 man_linkrec_insert(man_linkrec_t *lrp)
2090 {
2091 	mutex_enter(&man_lock);
2092 
2093 	lrp->l_next = man_linkrec_head;
2094 	man_linkrec_head = lrp;
2095 
2096 	mutex_exit(&man_lock);
2097 
2098 }
2099 
2100 static queue_t *
2101 man_linkrec_find(int muxid)
2102 {
2103 	man_linkrec_t	*lpp;
2104 	man_linkrec_t	*lp;
2105 	queue_t		*wq = NULL;
2106 
2107 	mutex_enter(&man_lock);
2108 
2109 	if (man_linkrec_head == NULL)
2110 		goto exit;
2111 
2112 	lp = lpp = man_linkrec_head;
2113 	if (lpp->l_muxid == muxid) {
2114 		man_linkrec_head = lpp->l_next;
2115 	} else {
2116 		for (lp = lpp->l_next; lp; lp = lp->l_next) {
2117 			if (lp->l_muxid == muxid)
2118 				break;
2119 			lpp = lp;
2120 		}
2121 	}
2122 
2123 	if (lp == NULL)
2124 		goto exit;
2125 
2126 	wq = lp->l_wq;
2127 	ASSERT(wq != NULL);
2128 
2129 	lpp->l_next = lp->l_next;
2130 	man_kfree(lp, sizeof (man_linkrec_t));
2131 
2132 exit:
2133 	mutex_exit(&man_lock);
2134 
2135 	return (wq);
2136 }
2137 
2138 /*
2139  * Set instance linkcheck timer value.
2140  */
2141 static void
2142 man_set_linkcheck_time(queue_t *wq, mblk_t *mp)
2143 {
2144 	mi_time_t	*mtp;
2145 	int		error;
2146 	man_t		*manp;
2147 
2148 	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: enter"));
2149 
2150 	error = miocpullup(mp, sizeof (mi_time_t));
2151 	if (error != 0)
2152 		goto exit;
2153 
2154 	mtp = (mi_time_t *)mp->b_cont->b_rptr;
2155 
2156 	MAN_DBG(MAN_LINK, ("man_set_linkcheck_time: mtp"));
2157 	MAN_DBGCALL(MAN_LINK, man_print_mtp(mtp));
2158 
2159 	manp = ddi_get_soft_state(man_softstate, mtp->mtp_man_ppa);
2160 	if (manp == NULL) {
2161 		error = ENODEV;
2162 		goto exit;
2163 	}
2164 
2165 	manp->man_linkcheck_time = mtp->mtp_time;
2166 exit:
2167 	if (error)
2168 		miocnak(wq, mp, 0, error);
2169 	else
2170 		miocack(wq, mp, sizeof (mi_time_t), 0);
2171 }
2172 
2173 /*
2174  * Man path ioctl processing. Should only happen on the SSC. Called
2175  * with exclusive inner perimeter.
2176  */
2177 static void
2178 man_setpath(queue_t *wq, mblk_t *mp)
2179 {
2180 	mi_path_t		*mip;
2181 	int			error;
2182 
2183 	error = miocpullup(mp, sizeof (mi_path_t));
2184 	if (error != 0)
2185 		goto exit;
2186 
2187 	mip = (mi_path_t *)mp->b_cont->b_rptr;
2188 	mutex_enter(&man_lock);
2189 	error = man_pg_cmd(mip, NULL);
2190 	mutex_exit(&man_lock);
2191 
2192 exit:
2193 	if (error)
2194 		miocnak(wq, mp, 0, error);
2195 	else
2196 		miocack(wq, mp, sizeof (mi_path_t), 0);
2197 }
2198 
2199 /*
2200  * Get the local ethernet address of this machine.
2201  */
2202 static void
2203 man_geteaddr(queue_t *wq, mblk_t *mp)
2204 {
2205 	eaddr_t			*eap;
2206 	int			error;
2207 
2208 	error = miocpullup(mp, sizeof (eaddr_t));
2209 	if (error != 0) {
2210 		miocnak(wq, mp, 0, error);
2211 		return;
2212 	}
2213 
2214 	eap = (eaddr_t *)mp->b_cont->b_rptr;
2215 	(void) localetheraddr(NULL, eap);
2216 	miocack(wq, mp, sizeof (eaddr_t), 0);
2217 }
2218 
2219 /*
2220  * Set my SC and other SC IPv4 addresses for use in man_pinger routine.
2221  */
2222 static void
2223 man_set_sc_ipaddrs(queue_t *wq, mblk_t *mp)
2224 {
2225 	int			error;
2226 
2227 	error = miocpullup(mp, sizeof (man_sc_ipaddrs_t));
2228 	if (error != 0)
2229 		goto exit;
2230 
2231 	man_sc_ipaddrs = *(man_sc_ipaddrs_t *)mp->b_cont->b_rptr;
2232 
2233 #ifdef DEBUG
2234 	{
2235 		char	buf[INET_ADDRSTRLEN];
2236 
2237 		(void) inet_ntop(AF_INET,
2238 		    (void *) &man_sc_ipaddrs.ip_other_sc_ipaddr,
2239 		    buf, INET_ADDRSTRLEN);
2240 		MAN_DBG(MAN_CONFIG, ("ip_other_sc_ipaddr = %s", buf));
2241 		(void) inet_ntop(AF_INET,
2242 		    (void *) &man_sc_ipaddrs.ip_my_sc_ipaddr,
2243 		    buf, INET_ADDRSTRLEN);
2244 		MAN_DBG(MAN_CONFIG, ("ip_my_sc_ipaddr = %s", buf));
2245 	}
2246 #endif /* DEBUG */
2247 exit:
2248 	if (error)
2249 		miocnak(wq, mp, 0, error);
2250 	else
2251 		miocack(wq, mp, sizeof (man_sc_ipaddrs_t), 0);
2252 }
2253 
2254 /*
2255  * Set my SC and other SC IPv6 addresses for use in man_pinger routine.
2256  */
2257 static void
2258 man_set_sc_ip6addrs(queue_t *wq, mblk_t *mp)
2259 {
2260 	int			error;
2261 
2262 	error = miocpullup(mp, sizeof (man_sc_ip6addrs_t));
2263 	if (error != 0)
2264 		goto exit;
2265 
2266 	man_sc_ip6addrs = *(man_sc_ip6addrs_t *)mp->b_cont->b_rptr;
2267 
2268 #ifdef DEBUG
2269 	{
2270 		char	buf[INET6_ADDRSTRLEN];
2271 
2272 		(void) inet_ntop(AF_INET6,
2273 		    (void *) &man_sc_ip6addrs.ip6_other_sc_ipaddr,
2274 		    buf, INET6_ADDRSTRLEN);
2275 		MAN_DBG(MAN_CONFIG, ("ip6_other_sc_ipaddr = %s", buf));
2276 		(void) inet_ntop(AF_INET6,
2277 		    (void *) &man_sc_ip6addrs.ip6_my_sc_ipaddr,
2278 		    buf, INET6_ADDRSTRLEN);
2279 		MAN_DBG(MAN_CONFIG, ("ip6_my_sc_ipaddr = %s", buf));
2280 	}
2281 #endif /* DEBUG */
2282 exit:
2283 	if (error)
2284 		miocnak(wq, mp, 0, error);
2285 	else
2286 		miocack(wq, mp, sizeof (man_sc_ip6addrs_t), 0);
2287 }
2288 
2289 /*
2290  * M_DATA fastpath info request.
2291  */
2292 static void
2293 man_dl_ioc_hdr_info(queue_t *wq, mblk_t *mp)
2294 {
2295 	manstr_t		*msp;
2296 	man_t			*manp;
2297 	mblk_t			*nmp;
2298 	man_dladdr_t		*dlap;
2299 	dl_unitdata_req_t	*dludp;
2300 	struct	ether_header	*headerp;
2301 	t_uscalar_t		off, len;
2302 	int			status = 0;
2303 
2304 	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: enter"));
2305 
2306 	msp = (manstr_t *)wq->q_ptr;
2307 	manp = msp->ms_manp;
2308 	if (manp == NULL) {
2309 		status = EINVAL;
2310 		goto exit;
2311 	}
2312 
2313 	status = miocpullup(mp, sizeof (dl_unitdata_req_t) + MAN_ADDRL);
2314 	if (status != 0)
2315 		goto exit;
2316 
2317 	/*
2318 	 * Sanity check the DL_UNITDATA_REQ destination address
2319 	 * offset and length values.
2320 	 */
2321 	dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
2322 	off = dludp->dl_dest_addr_offset;
2323 	len = dludp->dl_dest_addr_length;
2324 	if (dludp->dl_primitive != DL_UNITDATA_REQ ||
2325 	    !MBLKIN(mp->b_cont, off, len) || len != MAN_ADDRL) {
2326 		status = EINVAL;
2327 		goto exit;
2328 	}
2329 
2330 	dlap = (man_dladdr_t  *)(mp->b_cont->b_rptr + off);
2331 
2332 	/*
2333 	 * Allocate a new mblk to hold the ether header.
2334 	 */
2335 	if ((nmp = allocb(ETHERHEADER_SIZE, BPRI_MED)) == NULL) {
2336 		status = ENOMEM;
2337 		goto exit;
2338 	}
2339 
2340 	/* We only need one dl_ioc_hdr mblk for replay */
2341 	if (!(msp->ms_flags & MAN_SFLAG_FAST))
2342 		status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2343 
2344 	/* Forward the packet to all lower destinations. */
2345 	if ((status != 0) || ((status = man_dlpi_senddown(msp, mp)) != 0)) {
2346 		freemsg(nmp);
2347 		goto exit;
2348 	}
2349 
2350 	nmp->b_wptr += ETHERHEADER_SIZE;
2351 
2352 	/*
2353 	 * Fill in the ether header.
2354 	 */
2355 	headerp = (struct ether_header *)nmp->b_rptr;
2356 	ether_copy(&dlap->dl_phys, &headerp->ether_dhost);
2357 	ether_copy(&manp->man_eaddr, &headerp->ether_shost);
2358 	put_ether_type(headerp, dlap->dl_sap);
2359 
2360 	/*
2361 	 * Link new mblk in after the "request" mblks.
2362 	 */
2363 	linkb(mp, nmp);
2364 
2365 exit:
2366 	MAN_DBG(MAN_DLPI, ("man_dl_ioc_hdr_info: returns, status = %d",
2367 	    status));
2368 
2369 	if (status) {
2370 		miocnak(wq, mp, 0, status);
2371 	} else {
2372 		msp = (manstr_t *)wq->q_ptr;
2373 		msp->ms_flags |= MAN_SFLAG_FAST;
2374 		miocack(wq, mp, msgsize(mp->b_cont), 0);
2375 	}
2376 
2377 }
2378 
2379 /*
2380  * man_uwsrv - Upper write queue service routine to handle deferred
2381  * DLPI messages issued from upstream, the write side of the upper half
2382  * of multiplexor. It is also used by man_bwork to switch the lower
2383  * multiplexor.
2384  *
2385  *	wq - upper write queue of mxx
2386  */
2387 static int
2388 man_uwsrv(queue_t *wq)
2389 {
2390 	register mblk_t		*mp;
2391 	manstr_t		*msp;		/* per stream data */
2392 	man_t			*manp;		/* per instance data */
2393 	ehdr_t			*ep;
2394 	int			status;
2395 
2396 	msp = (manstr_t *)wq->q_ptr;
2397 
2398 	MAN_DBG(MAN_UWSRV, ("man_uwsrv: wq(0x%p) msp", (void *)wq));
2399 	MAN_DBGCALL(MAN_UWSRV, man_print_msp(msp));
2400 
2401 	if (msp == NULL)
2402 		goto done;
2403 
2404 	manp = msp->ms_manp;
2405 
2406 	while (mp = getq(wq)) {
2407 
2408 		switch (DB_TYPE(mp)) {
2409 		/*
2410 		 * Can probably remove this as I never put data messages
2411 		 * here.
2412 		 */
2413 		case M_DATA:
2414 			if (manp) {
2415 				ep = (ehdr_t *)mp->b_rptr;
2416 				status = man_start(wq, mp, &ep->ether_dhost);
2417 				if (status) {
2418 					/*
2419 					 * man_start() indicated flow control
2420 					 * situation, stop processing now.
2421 					 */
2422 					goto break_loop;
2423 				}
2424 			} else
2425 				freemsg(mp);
2426 			break;
2427 
2428 		case M_PROTO:
2429 		case M_PCPROTO:
2430 			status = man_proto(wq, mp);
2431 			if (status) {
2432 				/*
2433 				 * man_proto() indicated flow control
2434 				 * situation detected by man_start(),
2435 				 * stop processing now.
2436 				 */
2437 				goto break_loop;
2438 			}
2439 			break;
2440 
2441 		default:
2442 			MAN_DBG(MAN_UWSRV, ("man_uwsrv: discarding mp(0x%p)",
2443 			    (void *)mp));
2444 			freemsg(mp);
2445 			break;
2446 		}
2447 	}
2448 
2449 break_loop:
2450 	/*
2451 	 * Check to see if bgthread wants us to do something inside the
2452 	 * perimeter.
2453 	 */
2454 	if ((msp->ms_flags & MAN_SFLAG_CONTROL) &&
2455 	    man_iwork_q->q_work != NULL) {
2456 
2457 		man_iwork();
2458 	}
2459 
2460 done:
2461 
2462 	MAN_DBG(MAN_UWSRV, ("man_uwsrv: returns"));
2463 
2464 	return (0);
2465 }
2466 
2467 
2468 /*
2469  * man_proto - handle DLPI protocol requests issued from upstream.
2470  * Called by man_uwsrv().  We disassociate upper and lower multiplexor
2471  * DLPI state transitions. The upper stream here (manstr_t) transitions
2472  * appropriately, saves the DLPI requests via man_dlpi(), and then
2473  * arranges for the DLPI request to be sent down via man_dlpi_senddown() if
2474  * appropriate.
2475  *
2476  *	wq - upper write queue of mxx
2477  *	mp - mbl ptr to protocol request
2478  */
2479 static int
2480 man_proto(queue_t *wq, mblk_t *mp)
2481 {
2482 	union DL_primitives	*dlp;
2483 	int			flow_status = 0;
2484 
2485 	dlp = (union DL_primitives *)mp->b_rptr;
2486 
2487 	MAN_DBG((MAN_UWSRV | MAN_DLPI),
2488 	    ("man_proto: mp(0x%p) prim(%s)\n", (void *)mp,
2489 	    dps[dlp->dl_primitive]));
2490 
2491 	switch (dlp->dl_primitive) {
2492 	case DL_UNITDATA_REQ:
2493 		flow_status = man_udreq(wq, mp);
2494 		break;
2495 
2496 	case DL_ATTACH_REQ:
2497 		man_areq(wq, mp);
2498 		break;
2499 
2500 	case DL_DETACH_REQ:
2501 		man_dreq(wq, mp);
2502 		break;
2503 
2504 	case DL_BIND_REQ:
2505 		man_breq(wq, mp);
2506 		break;
2507 
2508 	case DL_UNBIND_REQ:
2509 		man_ubreq(wq, mp);
2510 		break;
2511 
2512 	case DL_INFO_REQ:
2513 		man_ireq(wq, mp);
2514 		break;
2515 
2516 	case DL_PROMISCON_REQ:
2517 		man_ponreq(wq, mp);
2518 		break;
2519 
2520 	case DL_PROMISCOFF_REQ:
2521 		man_poffreq(wq, mp);
2522 		break;
2523 
2524 	case DL_ENABMULTI_REQ:
2525 		man_emreq(wq, mp);
2526 		break;
2527 
2528 	case DL_DISABMULTI_REQ:
2529 		man_dmreq(wq, mp);
2530 		break;
2531 
2532 	case DL_PHYS_ADDR_REQ:
2533 		man_pareq(wq, mp);
2534 		break;
2535 
2536 	case DL_SET_PHYS_ADDR_REQ:
2537 		man_spareq(wq, mp);
2538 		break;
2539 
2540 	default:
2541 		MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: prim(%d)\n",
2542 		    dlp->dl_primitive));
2543 		dlerrorack(wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
2544 		break;
2545 
2546 	} /* End switch */
2547 
2548 	MAN_DBG((MAN_UWSRV | MAN_DLPI), ("man_proto: exit\n"));
2549 	return (flow_status);
2550 
2551 }
2552 
2553 static int
2554 man_udreq(queue_t *wq, mblk_t *mp)
2555 {
2556 	manstr_t		*msp;
2557 	dl_unitdata_req_t	*dludp;
2558 	mblk_t	*nmp;
2559 	man_dladdr_t		*dlap;
2560 	t_uscalar_t 		off, len;
2561 	int 			flow_status = 0;
2562 
2563 	msp = (manstr_t *)wq->q_ptr;
2564 
2565 
2566 	if (msp->ms_dlpistate != DL_IDLE) {
2567 		dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
2568 		return (flow_status);
2569 	}
2570 	dludp = (dl_unitdata_req_t *)mp->b_rptr;
2571 	off = dludp->dl_dest_addr_offset;
2572 	len = dludp->dl_dest_addr_length;
2573 
2574 	/*
2575 	 * Validate destination address format.
2576 	 */
2577 	if (!MBLKIN(mp, off, len) || (len != MAN_ADDRL)) {
2578 		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADADDR, 0);
2579 		return (flow_status);
2580 	}
2581 
2582 	/*
2583 	 * Error if no M_DATA follows.
2584 	 */
2585 	nmp = mp->b_cont;
2586 	if (nmp == NULL) {
2587 		dluderrorind(wq, mp, mp->b_rptr + off, len, DL_BADDATA, 0);
2588 		return (flow_status);
2589 	}
2590 
2591 	dlap = (man_dladdr_t *)(mp->b_rptr + off);
2592 
2593 	flow_status = man_start(wq, mp, &dlap->dl_phys);
2594 	return (flow_status);
2595 }
2596 
2597 /*
2598  * Handle DL_ATTACH_REQ.
2599  */
2600 static void
2601 man_areq(queue_t *wq, mblk_t *mp)
2602 {
2603 	man_t			*manp;	/* per instance data */
2604 	manstr_t		*msp;	/* per stream data */
2605 	short			ppa;
2606 	union DL_primitives	*dlp;
2607 	mblk_t			*preq = NULL;
2608 	int			did_refcnt = FALSE;
2609 	int			dlerror = 0;
2610 	int			status = 0;
2611 
2612 	msp = (manstr_t *)wq->q_ptr;
2613 	dlp = (union DL_primitives *)mp->b_rptr;
2614 
2615 	/*
2616 	 * Attach us to MAN PPA (device instance).
2617 	 */
2618 	if (MBLKL(mp) < DL_ATTACH_REQ_SIZE) {
2619 		dlerror = DL_BADPRIM;
2620 		goto exit;
2621 	}
2622 
2623 	if (msp->ms_dlpistate != DL_UNATTACHED) {
2624 		dlerror = DL_OUTSTATE;
2625 		goto exit;
2626 	}
2627 
2628 	ppa = dlp->attach_req.dl_ppa;
2629 	if (ppa == -1 || qassociate(wq, ppa) != 0) {
2630 		dlerror = DL_BADPPA;
2631 		MAN_DBG(MAN_WARN, ("man_areq: bad PPA %d", ppa));
2632 		goto exit;
2633 	}
2634 
2635 	mutex_enter(&man_lock);
2636 	manp = ddi_get_soft_state(man_softstate, ppa);
2637 	ASSERT(manp != NULL);	/* qassociate() succeeded */
2638 
2639 	manp->man_refcnt++;
2640 	did_refcnt = TRUE;
2641 	mutex_exit(&man_lock);
2642 
2643 	/*
2644 	 * Create a DL replay list for the lower stream. These wont
2645 	 * actually be sent down until the lower streams are made active
2646 	 * (sometime after the call to man_init_dests below).
2647 	 */
2648 	preq = man_alloc_physreq_mp(&manp->man_eaddr);
2649 	if (preq == NULL) {
2650 		dlerror = DL_SYSERR;
2651 		status = ENOMEM;
2652 		goto exit;
2653 	}
2654 
2655 	/*
2656 	 * Make copy for dlpi resync of upper and lower streams.
2657 	 */
2658 	if (man_dlpi(msp, mp)) {
2659 		dlerror = DL_SYSERR;
2660 		status = ENOMEM;
2661 		goto exit;
2662 	}
2663 
2664 	/* TBD - need to clean off ATTACH req on failure here. */
2665 	if (man_dlpi(msp, preq)) {
2666 		dlerror = DL_SYSERR;
2667 		status = ENOMEM;
2668 		goto exit;
2669 	}
2670 
2671 	/*
2672 	 * man_init_dests/man_start_dest needs these set before call.
2673 	 */
2674 	msp->ms_manp = manp;
2675 	msp->ms_meta_ppa = ppa;
2676 
2677 	/*
2678 	 *  Allocate and init lower destination structures.
2679 	 */
2680 	ASSERT(msp->ms_dests == NULL);
2681 	if (man_init_dests(manp, msp)) {
2682 		mblk_t	 *tmp;
2683 
2684 		/*
2685 		 * If we cant get the lower streams ready, then
2686 		 * remove the messages from the DL replay list and
2687 		 * fail attach.
2688 		 */
2689 		while ((tmp = msp->ms_dl_mp) != NULL) {
2690 			msp->ms_dl_mp = msp->ms_dl_mp->b_next;
2691 			tmp->b_next = tmp->b_prev = NULL;
2692 			freemsg(tmp);
2693 		}
2694 
2695 		msp->ms_manp = NULL;
2696 		msp->ms_meta_ppa = -1;
2697 
2698 		dlerror = DL_SYSERR;
2699 		status = ENOMEM;
2700 		goto exit;
2701 	}
2702 
2703 	MAN_DBG(MAN_DLPI, ("man_areq: ppa 0x%x man_refcnt: %d\n",
2704 	    ppa, manp->man_refcnt));
2705 
2706 	SETSTATE(msp, DL_UNBOUND);
2707 
2708 exit:
2709 	if (dlerror == 0) {
2710 		dlokack(wq, mp, DL_ATTACH_REQ);
2711 	} else {
2712 		if (did_refcnt) {
2713 			mutex_enter(&man_lock);
2714 			manp->man_refcnt--;
2715 			mutex_exit(&man_lock);
2716 		}
2717 		dlerrorack(wq, mp, DL_ATTACH_REQ, dlerror, status);
2718 		(void) qassociate(wq, -1);
2719 	}
2720 	if (preq != NULL)
2721 		freemsg(preq);
2722 
2723 }
2724 
2725 /*
2726  * Called at DL_ATTACH time.
2727  * Man_lock is held to protect pathgroup list(man_pg).
2728  */
2729 static int
2730 man_init_dests(man_t *manp, manstr_t *msp)
2731 {
2732 	man_dest_t	*mdp;
2733 	man_pg_t	*mpg;
2734 	int		i;
2735 
2736 	mdp = man_kzalloc(MAN_DEST_ARRAY_SIZE, KM_NOSLEEP);
2737 	if (mdp == NULL)
2738 		return (ENOMEM);
2739 
2740 	msp->ms_dests = mdp;
2741 
2742 	mutex_enter(&man_lock);
2743 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2744 
2745 		mdp[i].md_muxid = -1;	/* muxid 0 is valid */
2746 		mutex_init(&mdp->md_lock, NULL, MUTEX_DRIVER, NULL);
2747 
2748 		mpg = man_find_pg_by_id(manp->man_pg, i);
2749 
2750 		if (mpg && man_find_active_path(mpg->mpg_pathp))
2751 			man_start_dest(&mdp[i], msp, mpg);
2752 	}
2753 	mutex_exit(&man_lock);
2754 
2755 	return (0);
2756 }
2757 
2758 /*
2759  * Get a destination ready for use.
2760  */
2761 static void
2762 man_start_dest(man_dest_t *mdp, manstr_t *msp, man_pg_t *mpg)
2763 {
2764 	man_path_t	*ap;
2765 
2766 	mdp->md_muxid = -1;
2767 	mdp->md_dlpistate = DL_UNATTACHED;
2768 	mdp->md_msp = msp;
2769 	mdp->md_rq = msp->ms_rq;
2770 	mdp->md_pg_id = mpg->mpg_pg_id;
2771 
2772 	ASSERT(msp->ms_manp);
2773 
2774 	ether_copy(&msp->ms_manp->man_eaddr, &mdp->md_src_eaddr);
2775 	ether_copy(&mpg->mpg_dst_eaddr, &mdp->md_dst_eaddr);
2776 
2777 	ap = man_find_active_path(mpg->mpg_pathp);
2778 	ASSERT(ap);
2779 	mdp->md_device = ap->mp_device;
2780 
2781 	/*
2782 	 * Set up linktimers so that first time through, we will do
2783 	 * a failover.
2784 	 */
2785 	mdp->md_linkstate = MAN_LINKFAIL;
2786 	mdp->md_state = MAN_DSTATE_INITIALIZING;
2787 	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
2788 	    (void *)mdp, man_gettimer(MAN_TIMER_INIT, mdp));
2789 
2790 	/*
2791 	 * As an optimization, if there is only one destination,
2792 	 * remember the destination pointer. Used by man_start().
2793 	 */
2794 	man_set_optimized_dest(msp);
2795 
2796 	MAN_DBG(MAN_DEST, ("man_start_dest: mdp"));
2797 	MAN_DBGCALL(MAN_DEST, man_print_mdp(mdp));
2798 }
2799 
2800 static void
2801 man_set_optimized_dest(manstr_t *msp)
2802 {
2803 	int		count = 0;
2804 	int		i;
2805 	man_dest_t	*mdp = NULL;
2806 
2807 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2808 		if (msp->ms_dests[i].md_msp != NULL) {
2809 			count++;
2810 			mdp = &msp->ms_dests[i];
2811 		}
2812 	}
2813 
2814 	if (count == 1)
2815 		msp->ms_destp = mdp;
2816 	else
2817 		msp->ms_destp = NULL;
2818 
2819 }
2820 
2821 /*
2822  * Catch dlpi message for replaying, and arrange to send it down
2823  * to any destinations not PLUMBING. See man_dlpi_replay().
2824  */
2825 static int
2826 man_dlpi(manstr_t *msp, mblk_t *mp)
2827 {
2828 	int	status;
2829 
2830 	status = man_dl_catch(&msp->ms_dl_mp, mp);
2831 	if (status == 0)
2832 		status = man_dlpi_senddown(msp, mp);
2833 
2834 	return (status);
2835 }
2836 
2837 /*
2838  * Catch IOCTL type DL_ messages.
2839  */
2840 static int
2841 man_dlioc(manstr_t *msp, mblk_t *mp)
2842 {
2843 	int status;
2844 
2845 	status = man_dl_catch(&msp->ms_dlioc_mp, mp);
2846 	if (status == 0)
2847 		status = man_dlpi_senddown(msp, mp);
2848 
2849 	return (status);
2850 }
2851 
2852 /*
2853  * We catch all DLPI messages that we have to resend to a new AP'ed
2854  * device to put him in the right state.  We link these messages together
2855  * w/ their b_next fields and hang it off of msp->ms_dl_mp.  We
2856  * must be careful to restore b_next fields before doing dupmsg/freemsg!
2857  *
2858  *	msp - pointer of stream struct to process
2859  *	mblk - pointer to DLPI request to catch
2860  */
2861 static int
2862 man_dl_catch(mblk_t **mplist, mblk_t *mp)
2863 {
2864 	mblk_t			*dupmp;
2865 	mblk_t			*tmp;
2866 	unsigned		prim;
2867 	int			status = 0;
2868 
2869 	dupmp = copymsg(mp);
2870 	if (dupmp == NULL) {
2871 		status = ENOMEM;
2872 		goto exit;
2873 	}
2874 
2875 
2876 	if (*mplist == NULL)
2877 		*mplist = dupmp;
2878 	else {
2879 		for (tmp = *mplist; tmp->b_next; )
2880 			tmp = tmp->b_next;
2881 
2882 		tmp->b_next = dupmp;
2883 	}
2884 
2885 	prim = DL_PRIM(mp);
2886 	MAN_DBG(MAN_DLPI,
2887 	    ("man_dl_catch: adding %s\n",
2888 	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
2889 	    (prim == DLIOCRAW) ? "DLIOCRAW" :
2890 	    (prim == DL_PROMISCON_REQ) ? promisc[DL_PROMISCON_TYPE(mp)] :
2891 	    dps[prim]));
2892 
2893 exit:
2894 
2895 	return (status);
2896 }
2897 
2898 /*
2899  * Send down a single DLPI M_[PC]PROTO to all currently valid dests.
2900  *
2901  *	msp - ptr to NDM stream structure DL_ messages was received on.
2902  *	mp - ptr to mblk containing DL_ request.
2903  */
2904 static int
2905 man_dlpi_senddown(manstr_t *msp, mblk_t *mp)
2906 {
2907 	man_dest_t	*mdp;
2908 	int		i;
2909 	mblk_t		*rmp[MAN_MAX_DESTS];	/* Copy to replay */
2910 	int		dstate[MAN_MAX_DESTS];
2911 	int		no_dests = TRUE;
2912 	int		status = 0;
2913 
2914 	if (msp->ms_dests == NULL)
2915 		goto exit;
2916 
2917 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2918 		mdp = &msp->ms_dests[i];
2919 		if (mdp->md_state == MAN_DSTATE_READY) {
2920 			dstate[i] = TRUE;
2921 			no_dests = FALSE;
2922 		} else {
2923 			dstate[i] = FALSE;
2924 		}
2925 		rmp[i] = NULL;
2926 	}
2927 
2928 	if (no_dests)
2929 		goto exit;
2930 
2931 	/*
2932 	 * Build replay and duplicate list for all possible destinations.
2933 	 */
2934 	for (i = 0; i < MAN_MAX_DESTS; i++) {
2935 		if (dstate[i]) {
2936 			rmp[i] = copymsg(mp);
2937 			if (rmp[i] == NULL) {
2938 				status = ENOMEM;
2939 				break;
2940 			}
2941 		}
2942 	}
2943 
2944 	if (status == 0) {
2945 		for (i = 0; i < MAN_MAX_DESTS; i++)
2946 			if (dstate[i]) {
2947 				mdp = &msp->ms_dests[i];
2948 
2949 				ASSERT(mdp->md_wq != NULL);
2950 				ASSERT(mp->b_next == NULL);
2951 				ASSERT(mp->b_prev == NULL);
2952 
2953 				man_dlpi_replay(mdp, rmp[i]);
2954 			}
2955 	} else {
2956 		for (; i >= 0; i--)
2957 			if (dstate[i] && rmp[i])
2958 				freemsg(rmp[i]);
2959 	}
2960 
2961 exit:
2962 	return (status);
2963 }
2964 
2965 /*
2966  * man_dlpi_replay - traverse the list of DLPI requests and reapply them to
2967  * get the upper and lower streams into the same state. Called holding inner
2968  * perimeter lock exclusive. Note thet we defer M_IOCTL type dlpi messages
2969  * until we get an OK_ACK to our ATTACH (see man_lrsrv and
2970  * man_dlioc_replay).
2971  *
2972  * 	mdp - pointer to lower queue (destination)
2973  *	rmp - list of mblks to send down stream.
2974  */
2975 static void
2976 man_dlpi_replay(man_dest_t *mdp, mblk_t *rmp)
2977 {
2978 	mblk_t			*mp;
2979 	union DL_primitives	*dlp = NULL;
2980 
2981 	MAN_DBG(MAN_DLPI, ("man_dlpi_replay: mdp(0x%p)", (void *)mdp));
2982 
2983 	while (rmp) {
2984 		mp = rmp;
2985 		rmp = rmp->b_next;
2986 		mp->b_prev = mp->b_next = NULL;
2987 
2988 		dlp = (union DL_primitives *)mp->b_rptr;
2989 		MAN_DBG(MAN_DLPI,
2990 		    ("man_dlpi_replay: mdp(0x%p) sending %s\n",
2991 		    (void *)mdp,
2992 		    (dlp->dl_primitive == DL_IOC_HDR_INFO) ?
2993 		    "DL_IOC_HDR_INFO" : (dlp->dl_primitive == DLIOCRAW) ?
2994 		    "DLIOCRAW" : dps[(unsigned)(dlp->dl_primitive)]));
2995 
2996 		if (dlp->dl_primitive == DL_ATTACH_REQ) {
2997 			/*
2998 			 * insert the lower devices ppa.
2999 			 */
3000 			dlp->attach_req.dl_ppa = mdp->md_device.mdev_ppa;
3001 		}
3002 
3003 		(void) putnext(mdp->md_wq, mp);
3004 	}
3005 
3006 }
3007 
3008 static void
3009 man_dreq(queue_t *wq, mblk_t *mp)
3010 {
3011 	manstr_t	*msp;	/* per stream data */
3012 	man_work_t	*wp;
3013 
3014 	msp = (manstr_t *)wq->q_ptr;
3015 
3016 	if (MBLKL(mp) < DL_DETACH_REQ_SIZE) {
3017 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_BADPRIM, 0);
3018 		return;
3019 	}
3020 
3021 	if (msp->ms_dlpistate != DL_UNBOUND) {
3022 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_OUTSTATE, 0);
3023 		return;
3024 	}
3025 
3026 	ASSERT(msp->ms_dests != NULL);
3027 
3028 	wp = man_work_alloc(MAN_WORK_CLOSE_STREAM, KM_NOSLEEP);
3029 	if (wp == NULL) {
3030 		dlerrorack(wq, mp, DL_DETACH_REQ, DL_SYSERR, ENOMEM);
3031 		return;
3032 	}
3033 	man_dodetach(msp, wp);
3034 	(void) qassociate(wq, -1);
3035 
3036 	SETSTATE(msp, DL_UNATTACHED);
3037 
3038 	dlokack(wq, mp, DL_DETACH_REQ);
3039 }
3040 
3041 static void
3042 man_dl_clean(mblk_t **mplist)
3043 {
3044 	mblk_t	*tmp;
3045 
3046 	/*
3047 	 * Toss everything.
3048 	 */
3049 	while ((tmp = *mplist) != NULL) {
3050 		*mplist = (*mplist)->b_next;
3051 		tmp->b_next = tmp->b_prev = NULL;
3052 		freemsg(tmp);
3053 	}
3054 
3055 }
3056 
3057 /*
3058  * man_dl_release - Remove the corresponding DLPI request from the
3059  * catch list. Walk thru the catch list looking for the other half of
3060  * the pair and delete it.  If we are detaching, delete the entire list.
3061  *
3062  *	msp - pointer of stream struct to process
3063  *	mp  - pointer to mblk to first half of pair.  We will delete other
3064  * 		half of pair based on this.
3065  */
3066 static void
3067 man_dl_release(mblk_t **mplist, mblk_t *mp)
3068 {
3069 	uchar_t			match_dbtype;
3070 	mblk_t			*tmp;
3071 	mblk_t			*tmpp;
3072 	int			matched = FALSE;
3073 
3074 	if (*mplist == NULL)
3075 		goto exit;
3076 
3077 	match_dbtype = DB_TYPE(mp);
3078 
3079 	/*
3080 	 * Currently we only clean DL_ PROTO type messages. There is
3081 	 * no way to turn off M_CTL or DL_IOC stuff other than sending
3082 	 * down a DL_DETACH, which resets everything.
3083 	 */
3084 	if (match_dbtype != M_PROTO && match_dbtype != M_PCPROTO) {
3085 		goto exit;
3086 	}
3087 
3088 	/*
3089 	 * Selectively find a caught mblk that matches this one and
3090 	 * remove it from the list
3091 	 */
3092 	tmp = tmpp = *mplist;
3093 	matched = man_match_proto(mp, tmp);
3094 	if (matched) {
3095 		*mplist = tmp->b_next;
3096 		tmp->b_next = tmp->b_prev = NULL;
3097 	} else {
3098 		for (tmp = tmp->b_next; tmp != NULL; tmp = tmp->b_next) {
3099 			if (matched = man_match_proto(mp, tmp))
3100 				break;
3101 			tmpp = tmp;
3102 		}
3103 
3104 		if (matched) {
3105 			tmpp->b_next = tmp->b_next;
3106 			tmp->b_next = tmp->b_prev = NULL;
3107 		}
3108 	}
3109 
3110 exit:
3111 	if (matched) {
3112 
3113 		MAN_DBG(MAN_DLPI, ("man_dl_release: release %s",
3114 		    (DL_PRIM(mp) == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3115 		    (DL_PRIM(mp) == DLIOCRAW) ? "DLIOCRAW" :
3116 		    dps[(int)DL_PRIM(mp)]));
3117 
3118 		freemsg(tmp);
3119 	}
3120 	MAN_DBG(MAN_DLPI, ("man_dl_release: returns"));
3121 
3122 }
3123 
3124 /*
3125  * Compare two DL_ messages. If they are complimentary (e.g. DL_UNBIND
3126  * compliments DL_BIND), return true.
3127  */
3128 static int
3129 man_match_proto(mblk_t *mp1, mblk_t *mp2)
3130 {
3131 	t_uscalar_t	prim1;
3132 	t_uscalar_t	prim2;
3133 	int		matched = FALSE;
3134 
3135 	/*
3136 	 * Primitive to clean off list.
3137 	 */
3138 	prim1 = DL_PRIM(mp1);
3139 	prim2 = DL_PRIM(mp2);
3140 
3141 	switch (prim1) {
3142 	case DL_UNBIND_REQ:
3143 		if (prim2 == DL_BIND_REQ)
3144 			matched = TRUE;
3145 		break;
3146 
3147 	case DL_PROMISCOFF_REQ:
3148 		if (prim2 == DL_PROMISCON_REQ) {
3149 			dl_promiscoff_req_t	*poff1;
3150 			dl_promiscoff_req_t	*poff2;
3151 
3152 			poff1 = (dl_promiscoff_req_t *)mp1->b_rptr;
3153 			poff2 = (dl_promiscoff_req_t *)mp2->b_rptr;
3154 
3155 			if (poff1->dl_level == poff2->dl_level)
3156 				matched = TRUE;
3157 		}
3158 		break;
3159 
3160 	case DL_DISABMULTI_REQ:
3161 		if (prim2 == DL_ENABMULTI_REQ) {
3162 			union DL_primitives	*dlp;
3163 			t_uscalar_t		off;
3164 			eaddr_t			*addrp1;
3165 			eaddr_t			*addrp2;
3166 
3167 			dlp = (union DL_primitives *)mp1->b_rptr;
3168 			off = dlp->disabmulti_req.dl_addr_offset;
3169 			addrp1 = (eaddr_t *)(mp1->b_rptr + off);
3170 
3171 			dlp = (union DL_primitives *)mp2->b_rptr;
3172 			off = dlp->disabmulti_req.dl_addr_offset;
3173 			addrp2 = (eaddr_t *)(mp2->b_rptr + off);
3174 
3175 			if (ether_cmp(addrp1, addrp2) == 0)
3176 				matched = 1;
3177 		}
3178 		break;
3179 
3180 	default:
3181 		break;
3182 	}
3183 
3184 	MAN_DBG(MAN_DLPI, ("man_match_proto returns %d", matched));
3185 
3186 	return (matched);
3187 }
3188 
3189 /*
3190  * Bind upper stream to a particular SAP. Called with exclusive innerperim
3191  * QPAIR, shared outerperim.
3192  */
3193 static void
3194 man_breq(queue_t *wq, mblk_t *mp)
3195 {
3196 	man_t			*manp;	/* per instance data */
3197 	manstr_t		*msp;	/* per stream data */
3198 	union DL_primitives	*dlp;
3199 	man_dladdr_t		man_addr;
3200 	t_uscalar_t		sap;
3201 	t_uscalar_t		xidtest;
3202 
3203 	msp = (manstr_t *)wq->q_ptr;
3204 
3205 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
3206 		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADPRIM, 0);
3207 		return;
3208 	}
3209 
3210 	if (msp->ms_dlpistate != DL_UNBOUND) {
3211 		dlerrorack(wq, mp, DL_BIND_REQ, DL_OUTSTATE, 0);
3212 		return;
3213 	}
3214 
3215 	dlp = (union DL_primitives *)mp->b_rptr;
3216 	manp = msp->ms_manp;			/* valid after attach */
3217 	sap = dlp->bind_req.dl_sap;
3218 	xidtest = dlp->bind_req.dl_xidtest_flg;
3219 
3220 	ASSERT(manp);
3221 
3222 	if (xidtest) {
3223 		dlerrorack(wq, mp, DL_BIND_REQ, DL_NOAUTO, 0);
3224 		return;
3225 	}
3226 
3227 	if (sap > ETHERTYPE_MAX) {
3228 		dlerrorack(wq, mp, DL_BIND_REQ, DL_BADSAP, 0);
3229 		return;
3230 	}
3231 
3232 	if (man_dlpi(msp, mp)) {
3233 		dlerrorack(wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
3234 		return;
3235 	}
3236 
3237 	msp->ms_sap = sap;
3238 
3239 	SETSTATE(msp, DL_IDLE);
3240 
3241 	man_addr.dl_sap = msp->ms_sap;
3242 	ether_copy(&msp->ms_manp->man_eaddr, &man_addr.dl_phys);
3243 
3244 	dlbindack(wq, mp, msp->ms_sap, &man_addr, MAN_ADDRL, 0, 0);
3245 
3246 }
3247 
3248 static void
3249 man_ubreq(queue_t *wq, mblk_t *mp)
3250 {
3251 	manstr_t		*msp;	/* per stream data */
3252 
3253 	msp = (manstr_t *)wq->q_ptr;
3254 
3255 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
3256 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
3257 		return;
3258 	}
3259 
3260 	if (msp->ms_dlpistate != DL_IDLE) {
3261 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
3262 		return;
3263 	}
3264 
3265 	if (man_dlpi_senddown(msp, mp)) {
3266 		dlerrorack(wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
3267 		return;
3268 	}
3269 
3270 	man_dl_release(&msp->ms_dl_mp, mp);
3271 
3272 	SETSTATE(msp, DL_UNBOUND);
3273 
3274 	dlokack(wq, mp, DL_UNBIND_REQ);
3275 
3276 }
3277 
3278 static void
3279 man_ireq(queue_t *wq, mblk_t *mp)
3280 {
3281 	manstr_t	*msp;
3282 	dl_info_ack_t	*dlip;
3283 	man_dladdr_t	*dlap;
3284 	eaddr_t		*ep;
3285 	size_t	size;
3286 
3287 	msp = (manstr_t *)wq->q_ptr;
3288 
3289 	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
3290 		dlerrorack(wq, mp, DL_INFO_REQ, DL_BADPRIM, 0);
3291 		return;
3292 	}
3293 
3294 	/* Exchange current msg for a DL_INFO_ACK. */
3295 	size = sizeof (dl_info_ack_t) + MAN_ADDRL + ETHERADDRL;
3296 	mp = mexchange(wq, mp, size, M_PCPROTO, DL_INFO_ACK);
3297 	if (mp == NULL) {
3298 		MAN_DBG(MAN_DLPI, ("man_ireq: man_ireq: mp == NULL."));
3299 		return;
3300 	}
3301 
3302 	/* Fill in the DL_INFO_ACK fields and reply. */
3303 	dlip = (dl_info_ack_t *)mp->b_rptr;
3304 	*dlip = man_infoack;
3305 	dlip->dl_current_state = msp->ms_dlpistate;
3306 	dlap = (man_dladdr_t *)(mp->b_rptr + dlip->dl_addr_offset);
3307 	dlap->dl_sap = msp->ms_sap;
3308 
3309 	/*
3310 	 * If attached, return physical address.
3311 	 */
3312 	if (msp->ms_manp != NULL) {
3313 		ether_copy(&msp->ms_manp->man_eaddr, &dlap->dl_phys);
3314 	} else {
3315 		bzero((caddr_t)&dlap->dl_phys, ETHERADDRL);
3316 	}
3317 
3318 	ep = (struct ether_addr *)(mp->b_rptr + dlip->dl_brdcst_addr_offset);
3319 	ether_copy(&etherbroadcast, ep);
3320 
3321 	qreply(wq, mp);
3322 
3323 }
3324 
3325 
3326 static void
3327 man_ponreq(queue_t *wq, mblk_t *mp)
3328 {
3329 	manstr_t	*msp;
3330 	int		flag;
3331 
3332 	msp = (manstr_t *)wq->q_ptr;
3333 
3334 	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
3335 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
3336 		return;
3337 	}
3338 
3339 	switch (((dl_promiscon_req_t *)mp->b_rptr)->dl_level) {
3340 	case DL_PROMISC_PHYS:
3341 		flag = MAN_SFLAG_ALLPHYS;
3342 		break;
3343 
3344 	case DL_PROMISC_SAP:
3345 		flag = MAN_SFLAG_ALLSAP;
3346 		break;
3347 
3348 	case DL_PROMISC_MULTI:
3349 		flag = MAN_SFLAG_ALLMULTI;
3350 		break;
3351 
3352 	default:
3353 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_NOTSUPPORTED, 0);
3354 		return;
3355 	}
3356 
3357 	/*
3358 	 * Catch request for replay, and forward down to any lower
3359 	 * lower stream.
3360 	 */
3361 	if (man_dlpi(msp, mp)) {
3362 		dlerrorack(wq, mp, DL_PROMISCON_REQ, DL_SYSERR, ENOMEM);
3363 		return;
3364 	}
3365 
3366 	msp->ms_flags |= flag;
3367 
3368 	dlokack(wq, mp, DL_PROMISCON_REQ);
3369 
3370 }
3371 
3372 static void
3373 man_poffreq(queue_t *wq, mblk_t *mp)
3374 {
3375 	manstr_t		*msp;
3376 	int			flag;
3377 
3378 	msp = (manstr_t *)wq->q_ptr;
3379 
3380 	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
3381 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
3382 		return;
3383 	}
3384 
3385 	switch (((dl_promiscoff_req_t *)mp->b_rptr)->dl_level) {
3386 	case DL_PROMISC_PHYS:
3387 		flag = MAN_SFLAG_ALLPHYS;
3388 		break;
3389 
3390 	case DL_PROMISC_SAP:
3391 		flag = MAN_SFLAG_ALLSAP;
3392 		break;
3393 
3394 	case DL_PROMISC_MULTI:
3395 		flag = MAN_SFLAG_ALLMULTI;
3396 		break;
3397 
3398 	default:
3399 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTSUPPORTED, 0);
3400 		return;
3401 	}
3402 
3403 	if ((msp->ms_flags & flag) == 0) {
3404 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
3405 		return;
3406 	}
3407 
3408 	if (man_dlpi_senddown(msp, mp)) {
3409 		dlerrorack(wq, mp, DL_PROMISCOFF_REQ, DL_SYSERR, ENOMEM);
3410 		return;
3411 	}
3412 
3413 	man_dl_release(&msp->ms_dl_mp, mp);
3414 
3415 	msp->ms_flags &= ~flag;
3416 
3417 	dlokack(wq, mp, DL_PROMISCOFF_REQ);
3418 
3419 }
3420 
3421 /*
3422  * Enable multicast requests. We might need to track addresses instead of
3423  * just passing things through (see eri_dmreq) - TBD.
3424  */
3425 static void
3426 man_emreq(queue_t *wq, mblk_t *mp)
3427 {
3428 	manstr_t		*msp;
3429 	union DL_primitives	*dlp;
3430 	eaddr_t			*addrp;
3431 	t_uscalar_t		off;
3432 	t_uscalar_t		len;
3433 
3434 	msp = (manstr_t *)wq->q_ptr;
3435 
3436 	if (MBLKL(mp) < DL_ENABMULTI_REQ_SIZE) {
3437 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADPRIM, 0);
3438 		return;
3439 	}
3440 
3441 	if (msp->ms_dlpistate == DL_UNATTACHED) {
3442 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3443 		return;
3444 	}
3445 
3446 	dlp = (union DL_primitives *)mp->b_rptr;
3447 	len = dlp->enabmulti_req.dl_addr_length;
3448 	off = dlp->enabmulti_req.dl_addr_offset;
3449 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3450 
3451 	if ((len != ETHERADDRL) ||
3452 	    !MBLKIN(mp, off, len) ||
3453 	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3454 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3455 		return;
3456 	}
3457 
3458 	/*
3459 	 * Catch request for replay, and forward down to any lower
3460 	 * lower stream.
3461 	 */
3462 	if (man_dlpi(msp, mp)) {
3463 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3464 		return;
3465 	}
3466 
3467 	dlokack(wq, mp, DL_ENABMULTI_REQ);
3468 
3469 }
3470 
3471 static void
3472 man_dmreq(queue_t *wq, mblk_t *mp)
3473 {
3474 	manstr_t		*msp;
3475 	union DL_primitives	*dlp;
3476 	eaddr_t			*addrp;
3477 	t_uscalar_t		off;
3478 	t_uscalar_t		len;
3479 
3480 	msp = (manstr_t *)wq->q_ptr;
3481 
3482 	if (MBLKL(mp) < DL_DISABMULTI_REQ_SIZE) {
3483 		dlerrorack(wq, mp, DL_DISABMULTI_REQ, DL_BADPRIM, 0);
3484 		return;
3485 	}
3486 
3487 	if (msp->ms_dlpistate == DL_UNATTACHED) {
3488 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_OUTSTATE, 0);
3489 		return;
3490 	}
3491 
3492 	dlp = (union DL_primitives *)mp->b_rptr;
3493 	len = dlp->enabmulti_req.dl_addr_length;
3494 	off = dlp->enabmulti_req.dl_addr_offset;
3495 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3496 
3497 	if ((len != ETHERADDRL) ||
3498 	    !MBLKIN(mp, off, len) ||
3499 	    ((addrp->ether_addr_octet[0] & 01) == 0)) {
3500 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_BADADDR, 0);
3501 		return;
3502 	}
3503 
3504 	if (man_dlpi_senddown(msp, mp)) {
3505 		dlerrorack(wq, mp, DL_ENABMULTI_REQ, DL_SYSERR, ENOMEM);
3506 		return;
3507 	}
3508 
3509 	man_dl_release(&msp->ms_dl_mp, mp);
3510 
3511 	dlokack(wq, mp, DL_DISABMULTI_REQ);
3512 
3513 }
3514 
3515 static void
3516 man_pareq(queue_t *wq, mblk_t *mp)
3517 {
3518 	manstr_t		*msp;
3519 	union	DL_primitives	*dlp;
3520 	uint32_t		type;
3521 	struct	ether_addr	addr;
3522 
3523 	msp = (manstr_t *)wq->q_ptr;
3524 
3525 	if (MBLKL(mp) < DL_PHYS_ADDR_REQ_SIZE) {
3526 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3527 		return;
3528 	}
3529 
3530 	dlp = (union DL_primitives *)mp->b_rptr;
3531 	type = dlp->physaddr_req.dl_addr_type;
3532 	if (msp->ms_manp == NULL) {
3533 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3534 		return;
3535 	}
3536 
3537 	switch (type) {
3538 	case	DL_FACT_PHYS_ADDR:
3539 		(void) localetheraddr((struct ether_addr *)NULL, &addr);
3540 		break;
3541 
3542 	case	DL_CURR_PHYS_ADDR:
3543 		ether_bcopy(&msp->ms_manp->man_eaddr, &addr);
3544 		break;
3545 
3546 	default:
3547 		dlerrorack(wq, mp, DL_PHYS_ADDR_REQ, DL_NOTSUPPORTED, 0);
3548 		return;
3549 	}
3550 
3551 	dlphysaddrack(wq, mp, &addr, ETHERADDRL);
3552 }
3553 
3554 /*
3555  * TBD - this routine probably should be protected w/ an ndd
3556  * tuneable, or a man.conf parameter.
3557  */
3558 static void
3559 man_spareq(queue_t *wq, mblk_t *mp)
3560 {
3561 	manstr_t		*msp;
3562 	union DL_primitives	*dlp;
3563 	t_uscalar_t		off;
3564 	t_uscalar_t		len;
3565 	eaddr_t			*addrp;
3566 
3567 	msp = (manstr_t *)wq->q_ptr;
3568 
3569 	if (MBLKL(mp) < DL_SET_PHYS_ADDR_REQ_SIZE) {
3570 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3571 		return;
3572 	}
3573 
3574 	dlp = (union DL_primitives *)mp->b_rptr;
3575 	len = dlp->set_physaddr_req.dl_addr_length;
3576 	off = dlp->set_physaddr_req.dl_addr_offset;
3577 
3578 	if (!MBLKIN(mp, off, len)) {
3579 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADPRIM, 0);
3580 		return;
3581 	}
3582 
3583 	addrp = (struct ether_addr *)(mp->b_rptr + off);
3584 
3585 	/*
3586 	 * Error if length of address isn't right or the address
3587 	 * specified is a multicast or broadcast address.
3588 	 */
3589 	if ((len != ETHERADDRL) ||
3590 	    ((addrp->ether_addr_octet[0] & 01) == 1) ||
3591 	    (ether_cmp(addrp, &etherbroadcast) == 0)) {
3592 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_BADADDR, 0);
3593 		return;
3594 	}
3595 	/*
3596 	 * Error if this stream is not attached to a device.
3597 	 */
3598 	if (msp->ms_manp == NULL) {
3599 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_OUTSTATE, 0);
3600 		return;
3601 	}
3602 
3603 	/*
3604 	 * We will also resend DL_SET_PHYS_ADDR_REQ for each dest
3605 	 * when it is linked under us.
3606 	 */
3607 	if (man_dlpi_senddown(msp, mp)) {
3608 		dlerrorack(wq, mp, DL_SET_PHYS_ADDR_REQ, DL_SYSERR, ENOMEM);
3609 		return;
3610 	}
3611 
3612 	ether_copy(addrp, msp->ms_manp->man_eaddr.ether_addr_octet);
3613 
3614 	MAN_DBG(MAN_DLPI, ("man_sareq: snagged %s\n",
3615 	    ether_sprintf(&msp->ms_manp->man_eaddr)));
3616 
3617 	dlokack(wq, mp, DL_SET_PHYS_ADDR_REQ);
3618 
3619 }
3620 
3621 /*
3622  * These routines make up the lower part of the MAN streams framework.
3623  */
3624 
3625 /*
3626  * man_lwsrv - Deferred mblks for down stream. We end up here when
3627  * the destination is not DL_IDLE when traffic comes downstream.
3628  *
3629  *	wq - lower write queue of mxx
3630  */
3631 static int
3632 man_lwsrv(queue_t *wq)
3633 {
3634 	mblk_t		*mp;
3635 	mblk_t		*mlistp;
3636 	man_dest_t	*mdp;
3637 	size_t		count;
3638 
3639 	mdp = (man_dest_t *)wq->q_ptr;
3640 
3641 	MAN_DBG(MAN_LWSRV, ("man_lwsrv: wq(0x%p) mdp(0x%p)"
3642 	    " md_rq(0x%p)\n", (void *)wq, (void *)mdp,
3643 	    mdp ? (void *)mdp->md_rq : NULL));
3644 
3645 	if (mdp == NULL)
3646 		goto exit;
3647 
3648 	if (mdp->md_state & MAN_DSTATE_CLOSING) {
3649 			flushq(wq, FLUSHDATA);
3650 			flushq(RD(wq), FLUSHDATA);
3651 			goto exit;
3652 	}
3653 
3654 	/*
3655 	 * Arrange to send deferred mp's first, then mblks on the
3656 	 * service queue. Since we are exclusive in the inner perimeter,
3657 	 * we dont have to worry about md_lock, like the put procedures,
3658 	 * which are MTPUTSHARED.
3659 	 */
3660 	mutex_enter(&mdp->md_lock);
3661 	mlistp = mdp->md_dmp_head;
3662 	mdp->md_dmp_head = NULL;
3663 	count = mdp->md_dmp_count;
3664 	mdp->md_dmp_count = 0;
3665 	mutex_exit(&mdp->md_lock);
3666 
3667 	while (mlistp != NULL) {
3668 		mp = mlistp;
3669 		mlistp = mp->b_next;
3670 		mp->b_next = NULL;
3671 		count -= msgsize(mp);
3672 		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3673 
3674 			mutex_enter(&mdp->md_lock);
3675 			mdp->md_dmp_count += count + msgsize(mp);
3676 			mp->b_next = mlistp;
3677 			mdp->md_dmp_head = mp;
3678 			mutex_exit(&mdp->md_lock);
3679 			goto exit;
3680 		}
3681 	}
3682 	mdp->md_dmp_tail = NULL;
3683 
3684 	while (mp = getq(wq)) {
3685 		if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
3686 			/*
3687 			 * Put it back on queue, making sure to avoid
3688 			 * infinite loop mentioned in putbq(9F)
3689 			 */
3690 			noenable(wq);
3691 			putbq(wq, mp);
3692 			enableok(wq);
3693 
3694 			break;
3695 		}
3696 	}
3697 
3698 exit:
3699 
3700 	return (0);
3701 }
3702 
3703 /*
3704  * man_lrput - handle DLPI messages issued from downstream.
3705  *
3706  *	rq - lower read queue of mxx
3707  *	mp - mblk ptr to DLPI request
3708  *
3709  *	returns 0
3710  */
3711 static int
3712 man_lrput(queue_t *rq, mblk_t *mp)
3713 {
3714 	man_dest_t	*mdp;
3715 	manstr_t	*msp;
3716 
3717 #if defined(DEBUG)
3718 	union DL_primitives	*dlp;
3719 	t_uscalar_t		prim = MAN_DLPI_MAX_PRIM + 1;
3720 	char			*prim_str;
3721 #endif  /* DEBUG */
3722 
3723 	mdp = (man_dest_t *)rq->q_ptr;
3724 
3725 #if defined(DEBUG)
3726 	if (DB_TYPE(mp) == M_PROTO) {
3727 		dlp = (union DL_primitives *)mp->b_rptr;
3728 		prim = dlp->dl_primitive;
3729 	}
3730 
3731 	prim_str = (prim > MAN_DLPI_MAX_PRIM) ? "NON DLPI" :
3732 	    (prim == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3733 	    (prim == DLIOCRAW) ? "DLIOCRAW" :
3734 	    dps[(unsigned int)prim];
3735 	MAN_DBG(MAN_LRPUT, ("man_lrput: rq(0x%p) mp(0x%p) mdp(0x%p)"
3736 	    " db_type(0x%x) dl_prim %s", (void *)rq,
3737 	    (void *)mp, (void *)mdp, DB_TYPE(mp), prim_str));
3738 	MAN_DBGCALL(MAN_LRPUT2, man_print_mdp(mdp));
3739 #endif  /* DEBUG */
3740 
3741 	if (DB_TYPE(mp) == M_FLUSH) {
3742 		/* Turn around */
3743 		if (*mp->b_rptr & FLUSHW) {
3744 			*mp->b_rptr &= ~FLUSHR;
3745 			qreply(rq, mp);
3746 		} else
3747 			freemsg(mp);
3748 		return (0);
3749 	}
3750 
3751 	if (mdp == NULL || mdp->md_state != MAN_DSTATE_READY) {
3752 
3753 		MAN_DBG(MAN_LRPUT, ("man_lrput: not ready mdp(0x%p),"
3754 		    " state(%d)", (void *)mdp, mdp ? mdp->md_state : -1));
3755 		freemsg(mp);
3756 		return (0);
3757 	}
3758 
3759 	/*
3760 	 * If we have a destination in the right state, forward on datagrams.
3761 	 */
3762 	if (MAN_IS_DATA(mp)) {
3763 		if (mdp->md_dlpistate == DL_IDLE && canputnext(mdp->md_rq)) {
3764 
3765 			msp = mdp->md_msp;
3766 			if (!(msp->ms_flags & MAN_SFLAG_PROMISC))
3767 				mdp->md_rcvcnt++; /* Count for failover */
3768 			/*
3769 			 * go put mblk_t directly up to next queue.
3770 			 */
3771 			MAN_DBG(MAN_LRPUT, ("man_lrput: putnext to rq(0x%p)",
3772 			    (void *)mdp->md_rq));
3773 			(void) putnext(mdp->md_rq, mp);
3774 		} else {
3775 			freemsg(mp);
3776 		}
3777 	} else {
3778 		/*
3779 		 * Handle in man_lrsrv with exclusive inner perimeter lock.
3780 		 */
3781 		putq(rq, mp);
3782 	}
3783 
3784 	return (0);
3785 }
3786 
3787 /*
3788  * Either this is a response from our attempt to sync the upper and lower
3789  * stream states, or its data. If its not data. Do DL_* response processing
3790  * and transition md_dlpistate accordingly. If its data, toss it.
3791  */
3792 static int
3793 man_lrsrv(queue_t *rq)
3794 {
3795 	man_dest_t		*mdp;
3796 	mblk_t			*mp;
3797 	union DL_primitives	*dlp;
3798 	ulong_t			prim;
3799 	ulong_t			cprim;
3800 	int			need_dl_reset = FALSE;
3801 
3802 #if defined(DEBUG)
3803 		struct iocblk	*iocp;
3804 		char		ioc_cmd[256];
3805 #endif  /* DEBUG */
3806 
3807 	MAN_DBG(MAN_LRSRV, ("man_lrsrv: rq(0x%p)", (void *)rq));
3808 
3809 	mdp = (man_dest_t *)rq->q_ptr;
3810 
3811 	if ((mdp == NULL) || (mdp->md_state & MAN_DSTATE_CLOSING)) {
3812 			flushq(rq, FLUSHDATA);
3813 			flushq(WR(rq), FLUSHDATA);
3814 			goto exit;
3815 	}
3816 
3817 	while (mp = getq(rq)) {
3818 
3819 
3820 	/*
3821 	 * If we're not connected, or its a datagram, toss it.
3822 	 */
3823 	if (MAN_IS_DATA(mp) || mdp->md_state != MAN_DSTATE_READY) {
3824 
3825 		MAN_DBG(MAN_LRSRV, ("man_lrsrv: dropping mblk mdp(0x%p)"
3826 		    " is_data(%d)", (void *)mdp, MAN_IS_DATA(mp)));
3827 		freemsg(mp);
3828 		continue;
3829 	}
3830 
3831 	/*
3832 	 * Should be response to man_dlpi_replay. Discard unless there
3833 	 * is a failure we care about.
3834 	 */
3835 
3836 	switch (DB_TYPE(mp)) {
3837 	case M_PROTO:
3838 	case M_PCPROTO:
3839 		/* Do proto processing below. */
3840 		break;
3841 
3842 	case M_IOCNAK:
3843 		/*
3844 		 * DL_IOC* failed for some reason.
3845 		 */
3846 		need_dl_reset = TRUE;
3847 
3848 #if defined(DEBUG)
3849 		iocp = (struct iocblk *)mp->b_rptr;
3850 
3851 		sprintf(ioc_cmd, "0x%x", iocp->ioc_cmd);
3852 		MAN_DBG(MAN_LRSRV, ("man_lrsrv: M_IOCNAK err %d for cmd(%s)\n",
3853 		    iocp->ioc_error,
3854 		    (iocp->ioc_cmd == DL_IOC_HDR_INFO) ? "DL_IOC_HDR_INFO" :
3855 		    (iocp->ioc_cmd == DLIOCRAW) ? "DLIOCRAW" : ioc_cmd));
3856 #endif  /* DEBUG */
3857 
3858 		/* FALLTHRU */
3859 
3860 	case M_IOCACK:
3861 	case M_CTL:
3862 		/*
3863 		 * OK response from DL_IOC*, ignore.
3864 		 */
3865 		goto dl_reset;
3866 	}
3867 
3868 	dlp = (union DL_primitives *)mp->b_rptr;
3869 	prim = dlp->dl_primitive;
3870 
3871 	MAN_DBG(MAN_LRSRV, ("man_lrsrv: prim %s", dps[(int)prim]));
3872 
3873 	/*
3874 	 * DLPI state processing big theory: We do not rigorously check
3875 	 * DLPI states (e.g. PENDING stuff). Simple rules:
3876 	 *
3877 	 * 	1) If we see an OK_ACK to an ATTACH_REQ, dlpistate = DL_UNBOUND.
3878 	 *	2) If we see an BIND_ACK to a BIND_REQ, dlpistate = DL_IDLE.
3879 	 *	3) If we see a OK_ACK response to an UNBIND_REQ
3880 	 *	   dlpistate = DL_UNBOUND.
3881 	 *	4) If we see a OK_ACK response to a DETACH_REQ,
3882 	 *	   dlpistate = DL_UNATTACHED.
3883 	 *
3884 	 * Everything that isn't handle by 1-4 above is handled by 5)
3885 	 *
3886 	 *	5) A NAK to any DL_* messages we care about causes
3887 	 *	   dlpistate = DL_UNATTACHED and man_reset_dlpi to run
3888 	 *
3889 	 * TBD - need a reset counter so we can try a switch if it gets
3890 	 * too high.
3891 	 */
3892 
3893 	switch (prim) {
3894 	case DL_OK_ACK:
3895 		cprim = dlp->ok_ack.dl_correct_primitive;
3896 
3897 		switch (cprim) {
3898 		case DL_ATTACH_REQ:
3899 			if (man_dlioc_replay(mdp)) {
3900 				D_SETSTATE(mdp, DL_UNBOUND);
3901 			} else {
3902 				need_dl_reset = TRUE;
3903 				break;
3904 			}
3905 			break;
3906 
3907 		case DL_DETACH_REQ:
3908 			D_SETSTATE(mdp, DL_UNATTACHED);
3909 			break;
3910 
3911 		case DL_UNBIND_REQ:
3912 			/*
3913 			 * Cancel timer and set md_dlpistate.
3914 			 */
3915 			D_SETSTATE(mdp, DL_UNBOUND);
3916 
3917 			ASSERT(mdp->md_bc_id == 0);
3918 			if (mdp->md_lc_timer_id != 0) {
3919 				(void) quntimeout(man_ctl_wq,
3920 				    mdp->md_lc_timer_id);
3921 				mdp->md_lc_timer_id = 0;
3922 			}
3923 		}
3924 		MAN_DBG(MAN_DLPI,
3925 		    ("		cprim %s", dps[(int)cprim]));
3926 		break;
3927 
3928 	case DL_BIND_ACK:
3929 		/*
3930 		 * We're ready for data. Get man_lwsrv to run to
3931 		 * process any defered data and start linkcheck timer.
3932 		 */
3933 		D_SETSTATE(mdp, DL_IDLE);
3934 		qenable(mdp->md_wq);
3935 		mdp->md_linkstate = MAN_LINKGOOD;
3936 		if (man_needs_linkcheck(mdp)) {
3937 			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
3938 			    man_linkcheck_timer, (void *)mdp,
3939 			    man_gettimer(MAN_TIMER_LINKCHECK, mdp));
3940 		}
3941 
3942 		break;
3943 
3944 	case DL_ERROR_ACK:
3945 		cprim = dlp->error_ack.dl_error_primitive;
3946 		switch (cprim) {
3947 		case DL_ATTACH_REQ:
3948 		case DL_BIND_REQ:
3949 		case DL_DISABMULTI_REQ:
3950 		case DL_ENABMULTI_REQ:
3951 		case DL_PROMISCON_REQ:
3952 		case DL_PROMISCOFF_REQ:
3953 		case DL_SET_PHYS_ADDR_REQ:
3954 			need_dl_reset = TRUE;
3955 			break;
3956 
3957 		/*
3958 		 * ignore error TBD (better comment)
3959 		 */
3960 		case DL_UNBIND_REQ:
3961 		case DL_DETACH_REQ:
3962 			break;
3963 		}
3964 
3965 		MAN_DBG(MAN_DLPI,
3966 		    ("\tdl_errno %d dl_unix_errno %d cprim %s",
3967 		    dlp->error_ack.dl_errno, dlp->error_ack.dl_unix_errno,
3968 		    dps[(int)cprim]));
3969 		break;
3970 
3971 	case DL_UDERROR_IND:
3972 		MAN_DBG(MAN_DLPI,
3973 		    ("\tdl_errno %d unix_errno %d",
3974 		    dlp->uderror_ind.dl_errno,
3975 		    dlp->uderror_ind.dl_unix_errno));
3976 		break;
3977 
3978 	case DL_INFO_ACK:
3979 		break;
3980 
3981 	default:
3982 		/*
3983 		 * We should not get here.
3984 		 */
3985 		cmn_err(CE_WARN, "man_lrsrv: unexpected DL prim 0x%lx!",
3986 		    prim);
3987 		need_dl_reset = TRUE;
3988 		break;
3989 	}
3990 
3991 dl_reset:
3992 	freemsg(mp);
3993 
3994 	if (need_dl_reset) {
3995 		man_pg_t	*mpg;
3996 		man_path_t	*mp;
3997 
3998 		if (qsize(rq)) {	/* Dump all messages. */
3999 			flushq(rq, FLUSHDATA);
4000 			flushq(WR(rq), FLUSHDATA);
4001 		}
4002 
4003 		mdp->md_dlpierrors++;
4004 		D_SETSTATE(mdp, DL_UNATTACHED);
4005 		if (mdp->md_lc_timer_id != 0) {
4006 			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4007 			mdp->md_lc_timer_id = 0;
4008 		}
4009 
4010 		mutex_enter(&man_lock);
4011 		ASSERT(mdp->md_msp != NULL);
4012 		ASSERT(mdp->md_msp->ms_manp != NULL);
4013 		mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg,
4014 		    mdp->md_pg_id);
4015 		ASSERT(mpg != NULL);
4016 		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4017 		    mdp->md_device.mdev_ppa);
4018 		ASSERT(mp != NULL);
4019 		mp->mp_device.mdev_state |= MDEV_FAILED;
4020 		if ((mdp->md_dlpierrors >= MAN_MAX_DLPIERRORS) &&
4021 		    (man_is_on_domain ||
4022 		    mdp->md_msp->ms_manp->man_meta_ppa == 1)) {
4023 			/*
4024 			 * Autoswitching is disabled for instance 0
4025 			 * on the SC as we expect the domain to
4026 			 * initiate the path switching.
4027 			 */
4028 			(void) man_do_autoswitch((man_dest_t *)mdp);
4029 			MAN_DBG(MAN_WARN, ("man_lrsrv: dlpi failure(%d,%d),"
4030 			    " switching path", mdp->md_device.mdev_major,
4031 			    mdp->md_device.mdev_ppa));
4032 		} else {
4033 			mdp->md_lc_timer_id = qtimeout(man_ctl_wq,
4034 			    man_reset_dlpi, (void *)mdp,
4035 			    man_gettimer(MAN_TIMER_DLPIRESET, mdp));
4036 		}
4037 		mutex_exit(&man_lock);
4038 	}
4039 
4040 
4041 	} /* End while (getq()) */
4042 
4043 exit:
4044 	MAN_DBG(MAN_DLPI, ("man_lrsrv: returns"));
4045 
4046 	return (0);
4047 }
4048 
4049 static int
4050 man_needs_linkcheck(man_dest_t *mdp)
4051 {
4052 	/*
4053 	 * Not ready for linkcheck.
4054 	 */
4055 	if (mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
4056 		return (0);
4057 
4058 	/*
4059 	 * Linkchecking needs to be done on IP streams. For domain, all
4060 	 * driver instances need checking, for SC only instance 1 needs it.
4061 	 */
4062 	if ((man_is_on_domain || mdp->md_msp->ms_manp->man_meta_ppa == 1) &&
4063 	    (mdp->md_msp->ms_sap == ETHERTYPE_IP ||
4064 	    mdp->md_msp->ms_sap == ETHERTYPE_IPV6))
4065 
4066 		return (1);
4067 
4068 	/*
4069 	 * Linkcheck not need on this link.
4070 	 */
4071 	return (0);
4072 }
4073 
4074 /*
4075  * The following routines process work requests posted to man_iwork_q
4076  * from the non-STREAMS half of the driver (see man_bwork.c). The work
4077  * requires access to the inner perimeter lock of the driver. This
4078  * lock is acquired by man_uwsrv, who calls man_iwork to process the
4079  * man_iwork_q->
4080  */
4081 
4082 /*
4083  * The man_bwork has posted some work for us to do inside the
4084  * perimeter. This mainly involves updating lower multiplexor data
4085  * structures (non-blocking type stuff). So, we can hold the man_lock
4086  * until we are done processing all work items. Note that some of these
4087  * routines in turn submit work back to the bgthread, which they can do
4088  * since we hold the man_lock.
4089  */
4090 static void
4091 man_iwork()
4092 {
4093 	man_work_t	*wp;
4094 	int		wp_finished;
4095 
4096 	MAN_DBG(MAN_SWITCH, ("man_iwork: q_work(0x%p)",
4097 	    (void *)man_iwork_q->q_work));
4098 
4099 	mutex_enter(&man_lock);
4100 
4101 	while (man_iwork_q->q_work) {
4102 
4103 		wp = man_iwork_q->q_work;
4104 		man_iwork_q->q_work = wp->mw_next;
4105 		wp->mw_next = NULL;
4106 
4107 		mutex_exit(&man_lock);
4108 
4109 		MAN_DBG(MAN_SWITCH, ("man_iwork: type %s",
4110 		    _mw_type[wp->mw_type]));
4111 
4112 		wp_finished = TRUE;
4113 
4114 		switch (wp->mw_type) {
4115 		case MAN_WORK_DRATTACH:
4116 			(void) man_do_dr_attach(wp);
4117 			break;
4118 
4119 		case MAN_WORK_DRSWITCH:
4120 			/*
4121 			 * Return status to man_dr_detach immediately. If
4122 			 * no error submitting SWITCH request, man_iswitch
4123 			 * or man_bclose will cv_signal man_dr_detach on
4124 			 * completion of SWITCH work request.
4125 			 */
4126 			if (man_do_dr_switch(wp) == 0)
4127 				wp_finished = FALSE;
4128 			break;
4129 
4130 		case MAN_WORK_DRDETACH:
4131 			man_do_dr_detach(wp);
4132 			break;
4133 
4134 		case MAN_WORK_SWITCH:
4135 			if (man_iswitch(wp))
4136 				wp_finished = FALSE;
4137 			break;
4138 
4139 		case MAN_WORK_KSTAT_UPDATE:
4140 			man_do_kstats(wp);
4141 			break;
4142 
4143 		default:
4144 			cmn_err(CE_WARN, "man_iwork: "
4145 			    "illegal work type(%d)", wp->mw_type);
4146 			break;
4147 		}
4148 
4149 		mutex_enter(&man_lock);
4150 
4151 		/*
4152 		 * If we've completed the work request, delete, or
4153 		 * cv_signal waiter.
4154 		 */
4155 		if (wp_finished) {
4156 			wp->mw_flags |= MAN_WFLAGS_DONE;
4157 
4158 			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
4159 				cv_signal(&wp->mw_cv);
4160 			else
4161 				man_work_free(wp);
4162 		}
4163 	}
4164 
4165 	mutex_exit(&man_lock);
4166 }
4167 
4168 /*
4169  * man_dr_detach has submitted a request to DRSWITCH a path.
4170  * He is in cv_wait_sig(wp->mw_cv). We forward the work request on to
4171  * man_bwork as a switch request. It should end up back at
4172  * man_iwork, who will cv_signal(wp->mw_cv) man_dr_detach.
4173  *
4174  * Called holding inner perimeter lock.
4175  * man_lock is held to synchronize access to pathgroup list(man_pg).
4176  */
4177 static int
4178 man_do_dr_switch(man_work_t *wp)
4179 {
4180 	man_t		*manp;
4181 	man_pg_t	*mpg;
4182 	man_path_t	*mp;
4183 	man_path_t	*ap;
4184 	man_adest_t	*adp;
4185 	mi_path_t	mpath;
4186 	int		status = 0;
4187 
4188 	adp = &wp->mw_arg;
4189 
4190 	MAN_DBG(MAN_SWITCH, ("man_do_dr_switch: pg_id %d work:", adp->a_pg_id));
4191 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4192 
4193 	mutex_enter(&man_lock);
4194 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4195 	if (manp == NULL || manp->man_pg == NULL) {
4196 		status = ENODEV;
4197 		goto exit;
4198 	}
4199 
4200 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4201 	if (mpg == NULL) {
4202 		status = ENODEV;
4203 		goto exit;
4204 	}
4205 
4206 	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4207 		status = EAGAIN;
4208 		goto exit;
4209 	}
4210 
4211 	/*
4212 	 * Check to see if detaching device is active. If so, activate
4213 	 * an alternate.
4214 	 */
4215 	mp = man_find_active_path(mpg->mpg_pathp);
4216 	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4217 
4218 		ap = man_find_alternate_path(mpg->mpg_pathp);
4219 		if (ap == NULL) {
4220 			status = EBUSY;
4221 			goto exit;
4222 		}
4223 
4224 		bzero((char *)&mpath, sizeof (mi_path_t));
4225 
4226 		mpath.mip_cmd = MI_PATH_ACTIVATE;
4227 		mpath.mip_man_ppa = 0;
4228 		mpath.mip_pg_id = 0;
4229 		mpath.mip_devs[0] = ap->mp_device;
4230 		mpath.mip_ndevs = 1;
4231 		ether_copy(&manp->man_eaddr, &mpath.mip_eaddr);
4232 
4233 		/*
4234 		 * DR thread is sleeping on wp->mw_cv. We change the work
4235 		 * request from DRSWITCH to SWITCH and submit it to
4236 		 * for processing by man_bwork (via man_pg_cmd). At
4237 		 * completion the SWITCH work request is processed by
4238 		 * man_iswitch() or man_bclose and the DR thread will
4239 		 * be cv_signal'd.
4240 		 */
4241 		wp->mw_type = MAN_WORK_SWITCH;
4242 		if (status = man_pg_cmd(&mpath, wp))
4243 			goto exit;
4244 
4245 	} else {
4246 		/*
4247 		 * Tell man_dr_detach that detaching device is not currently
4248 		 * in use.
4249 		 */
4250 		status = ENODEV;
4251 	}
4252 
4253 exit:
4254 	if (status) {
4255 		/*
4256 		 * ENODEV is a noop, not really an error.
4257 		 */
4258 		if (status != ENODEV)
4259 			wp->mw_status = status;
4260 	}
4261 	mutex_exit(&man_lock);
4262 
4263 	return (status);
4264 }
4265 
4266 /*
4267  * man_dr_attach has submitted a request to DRATTACH a path,
4268  * add that path to the path list.
4269  *
4270  * Called holding perimeter lock.
4271  */
4272 static int
4273 man_do_dr_attach(man_work_t *wp)
4274 {
4275 	man_t		*manp;
4276 	man_adest_t	*adp;
4277 	mi_path_t	mpath;
4278 	manc_t		manc;
4279 	int		status = 0;
4280 
4281 	adp = &wp->mw_arg;
4282 
4283 	MAN_DBG(MAN_SWITCH, ("man_do_dr_attach: pg_id %d work:", adp->a_pg_id));
4284 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4285 
4286 	mutex_enter(&man_lock);
4287 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4288 	if (manp == NULL || manp->man_pg == NULL) {
4289 		status = ENODEV;
4290 		goto exit;
4291 	}
4292 
4293 	if (status = man_get_iosram(&manc)) {
4294 		goto exit;
4295 	}
4296 	/*
4297 	 * Extract SC ethernet address from IOSRAM.
4298 	 */
4299 	ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4300 
4301 	mpath.mip_pg_id = adp->a_pg_id;
4302 	mpath.mip_man_ppa = adp->a_man_ppa;
4303 	/*
4304 	 * man_dr_attach passes the new device info in a_sf_dev.
4305 	 */
4306 	MAN_DBG(MAN_DR, ("man_do_dr_attach: "));
4307 	MAN_DBGCALL(MAN_DR, man_print_dev(&adp->a_sf_dev));
4308 	mpath.mip_devs[0] = adp->a_sf_dev;
4309 	mpath.mip_ndevs = 1;
4310 	mpath.mip_cmd = MI_PATH_ADD;
4311 	status = man_pg_cmd(&mpath, NULL);
4312 
4313 exit:
4314 	mutex_exit(&man_lock);
4315 	return (status);
4316 }
4317 
4318 /*
4319  * man_dr_detach has submitted a request to DRDETACH a path.
4320  * He is in cv_wait_sig(wp->mw_cv). We remove the path and
4321  * cv_signal(wp->mw_cv) man_dr_detach.
4322  *
4323  * Called holding perimeter lock.
4324  */
4325 static void
4326 man_do_dr_detach(man_work_t *wp)
4327 {
4328 	man_t		*manp;
4329 	man_pg_t	*mpg;
4330 	man_path_t	*mp;
4331 	man_adest_t	*adp;
4332 	manc_t		manc;
4333 	mi_path_t	mpath;
4334 	int		i;
4335 	int		found;
4336 	int		status = 0;
4337 
4338 	adp = &wp->mw_arg;
4339 
4340 	MAN_DBG(MAN_SWITCH, ("man_do_dr_detach: pg_id %d work:", adp->a_pg_id));
4341 	MAN_DBGCALL(MAN_SWITCH, man_print_work(wp));
4342 
4343 	mutex_enter(&man_lock);
4344 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4345 	if (manp == NULL || manp->man_pg == NULL) {
4346 		status = ENODEV;
4347 		goto exit;
4348 	}
4349 
4350 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4351 	if (mpg == NULL) {
4352 		status = ENODEV;
4353 		goto exit;
4354 	}
4355 
4356 	if (mpg->mpg_flags & MAN_PG_SWITCHING) {
4357 		status = EAGAIN;
4358 		goto exit;
4359 	}
4360 
4361 	/*
4362 	 * We should have switched detaching path if it was active.
4363 	 */
4364 	mp = man_find_active_path(mpg->mpg_pathp);
4365 	if (mp && mp->mp_device.mdev_ppa == adp->a_sf_dev.mdev_ppa) {
4366 		status = EAGAIN;
4367 		goto exit;
4368 	}
4369 
4370 	/*
4371 	 * Submit an ASSIGN command, minus the detaching device.
4372 	 */
4373 	bzero((char *)&mpath, sizeof (mi_path_t));
4374 
4375 	if (status = man_get_iosram(&manc)) {
4376 		goto exit;
4377 	}
4378 
4379 	mpath.mip_cmd = MI_PATH_ASSIGN;
4380 	mpath.mip_man_ppa = 0;
4381 	mpath.mip_pg_id = 0;
4382 
4383 	mp = mpg->mpg_pathp;
4384 	i = 0;
4385 	found = FALSE;
4386 	while (mp != NULL) {
4387 		if (mp->mp_device.mdev_ppa != adp->a_sf_dev.mdev_ppa) {
4388 			mpath.mip_devs[i] = mp->mp_device;
4389 			i++;
4390 		} else {
4391 			found = TRUE;
4392 		}
4393 		mp = mp->mp_next;
4394 	}
4395 
4396 	if (found) {
4397 		/*
4398 		 * Need to include SCs ethernet address in command.
4399 		 */
4400 		mpath.mip_ndevs = i;
4401 		ether_copy(&manc.manc_sc_eaddr, &mpath.mip_eaddr);
4402 
4403 		status = man_pg_cmd(&mpath, NULL);
4404 	}
4405 
4406 	/*
4407 	 * Hand back status to man_dr_detach request.
4408 	 */
4409 exit:
4410 	if (status != ENODEV)
4411 		wp->mw_status = status;
4412 
4413 	mutex_exit(&man_lock);
4414 
4415 }
4416 
4417 
4418 /*
4419  * The background thread has configured new lower multiplexor streams for
4420  * the given destinations. Update the appropriate destination data structures
4421  * inside the inner perimeter. We must take care to deal with destinations
4422  * whose upper stream has closed or detached from lower streams.
4423  *
4424  * Returns
4425  *	0		Done with work request.
4426  *	1		Reused work request.
4427  */
4428 static int
4429 man_iswitch(man_work_t *wp)
4430 {
4431 	man_adest_t	*adp;
4432 	man_t		*manp;
4433 	man_pg_t	*mpg;
4434 	man_path_t	*mp = NULL;
4435 	man_dest_t	*mdp;
4436 	man_dest_t	*tdp;
4437 	int		i;
4438 	int		switch_ok = TRUE;
4439 
4440 	adp = &wp->mw_arg;
4441 
4442 	if (wp->mw_status != 0) {
4443 		switch_ok = FALSE;	/* Never got things opened */
4444 	}
4445 
4446 	/*
4447 	 * Update destination structures as appropriate.
4448 	 */
4449 	for (i = 0; i < adp->a_ndests; i++) {
4450 		man_dest_t	tmp;
4451 
4452 		/*
4453 		 * Check to see if lower stream we just switch is still
4454 		 * around.
4455 		 */
4456 		tdp = &adp->a_mdp[i];
4457 		mdp = man_switch_match(tdp, adp->a_pg_id, tdp->md_switch_id);
4458 
4459 		if (mdp == NULL)
4460 			continue;
4461 
4462 		if (switch_ok == FALSE) {
4463 			/*
4464 			 * Switch failed for some reason.  Clear
4465 			 * PLUMBING flag and retry switch again later.
4466 			 */
4467 			man_ifail_dest(mdp);
4468 			continue;
4469 		}
4470 
4471 		/*
4472 		 * Swap new info, for old. We return the old info to
4473 		 * man_bwork to close things up below.
4474 		 */
4475 		bcopy((char *)mdp, (char *)&tmp, sizeof (man_dest_t));
4476 
4477 		ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4478 		ASSERT(mdp->md_state == tdp->md_state);
4479 
4480 		mdp->md_state = tdp->md_state;
4481 
4482 		/*
4483 		 * save the wq from the destination passed(tdp).
4484 		 */
4485 		mdp->md_wq = tdp->md_wq;
4486 		RD(mdp->md_wq)->q_ptr = (void *)(mdp);
4487 		WR(mdp->md_wq)->q_ptr = (void *)(mdp);
4488 
4489 		mdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4490 		mdp->md_state |= MAN_DSTATE_READY;
4491 
4492 		ASSERT(mdp->md_device.mdev_major == adp->a_sf_dev.mdev_major);
4493 
4494 		ASSERT(tdp->md_device.mdev_ppa == adp->a_st_dev.mdev_ppa);
4495 		ASSERT(tdp->md_device.mdev_major == adp->a_st_dev.mdev_major);
4496 
4497 		mdp->md_device = tdp->md_device;
4498 		mdp->md_muxid = tdp->md_muxid;
4499 		mdp->md_linkstate = MAN_LINKUNKNOWN;
4500 		(void) drv_getparm(TIME, &mdp->md_lastswitch);
4501 		mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4502 		mdp->md_switch_id = 0;
4503 		mdp->md_switches++;
4504 		mdp->md_dlpierrors = 0;
4505 		D_SETSTATE(mdp, DL_UNATTACHED);
4506 
4507 		/*
4508 		 * Resync lower w/ upper dlpi state. This will start link
4509 		 * timer if/when lower stream goes to DL_IDLE (see man_lrsrv).
4510 		 */
4511 		man_reset_dlpi((void *)mdp);
4512 
4513 		bcopy((char *)&tmp, (char *)tdp, sizeof (man_dest_t));
4514 	}
4515 
4516 	if (switch_ok) {
4517 		for (i = 0; i < adp->a_ndests; i++) {
4518 			tdp = &adp->a_mdp[i];
4519 
4520 			tdp->md_state &= ~MAN_DSTATE_PLUMBING;
4521 			tdp->md_state &= ~MAN_DSTATE_INITIALIZING;
4522 			tdp->md_state |= MAN_DSTATE_READY;
4523 		}
4524 	} else {
4525 		/*
4526 		 * Never got switch-to destinations open, free them.
4527 		 */
4528 		man_kfree(adp->a_mdp,
4529 		    sizeof (man_dest_t) * adp->a_ndests);
4530 	}
4531 
4532 	/*
4533 	 * Clear pathgroup switching flag and update path flags.
4534 	 */
4535 	mutex_enter(&man_lock);
4536 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
4537 
4538 	ASSERT(manp != NULL);
4539 	ASSERT(manp->man_pg != NULL);
4540 
4541 	mpg = man_find_pg_by_id(manp->man_pg, adp->a_pg_id);
4542 	ASSERT(mpg != NULL);
4543 	ASSERT(mpg->mpg_flags & MAN_PG_SWITCHING);
4544 	mpg->mpg_flags &= ~MAN_PG_SWITCHING;
4545 
4546 	/*
4547 	 * Switch succeeded, mark path we switched from as failed, and
4548 	 * device we switch to as active and clear its failed flag (if set).
4549 	 * Sync up kstats.
4550 	 */
4551 	if (switch_ok) {
4552 		mp = man_find_active_path(mpg->mpg_pathp);
4553 		if (mp != NULL) {
4554 
4555 			ASSERT(adp->a_sf_dev.mdev_major != 0);
4556 
4557 			MAN_DBG(MAN_SWITCH, ("man_iswitch: switch from dev:"));
4558 			MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_sf_dev));
4559 
4560 			mp->mp_device.mdev_state &= ~MDEV_ACTIVE;
4561 		} else
4562 			ASSERT(adp->a_sf_dev.mdev_major == 0);
4563 
4564 		MAN_DBG(MAN_SWITCH, ("man_iswitch: switch to dev:"));
4565 		MAN_DBGCALL(MAN_SWITCH, man_print_dev(&adp->a_st_dev));
4566 
4567 		ASSERT(adp->a_st_dev.mdev_major != 0);
4568 
4569 		mp = man_find_path_by_ppa(mpg->mpg_pathp,
4570 		    adp->a_st_dev.mdev_ppa);
4571 
4572 		ASSERT(mp != NULL);
4573 
4574 		mp->mp_device.mdev_state |= MDEV_ACTIVE;
4575 	}
4576 
4577 	/*
4578 	 * Decrement manp reference count and hand back work request if
4579 	 * needed.
4580 	 */
4581 	manp->man_refcnt--;
4582 
4583 	if (switch_ok) {
4584 		wp->mw_type = MAN_WORK_CLOSE;
4585 		man_work_add(man_bwork_q, wp);
4586 	}
4587 
4588 	mutex_exit(&man_lock);
4589 
4590 	return (switch_ok);
4591 }
4592 
4593 /*
4594  * Find the destination in the upper stream that we just switched.
4595  */
4596 man_dest_t *
4597 man_switch_match(man_dest_t *sdp, int pg_id, void *sid)
4598 {
4599 	man_dest_t	*mdp = NULL;
4600 	manstr_t	*msp;
4601 
4602 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4603 		/*
4604 		 * Check if upper stream closed, or detached.
4605 		 */
4606 		if (msp != sdp->md_msp)
4607 			continue;
4608 
4609 		if (msp->ms_dests == NULL)
4610 			break;
4611 
4612 		mdp = &msp->ms_dests[pg_id];
4613 
4614 		/*
4615 		 * Upper stream detached and reattached while we were
4616 		 * switching.
4617 		 */
4618 		if (mdp->md_switch_id != sid) {
4619 			mdp = NULL;
4620 			break;
4621 		}
4622 	}
4623 
4624 	return (mdp);
4625 }
4626 
4627 /*
4628  * bg_thread cant complete the switch for some reason. (Re)start the
4629  * linkcheck timer again.
4630  */
4631 static void
4632 man_ifail_dest(man_dest_t *mdp)
4633 {
4634 	ASSERT(mdp->md_lc_timer_id == 0);
4635 	ASSERT(mdp->md_bc_id == 0);
4636 	ASSERT(mdp->md_state & MAN_DSTATE_PLUMBING);
4637 
4638 	MAN_DBG(MAN_SWITCH, ("man_ifail_dest"));
4639 	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
4640 
4641 	mdp->md_state &= ~MAN_DSTATE_PLUMBING;
4642 	mdp->md_linkstate = MAN_LINKFAIL;
4643 
4644 	/*
4645 	 * If we have not yet initialized link, or the upper stream is
4646 	 * DL_IDLE, restart the linktimer.
4647 	 */
4648 	if ((mdp->md_state & MAN_DSTATE_INITIALIZING) ||
4649 	    ((mdp->md_msp->ms_sap == ETHERTYPE_IPV6 ||
4650 	    mdp->md_msp->ms_sap == ETHERTYPE_IP) &&
4651 	    mdp->md_msp->ms_dlpistate == DL_IDLE)) {
4652 
4653 		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
4654 		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4655 	}
4656 
4657 }
4658 
4659 /*
4660  * Arrange to replay all of ms_dl_mp on the new lower stream to get it
4661  * in sync with the upper stream. Note that this includes setting the
4662  * physical address.
4663  *
4664  * Called from qtimeout with inner perimeter lock.
4665  */
4666 static void
4667 man_reset_dlpi(void *argp)
4668 {
4669 	man_dest_t	*mdp = (man_dest_t *)argp;
4670 	manstr_t	*msp;
4671 	mblk_t		*mp;
4672 	mblk_t		*rmp = NULL;
4673 	mblk_t		*tmp;
4674 
4675 	mdp->md_lc_timer_id = 0;
4676 
4677 	if (mdp->md_state != MAN_DSTATE_READY) {
4678 		MAN_DBG(MAN_DLPI, ("man_reset_dlpi: not ready!"));
4679 		return;
4680 	}
4681 
4682 	msp = mdp->md_msp;
4683 
4684 	rmp = man_dup_mplist(msp->ms_dl_mp);
4685 	if (rmp == NULL)
4686 		goto fail;
4687 
4688 	/*
4689 	 * Send down an unbind and detach request, just to clean things
4690 	 * out, we ignore ERROR_ACKs for unbind and detach in man_lrsrv.
4691 	 */
4692 	tmp = man_alloc_ubreq_dreq();
4693 	if (tmp == NULL) {
4694 		goto fail;
4695 	}
4696 	mp = tmp;
4697 	while (mp->b_next != NULL)
4698 		mp = mp->b_next;
4699 	mp->b_next = rmp;
4700 	rmp = tmp;
4701 
4702 	man_dlpi_replay(mdp, rmp);
4703 
4704 	return;
4705 
4706 fail:
4707 
4708 	while (rmp) {
4709 		mp = rmp;
4710 		rmp = rmp->b_next;
4711 		mp->b_next = mp->b_prev = NULL;
4712 		freemsg(mp);
4713 	}
4714 
4715 	ASSERT(mdp->md_lc_timer_id == 0);
4716 	ASSERT(mdp->md_bc_id == 0);
4717 
4718 	/*
4719 	 * If low on memory, try again later. I Could use qbufcall, but that
4720 	 * could fail and I would have to try and recover from that w/
4721 	 * qtimeout anyway.
4722 	 */
4723 	mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_reset_dlpi,
4724 	    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
4725 }
4726 
4727 /*
4728  * Once we receive acknowledgement that DL_ATTACH_REQ was successful,
4729  * we can send down the DL_* related IOCTLs (e.g. DL_IOC_HDR). If we
4730  * try and send them downsteam w/o waiting, the ioctl's get processed before
4731  * the ATTACH_REQ and they are rejected. TBD - could just do the lower
4732  * dlpi state change in lock step. TBD
4733  */
4734 static int
4735 man_dlioc_replay(man_dest_t *mdp)
4736 {
4737 	mblk_t		*rmp;
4738 	int		status = 1;
4739 
4740 	if (mdp->md_msp->ms_dlioc_mp == NULL)
4741 		goto exit;
4742 
4743 	rmp = man_dup_mplist(mdp->md_msp->ms_dlioc_mp);
4744 	if (rmp == NULL) {
4745 		status = 0;
4746 		goto exit;
4747 	}
4748 
4749 	man_dlpi_replay(mdp, rmp);
4750 exit:
4751 	return (status);
4752 }
4753 
4754 static mblk_t *
4755 man_alloc_ubreq_dreq()
4756 {
4757 	mblk_t			*dreq;
4758 	mblk_t			*ubreq = NULL;
4759 	union DL_primitives	*dlp;
4760 
4761 	dreq = allocb(DL_DETACH_REQ_SIZE, BPRI_MED);
4762 	if (dreq == NULL)
4763 		goto exit;
4764 
4765 	dreq->b_datap->db_type = M_PROTO;
4766 	dlp = (union DL_primitives *)dreq->b_rptr;
4767 	dlp->dl_primitive = DL_DETACH_REQ;
4768 	dreq->b_wptr += DL_DETACH_REQ_SIZE;
4769 
4770 	ubreq = allocb(DL_UNBIND_REQ_SIZE, BPRI_MED);
4771 	if (ubreq == NULL) {
4772 		freemsg(dreq);
4773 		goto exit;
4774 	}
4775 
4776 	ubreq->b_datap->db_type = M_PROTO;
4777 	dlp = (union DL_primitives *)ubreq->b_rptr;
4778 	dlp->dl_primitive = DL_UNBIND_REQ;
4779 	ubreq->b_wptr += DL_UNBIND_REQ_SIZE;
4780 
4781 	ubreq->b_next = dreq;
4782 
4783 exit:
4784 
4785 	return (ubreq);
4786 }
4787 
4788 static mblk_t *
4789 man_dup_mplist(mblk_t *mp)
4790 {
4791 	mblk_t	*listp = NULL;
4792 	mblk_t	*tailp = NULL;
4793 
4794 	for (; mp != NULL; mp = mp->b_next) {
4795 
4796 		mblk_t	*nmp;
4797 		mblk_t	*prev;
4798 		mblk_t	*next;
4799 
4800 		prev = mp->b_prev;
4801 		next = mp->b_next;
4802 		mp->b_prev = mp->b_next = NULL;
4803 
4804 		nmp = copymsg(mp);
4805 
4806 		mp->b_prev = prev;
4807 		mp->b_next = next;
4808 
4809 		if (nmp == NULL)
4810 			goto nomem;
4811 
4812 		if (listp == NULL) {
4813 			listp = tailp = nmp;
4814 		} else {
4815 			tailp->b_next = nmp;
4816 			tailp = nmp;
4817 		}
4818 	}
4819 
4820 	return (listp);
4821 nomem:
4822 
4823 	while (listp) {
4824 		mp = listp;
4825 		listp = mp->b_next;
4826 		mp->b_next = mp->b_prev = NULL;
4827 		freemsg(mp);
4828 	}
4829 
4830 	return (NULL);
4831 
4832 }
4833 
4834 static mblk_t *
4835 man_alloc_physreq_mp(eaddr_t *man_eap)
4836 {
4837 
4838 	mblk_t			*mp;
4839 	union DL_primitives	*dlp;
4840 	t_uscalar_t		off;
4841 	eaddr_t			*eap;
4842 
4843 	mp = allocb(DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL, BPRI_MED);
4844 	if (mp == NULL)
4845 		goto exit;
4846 
4847 	mp->b_datap->db_type = M_PROTO;
4848 	dlp = (union DL_primitives *)mp->b_wptr;
4849 	dlp->set_physaddr_req.dl_primitive = DL_SET_PHYS_ADDR_REQ;
4850 	dlp->set_physaddr_req.dl_addr_length = ETHERADDRL;
4851 	off = DL_SET_PHYS_ADDR_REQ_SIZE;
4852 	dlp->set_physaddr_req.dl_addr_offset =  off;
4853 	mp->b_wptr += DL_SET_PHYS_ADDR_REQ_SIZE + ETHERADDRL;
4854 
4855 	eap = (eaddr_t *)(mp->b_rptr + off);
4856 	ether_copy(man_eap, eap);
4857 
4858 exit:
4859 	MAN_DBG(MAN_DLPI, ("man_alloc_physreq: physaddr %s\n",
4860 	    ether_sprintf(eap)));
4861 
4862 	return (mp);
4863 }
4864 
4865 /*
4866  * A new path in a pathgroup has become active for the first time. Setup
4867  * the lower destinations in prepartion for man_pg_activate to call
4868  * man_autoswitch.
4869  */
4870 static void
4871 man_add_dests(man_pg_t *mpg)
4872 {
4873 	manstr_t	*msp;
4874 	man_dest_t	*mdp;
4875 
4876 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4877 
4878 		if (!man_str_uses_pg(msp, mpg))
4879 			continue;
4880 
4881 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4882 
4883 /*
4884  * TBD - Take out
4885  *		ASSERT(mdp->md_device.mdev_state == MDEV_UNASSIGNED);
4886  *		ASSERT(mdp->md_state == MAN_DSTATE_NOTPRESENT);
4887  */
4888 		if (mdp->md_device.mdev_state != MDEV_UNASSIGNED) {
4889 			cmn_err(CE_NOTE, "man_add_dests mdev !unassigned");
4890 			MAN_DBGCALL(MAN_PATH, man_print_mdp(mdp));
4891 		}
4892 
4893 		man_start_dest(mdp, msp, mpg);
4894 	}
4895 
4896 }
4897 
4898 static int
4899 man_remove_dests(man_pg_t *mpg)
4900 {
4901 	manstr_t	*msp;
4902 	int		close_cnt = 0;
4903 	man_dest_t	*cdp;
4904 	man_dest_t	*mdp;
4905 	man_dest_t	*tdp;
4906 	man_work_t	*wp;
4907 	mblk_t		*mp;
4908 	int		status = 0;
4909 
4910 	wp = man_work_alloc(MAN_WORK_CLOSE, KM_NOSLEEP);
4911 	if (wp == NULL) {
4912 		status = ENOMEM;
4913 		goto exit;
4914 	}
4915 
4916 	/*
4917 	 * Count up number of destinations we need to close.
4918 	 */
4919 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4920 		if (!man_str_uses_pg(msp, mpg))
4921 			continue;
4922 
4923 		close_cnt++;
4924 	}
4925 
4926 	if (close_cnt == 0)
4927 		goto exit;
4928 
4929 	cdp = man_kzalloc(sizeof (man_dest_t) * close_cnt, KM_NOSLEEP);
4930 	if (cdp == NULL) {
4931 		status = ENOMEM;
4932 		man_work_free(wp);
4933 		goto exit;
4934 	}
4935 
4936 	tdp = cdp;
4937 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
4938 		if (!man_str_uses_pg(msp, mpg))
4939 			continue;
4940 
4941 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
4942 
4943 		mdp->md_state |= MAN_DSTATE_CLOSING;
4944 		mdp->md_device.mdev_state = MDEV_UNASSIGNED;
4945 		mdp->md_msp = NULL;
4946 		mdp->md_rq = NULL;
4947 
4948 		/*
4949 		 * Clean up optimized destination pointer if we are
4950 		 * closing it.
4951 		 */
4952 		man_set_optimized_dest(msp);
4953 
4954 		if (mdp->md_lc_timer_id != 0) {
4955 			(void) quntimeout(man_ctl_wq, mdp->md_lc_timer_id);
4956 			mdp->md_lc_timer_id = 0;
4957 		}
4958 		if (mdp->md_bc_id != 0) {
4959 			qunbufcall(man_ctl_wq, mdp->md_bc_id);
4960 			mdp->md_bc_id = 0;
4961 		}
4962 
4963 		mutex_enter(&mdp->md_lock);
4964 		while ((mp = mdp->md_dmp_head) != NULL) {
4965 			mdp->md_dmp_head = mp->b_next;
4966 			mp->b_next = NULL;
4967 			freemsg(mp);
4968 		}
4969 		mdp->md_dmp_count = 0;
4970 		mdp->md_dmp_tail = NULL;
4971 		mutex_exit(&mdp->md_lock);
4972 
4973 		*tdp++ = *mdp;
4974 
4975 		mdp->md_state = MAN_DSTATE_NOTPRESENT;
4976 		mdp->md_muxid = -1;
4977 	}
4978 
4979 	wp->mw_arg.a_mdp = cdp;
4980 	wp->mw_arg.a_ndests = close_cnt;
4981 	man_work_add(man_bwork_q, wp);
4982 
4983 exit:
4984 	return (status);
4985 
4986 }
4987 
4988 /*
4989  * Returns TRUE if stream uses pathgroup, FALSE otherwise.
4990  */
4991 static int
4992 man_str_uses_pg(manstr_t *msp, man_pg_t *mpg)
4993 {
4994 	int	status;
4995 
4996 	status = ((msp->ms_flags & MAN_SFLAG_CONTROL)	||
4997 	    (msp->ms_dests == NULL)	||
4998 	    (msp->ms_manp == NULL)	||
4999 	    (msp->ms_manp->man_meta_ppa != mpg->mpg_man_ppa));
5000 
5001 	return (!status);
5002 }
5003 
5004 static int
5005 man_gettimer(int timer, man_dest_t *mdp)
5006 {
5007 
5008 	int attached = TRUE;
5009 	int time = 0;
5010 
5011 	if (mdp == NULL || mdp->md_msp == NULL || mdp->md_msp->ms_manp == NULL)
5012 		attached = FALSE;
5013 
5014 	switch (timer) {
5015 	case MAN_TIMER_INIT:
5016 		if (attached)
5017 			time = mdp->md_msp->ms_manp->man_init_time;
5018 		else
5019 			time = MAN_INIT_TIME;
5020 		break;
5021 
5022 	case MAN_TIMER_LINKCHECK:
5023 		if (attached) {
5024 			if (mdp->md_linkstate == MAN_LINKSTALE)
5025 				time = mdp->md_msp->ms_manp->man_linkstale_time;
5026 			else
5027 				time = mdp->md_msp->ms_manp->man_linkcheck_time;
5028 		} else
5029 			time = MAN_LINKCHECK_TIME;
5030 		break;
5031 
5032 	case MAN_TIMER_DLPIRESET:
5033 		if (attached)
5034 			time = mdp->md_msp->ms_manp->man_dlpireset_time;
5035 		else
5036 			time = MAN_DLPIRESET_TIME;
5037 		break;
5038 
5039 	default:
5040 		MAN_DBG(MAN_LINK, ("man_gettimer: unknown timer %d", timer));
5041 		time = MAN_LINKCHECK_TIME;
5042 		break;
5043 	}
5044 
5045 	return (drv_usectohz(time));
5046 }
5047 
5048 /*
5049  * Check the links for each active destination. Called inside inner
5050  * perimeter via qtimeout. This timer only runs on the domain side of the
5051  * driver. It should never run on the SC side.
5052  *
5053  * On a MAN_LINKGOOD link, we check/probe the link health every
5054  * MAN_LINKCHECK_TIME seconds. If the link goes MAN_LINKSTALE, the we probe
5055  * the link every MAN_LINKSTALE_TIME seconds, and fail the link after probing
5056  * the link MAN_LINKSTALE_RETRIES times.
5057  * The man_lock is held to synchronize access pathgroup list(man_pg).
5058  */
5059 void
5060 man_linkcheck_timer(void *argp)
5061 {
5062 	man_dest_t		*mdp = (man_dest_t *)argp;
5063 	int			restart_timer = TRUE;
5064 	int			send_ping = TRUE;
5065 	int			newstate;
5066 	int			oldstate;
5067 	man_pg_t		*mpg;
5068 	man_path_t		*mp;
5069 
5070 	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: mdp"));
5071 	MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5072 
5073 	/*
5074 	 * Clear timeout id and check if someones waiting on us to
5075 	 * complete a close.
5076 	 */
5077 	mdp->md_lc_timer_id = 0;
5078 
5079 	if (mdp->md_state == MAN_DSTATE_NOTPRESENT ||
5080 	    mdp->md_state & MAN_DSTATE_BUSY) {
5081 
5082 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: not ready mdp"));
5083 		MAN_DBGCALL(MAN_LINK, man_print_mdp(mdp));
5084 		goto exit;
5085 	}
5086 
5087 	mutex_enter(&man_lock);
5088 	/*
5089 	 * If the lower stream needs initializing, just go straight to
5090 	 * switch code. As the linkcheck timer is started for all
5091 	 * SAPs, do not send ping packets during the initialization.
5092 	 */
5093 	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5094 		send_ping = FALSE;
5095 		goto do_switch;
5096 	}
5097 
5098 	newstate = oldstate = mdp->md_linkstate;
5099 
5100 	if (!man_needs_linkcheck(mdp)) {
5101 		cmn_err(CE_NOTE,
5102 		    "man_linkcheck_timer: unneeded linkcheck on mdp(0x%p)",
5103 		    (void *)mdp);
5104 		mutex_exit(&man_lock);
5105 		return;
5106 	}
5107 
5108 	/*
5109 	 * The above call to  man_needs_linkcheck() validates
5110 	 * mdp->md_msp and mdp->md_msp->ms_manp pointers.
5111 	 */
5112 	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5113 	ASSERT(mpg != NULL);
5114 	mp = man_find_path_by_ppa(mpg->mpg_pathp, mdp->md_device.mdev_ppa);
5115 	ASSERT(mp != NULL);
5116 
5117 	/*
5118 	 * This is the most common case, when traffic is flowing.
5119 	 */
5120 	if (mdp->md_rcvcnt != mdp->md_lastrcvcnt) {
5121 
5122 		newstate = MAN_LINKGOOD;
5123 		mdp->md_lastrcvcnt = mdp->md_rcvcnt;
5124 		send_ping = FALSE;
5125 
5126 		/*
5127 		 * Clear the FAILED flag and update lru.
5128 		 */
5129 		mp->mp_device.mdev_state &= ~MDEV_FAILED;
5130 		(void) drv_getparm(TIME, &mp->mp_lru);
5131 
5132 		if (mdp->md_link_updown_msg == MAN_LINK_DOWN_MSG) {
5133 			man_t *manp = mdp->md_msp->ms_manp;
5134 
5135 			cmn_err(CE_NOTE, "%s%d Link up",
5136 			    ddi_major_to_name(manp->man_meta_major),
5137 			    manp->man_meta_ppa);
5138 
5139 			mdp->md_link_updown_msg = MAN_LINK_UP_MSG;
5140 		}
5141 
5142 		goto done;
5143 	}
5144 
5145 	/*
5146 	 * If we're here, it means we have not seen any traffic
5147 	 */
5148 	switch (oldstate) {
5149 	case MAN_LINKINIT:
5150 	case MAN_LINKGOOD:
5151 		newstate = MAN_LINKSTALE;
5152 		mdp->md_linkstales++;
5153 		mdp->md_linkstale_retries =
5154 		    mdp->md_msp->ms_manp->man_linkstale_retries;
5155 		break;
5156 
5157 	case MAN_LINKSTALE:
5158 	case MAN_LINKFAIL:
5159 		mdp->md_linkstales++;
5160 		mdp->md_linkstale_retries--;
5161 		if (mdp->md_linkstale_retries < 0) {
5162 			newstate = MAN_LINKFAIL;
5163 			mdp->md_linkfails++;
5164 			mdp->md_linkstale_retries =
5165 			    mdp->md_msp->ms_manp->man_linkstale_retries;
5166 			/*
5167 			 * Mark the destination as FAILED and
5168 			 * update lru.
5169 			 */
5170 			if (oldstate != MAN_LINKFAIL) {
5171 				mp->mp_device.mdev_state |= MDEV_FAILED;
5172 				(void) drv_getparm(TIME, &mp->mp_lru);
5173 			}
5174 		}
5175 		break;
5176 
5177 	default:
5178 		cmn_err(CE_WARN, "man_linkcheck_timer: illegal link"
5179 		    " state %d", oldstate);
5180 		break;
5181 	}
5182 done:
5183 
5184 	if (oldstate != newstate) {
5185 
5186 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer"
5187 		    " link state %s -> %s", lss[oldstate],
5188 		    lss[newstate]));
5189 
5190 		mdp->md_linkstate = newstate;
5191 	}
5192 
5193 	/*
5194 	 * Do any work required from state transitions above.
5195 	 */
5196 	if (newstate == MAN_LINKFAIL) {
5197 do_switch:
5198 		if (!man_do_autoswitch(mdp)) {
5199 			/*
5200 			 * Stop linkcheck timer until switch completes.
5201 			 */
5202 			restart_timer = FALSE;
5203 			send_ping = FALSE;
5204 		}
5205 	}
5206 
5207 	mutex_exit(&man_lock);
5208 	if (send_ping)
5209 		man_do_icmp_bcast(mdp, mdp->md_msp->ms_sap);
5210 
5211 	if (restart_timer)
5212 		mdp->md_lc_timer_id = qtimeout(man_ctl_wq, man_linkcheck_timer,
5213 		    (void *)mdp, man_gettimer(MAN_TIMER_LINKCHECK, mdp));
5214 
5215 exit:
5216 	MAN_DBG(MAN_LINK, ("man_linkcheck_timer: returns"));
5217 
5218 }
5219 
5220 /*
5221  * Handle linkcheck initiated autoswitching.
5222  * Called with man_lock held.
5223  */
5224 static int
5225 man_do_autoswitch(man_dest_t *mdp)
5226 {
5227 	man_pg_t	*mpg;
5228 	man_path_t	*ap;
5229 	int		status = 0;
5230 
5231 	ASSERT(MUTEX_HELD(&man_lock));
5232 	/*
5233 	 * Set flags and refcnt. Cleared in man_iswitch when SWITCH completes.
5234 	 */
5235 	mdp->md_msp->ms_manp->man_refcnt++;
5236 
5237 	mpg = man_find_pg_by_id(mdp->md_msp->ms_manp->man_pg, mdp->md_pg_id);
5238 	ASSERT(mpg);
5239 
5240 	if (mpg->mpg_flags & MAN_PG_SWITCHING)
5241 		return (EBUSY);
5242 
5243 	mpg->mpg_flags |= MAN_PG_SWITCHING;
5244 
5245 	if (mdp->md_state == MAN_DSTATE_INITIALIZING) {
5246 		/*
5247 		 * We're initializing, ask for a switch to our currently
5248 		 * active device.
5249 		 */
5250 		status = man_autoswitch(mpg, &mdp->md_device, NULL);
5251 	} else {
5252 
5253 		if (mdp->md_msp != NULL && mdp->md_msp->ms_manp != NULL &&
5254 		    mdp->md_link_updown_msg == MAN_LINK_UP_MSG) {
5255 
5256 			man_t *manp = mdp->md_msp->ms_manp;
5257 
5258 			cmn_err(CE_NOTE, "%s%d Link down",
5259 			    ddi_major_to_name(manp->man_meta_major),
5260 			    manp->man_meta_ppa);
5261 		}
5262 		mdp->md_link_updown_msg = MAN_LINK_DOWN_MSG;
5263 
5264 		MAN_DBG(MAN_LINK, ("man_linkcheck_timer: link failure on %s%d",
5265 		    ddi_major_to_name(mdp->md_device.mdev_major),
5266 		    mdp->md_device.mdev_ppa));
5267 
5268 		ap = man_find_alternate_path(mpg->mpg_pathp);
5269 
5270 		if (ap == NULL) {
5271 			status = ENODEV;
5272 			goto exit;
5273 		}
5274 		status = man_autoswitch(mpg, &ap->mp_device, NULL);
5275 	}
5276 exit:
5277 	if (status != 0) {
5278 		/*
5279 		 * man_iswitch not going to run, clean up.
5280 		 */
5281 		mpg->mpg_flags &= ~MAN_PG_SWITCHING;
5282 		mdp->md_msp->ms_manp->man_refcnt--;
5283 	}
5284 
5285 	return (status);
5286 }
5287 
5288 /*
5289  * Gather up all lower multiplexor streams that have this link open and
5290  * try to switch them. Called from inner perimeter and holding man_lock.
5291  *
5292  *	pg_id		- Pathgroup to do switch for.
5293  *	st_devp		- New device to switch to.
5294  *	wait_for_switch	- whether or not to qwait for completion.
5295  */
5296 static int
5297 man_autoswitch(man_pg_t *mpg, man_dev_t *st_devp, man_work_t *waiter_wp)
5298 {
5299 	man_work_t	*wp;
5300 	int		sdp_cnt = 0;
5301 	man_dest_t	*sdp;
5302 	int		status = 0;
5303 
5304 	ASSERT(MUTEX_HELD(&man_lock));
5305 	if (waiter_wp == NULL) {
5306 		wp = man_work_alloc(MAN_WORK_SWITCH, KM_NOSLEEP);
5307 		if (wp == NULL) {
5308 			status = ENOMEM;
5309 			goto exit;
5310 		}
5311 	} else {
5312 		ASSERT(waiter_wp->mw_type == MAN_WORK_SWITCH);
5313 		wp = waiter_wp;
5314 	}
5315 
5316 	/*
5317 	 * Set dests as PLUMBING, cancel timers and return array of dests
5318 	 * that need a switch.
5319 	 */
5320 	status = man_prep_dests_for_switch(mpg, &sdp, &sdp_cnt);
5321 	if (status) {
5322 		if (waiter_wp == NULL)
5323 			man_work_free(wp);
5324 		goto exit;
5325 	}
5326 
5327 	/*
5328 	 * If no streams are active, there are no streams to switch.
5329 	 * Return ENODEV (see man_pg_activate).
5330 	 */
5331 	if (sdp_cnt == 0) {
5332 		if (waiter_wp == NULL)
5333 			man_work_free(wp);
5334 		status = ENODEV;
5335 		goto exit;
5336 	}
5337 
5338 	/*
5339 	 * Ask the bgthread to switch. See man_bwork.
5340 	 */
5341 	wp->mw_arg.a_sf_dev = sdp->md_device;
5342 	wp->mw_arg.a_st_dev = *st_devp;
5343 	wp->mw_arg.a_pg_id = mpg->mpg_pg_id;
5344 	wp->mw_arg.a_man_ppa = mpg->mpg_man_ppa;
5345 
5346 	wp->mw_arg.a_mdp = sdp;
5347 	wp->mw_arg.a_ndests = sdp_cnt;
5348 	man_work_add(man_bwork_q, wp);
5349 
5350 exit:
5351 
5352 	return (status);
5353 }
5354 
5355 /*
5356  * If an alternate path exists for pathgroup, arrange for switch to
5357  * happen. Note that we need to switch each of msp->dests[pg_id], for
5358  * all on man_strup. We must:
5359  *
5360  *		Cancel any timers
5361  *		Mark dests as PLUMBING
5362  *		Submit switch request to man_bwork_q->
5363  */
5364 static int
5365 man_prep_dests_for_switch(man_pg_t *mpg, man_dest_t **mdpp, int *cntp)
5366 {
5367 	manstr_t	*msp;
5368 	man_dest_t	*mdp;
5369 	int		sdp_cnt = 0;
5370 	man_dest_t	*sdp = NULL;
5371 	man_dest_t	*tdp;
5372 	int		status = 0;
5373 
5374 	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: pg_id %d",
5375 	    mpg->mpg_pg_id));
5376 
5377 	/*
5378 	 * Count up number of streams, there is one destination that needs
5379 	 * switching per stream.
5380 	 */
5381 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5382 		if (man_str_uses_pg(msp, mpg))
5383 			sdp_cnt++;
5384 	}
5385 
5386 	if (sdp_cnt == 0)
5387 		goto exit;
5388 
5389 	sdp = man_kzalloc(sizeof (man_dest_t) * sdp_cnt, KM_NOSLEEP);
5390 	if (sdp == NULL) {
5391 		status = ENOMEM;
5392 		goto exit;
5393 	}
5394 	tdp = sdp;
5395 	/*
5396 	 * Mark each destination as unusable.
5397 	 */
5398 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
5399 		if (man_str_uses_pg(msp, mpg)) {
5400 
5401 			/*
5402 			 * Mark destination as plumbing and store the
5403 			 * address of sdp as a way to identify the
5404 			 * SWITCH request when it comes back (see man_iswitch).
5405 			 */
5406 			mdp = &msp->ms_dests[mpg->mpg_pg_id];
5407 			mdp->md_state |= MAN_DSTATE_PLUMBING;
5408 			mdp->md_switch_id = sdp;
5409 
5410 			/*
5411 			 * Copy destination info.
5412 			 */
5413 			bcopy(mdp, tdp, sizeof (man_dest_t));
5414 			tdp++;
5415 
5416 			/*
5417 			 * Cancel timers.
5418 			 */
5419 			if (mdp->md_lc_timer_id) {
5420 				(void) quntimeout(man_ctl_wq,
5421 				    mdp->md_lc_timer_id);
5422 				mdp->md_lc_timer_id = 0;
5423 			}
5424 			if (mdp->md_bc_id) {
5425 				qunbufcall(man_ctl_wq, mdp->md_bc_id);
5426 				mdp->md_bc_id = 0;
5427 			}
5428 		}
5429 	}
5430 
5431 	*mdpp = sdp;
5432 	*cntp = sdp_cnt;
5433 	status = 0;
5434 exit:
5435 
5436 	MAN_DBG(MAN_SWITCH, ("man_prep_dests_for_switch: returns %d"
5437 	    " sdp(0x%p) sdp_cnt(%d)", status, (void *)sdp, sdp_cnt));
5438 
5439 	return (status);
5440 
5441 }
5442 
5443 /*
5444  * The code below generates an ICMP echo packet and sends it to the
5445  * broadcast address in the hopes that the other end will respond
5446  * and the man_linkcheck_timer logic will see the traffic.
5447  *
5448  * This assumes ethernet-like media.
5449  */
5450 /*
5451  * Generate an ICMP packet. Called exclusive inner perimeter.
5452  *
5453  *	mdp - destination to send packet to.
5454  *	sap - either ETHERTYPE_ARP or ETHERTYPE_IPV6
5455  */
5456 static void
5457 man_do_icmp_bcast(man_dest_t *mdp, t_uscalar_t sap)
5458 {
5459 	mblk_t			*mp = NULL;
5460 
5461 	/* TBD - merge pinger and this routine. */
5462 
5463 	ASSERT(sap == ETHERTYPE_IPV6 || sap == ETHERTYPE_IP);
5464 
5465 	if (sap == ETHERTYPE_IPV6) {
5466 		mdp->md_icmpv6probes++;
5467 	} else {
5468 		mdp->md_icmpv4probes++;
5469 	}
5470 	/*
5471 	 * Send the ICMP message
5472 	 */
5473 	mp = man_pinger(sap);
5474 
5475 	MAN_DBG(MAN_LINK, ("man_do_icmp_bcast: sap=0x%x mp=0x%p",
5476 	    sap, (void *)mp));
5477 	if (mp == NULL)
5478 		return;
5479 
5480 	/*
5481 	 * Send it out.
5482 	 */
5483 	if (man_start_lower(mdp, mp, NULL, MAN_LOWER)) {
5484 
5485 		MAN_DBG(MAN_LINK, ("man_do_icmp_broadcast: xmit failed"));
5486 
5487 		freemsg(mp);
5488 	}
5489 
5490 }
5491 
5492 static mblk_t *
5493 man_pinger(t_uscalar_t sap)
5494 {
5495 	mblk_t		*mp = NULL;
5496 	man_dladdr_t	dlsap;
5497 	icmph_t		*icmph;
5498 	int		ipver;
5499 	ipha_t		*ipha;
5500 	ip6_t		*ip6h;
5501 	int		iph_hdr_len;
5502 	int		datalen = 64;
5503 	uchar_t		*datap;
5504 	uint16_t	size;
5505 	uchar_t		i;
5506 
5507 	dlsap.dl_sap = htons(sap);
5508 	bcopy(&etherbroadcast, &dlsap.dl_phys, sizeof (dlsap.dl_phys));
5509 
5510 	if (sap == ETHERTYPE_IPV6) {
5511 		ipver = IPV6_VERSION;
5512 		iph_hdr_len = sizeof (ip6_t);
5513 		size = ICMP6_MINLEN;
5514 	} else {
5515 		ipver = IPV4_VERSION;
5516 		iph_hdr_len = sizeof (ipha_t);
5517 		size = ICMPH_SIZE;
5518 	}
5519 	size += (uint16_t)iph_hdr_len;
5520 	size += datalen;
5521 
5522 	mp = man_alloc_udreq(size, &dlsap);
5523 	if (mp == NULL)
5524 		goto exit;
5525 
5526 	/*
5527 	 * fill out the ICMP echo packet headers
5528 	 */
5529 	mp->b_cont->b_wptr += iph_hdr_len;
5530 	if (ipver == IPV4_VERSION) {
5531 		ipha = (ipha_t *)mp->b_cont->b_rptr;
5532 		ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
5533 		    | IP_SIMPLE_HDR_LENGTH_IN_WORDS;
5534 		ipha->ipha_type_of_service = 0;
5535 		ipha->ipha_length = size;
5536 		ipha->ipha_fragment_offset_and_flags = IPH_DF;
5537 		ipha->ipha_ttl = 1;
5538 		ipha->ipha_protocol = IPPROTO_ICMP;
5539 		if (man_is_on_domain) {
5540 			manc_t		manc;
5541 
5542 			if (man_get_iosram(&manc)) {
5543 				freemsg(mp);
5544 				mp = NULL;
5545 				goto exit;
5546 			}
5547 
5548 			/*
5549 			 * Domain generates ping packets for domain to
5550 			 * SC network (dman0 <--> scman0).
5551 			 */
5552 			ipha->ipha_dst = manc.manc_sc_ipaddr;
5553 			ipha->ipha_src = manc.manc_dom_ipaddr;
5554 		} else {
5555 			/*
5556 			 * Note that ping packets are only generated
5557 			 * by the SC across scman1 (SC to SC network).
5558 			 */
5559 			ipha->ipha_dst = man_sc_ipaddrs.ip_other_sc_ipaddr;
5560 			ipha->ipha_src = man_sc_ipaddrs.ip_my_sc_ipaddr;
5561 		}
5562 
5563 		ipha->ipha_ident = 0;
5564 
5565 		ipha->ipha_hdr_checksum = 0;
5566 		ipha->ipha_hdr_checksum = IP_CSUM(mp->b_cont, 0, 0);
5567 
5568 	} else {
5569 		ip6h = (ip6_t *)mp->b_cont->b_rptr;
5570 		/*
5571 		 * IP version = 6, priority = 0, flow = 0
5572 		 */
5573 		ip6h->ip6_flow = (IPV6_VERSION << 28);
5574 		ip6h->ip6_plen =
5575 		    htons((short)(size - iph_hdr_len));
5576 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
5577 		ip6h->ip6_hlim = 1;	/* stay on link */
5578 
5579 		if (man_is_on_domain) {
5580 			manc_t		manc;
5581 
5582 			if (man_get_iosram(&manc)) {
5583 				freemsg(mp);
5584 				mp = NULL;
5585 				goto exit;
5586 			}
5587 
5588 			/*
5589 			 * Domain generates ping packets for domain to
5590 			 * SC network (dman0 <--> scman0).
5591 			 */
5592 			ip6h->ip6_src = manc.manc_dom_ipv6addr;
5593 			ip6h->ip6_dst = manc.manc_sc_ipv6addr;
5594 		} else {
5595 			/*
5596 			 * Note that ping packets are only generated
5597 			 * by the SC across scman1 (SC to SC network).
5598 			 */
5599 			ip6h->ip6_src = man_sc_ip6addrs.ip6_my_sc_ipaddr;
5600 			ip6h->ip6_dst = man_sc_ip6addrs.ip6_other_sc_ipaddr;
5601 		}
5602 	}
5603 
5604 	/*
5605 	 * IPv6 and IP are the same for ICMP as far as I'm concerned.
5606 	 */
5607 	icmph = (icmph_t *)mp->b_cont->b_wptr;
5608 	if (ipver == IPV4_VERSION) {
5609 		mp->b_cont->b_wptr += ICMPH_SIZE;
5610 		icmph->icmph_type = ICMP_ECHO_REQUEST;
5611 		icmph->icmph_code = 0;
5612 	} else {
5613 		mp->b_cont->b_wptr += ICMP6_MINLEN;
5614 		icmph->icmph_type = ICMP6_ECHO_REQUEST;
5615 		icmph->icmph_code = 0;
5616 	}
5617 
5618 	datap = mp->b_cont->b_wptr;
5619 	mp->b_cont->b_wptr += datalen;
5620 
5621 	for (i = 0; i < datalen; i++)
5622 		*datap++ = i;
5623 
5624 	if (ipver == IPV4_VERSION) {
5625 		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len, 0);
5626 	} else {
5627 		uint32_t	sum;
5628 
5629 		sum = htons(IPPROTO_ICMPV6) + ip6h->ip6_plen;
5630 		icmph->icmph_checksum = IP_CSUM(mp->b_cont, iph_hdr_len - 32,
5631 		    (sum & 0xffff) + (sum >> 16));
5632 	}
5633 
5634 /*
5635  * TBD
5636  *	icp->icmp_time =  ???;
5637  */
5638 
5639 exit:
5640 	return (mp);
5641 }
5642 
5643 static mblk_t *
5644 man_alloc_udreq(int size, man_dladdr_t *dlsap)
5645 {
5646 	dl_unitdata_req_t	*udreq;
5647 	mblk_t			*bp;
5648 	mblk_t			*mp;
5649 
5650 	mp = allocb(sizeof (dl_unitdata_req_t) + sizeof (*dlsap), BPRI_MED);
5651 
5652 	if (mp == NULL) {
5653 		cmn_err(CE_NOTE, "man_preparepkt: allocb failed");
5654 		return (NULL);
5655 	}
5656 
5657 	if ((bp = allocb(size, BPRI_MED)) == NULL) {
5658 		freemsg(mp);
5659 		cmn_err(CE_NOTE, "man_preparepkts: allocb failed");
5660 		return (NULL);
5661 	}
5662 	bzero(bp->b_rptr, size);
5663 
5664 	mp->b_cont = bp;
5665 	mp->b_datap->db_type = M_PROTO;
5666 	udreq = (dl_unitdata_req_t *)mp->b_wptr;
5667 	mp->b_wptr += sizeof (dl_unitdata_req_t);
5668 
5669 	/*
5670 	 * phys addr first - TBD
5671 	 */
5672 	bcopy((char *)dlsap, mp->b_wptr, sizeof (*dlsap));
5673 	mp->b_wptr += sizeof (*dlsap);
5674 
5675 	udreq->dl_primitive = DL_UNITDATA_REQ;
5676 	udreq->dl_dest_addr_length = sizeof (*dlsap);
5677 	udreq->dl_dest_addr_offset = sizeof (*udreq);
5678 	udreq->dl_priority.dl_min = 0;
5679 	udreq->dl_priority.dl_max = 0;
5680 
5681 	return (mp);
5682 }
5683 
5684 
5685 /*
5686  * The routines in this file are executed by the MAN background thread,
5687  * which executes outside of the STREAMS framework (see man_str.c). It is
5688  * allowed to do the things required to modify the STREAMS driver (things
5689  * that are normally done from a user process). These routines do things like
5690  * open and close drivers, PLINK and PUNLINK streams to/from the multiplexor,
5691  * etc.
5692  *
5693  * The mechanism of communication between the STREAMS portion of the driver
5694  * and the background thread portion are two work queues, man_bwork_q
5695  * and man_iwork_q (background work q and streams work q).  Work
5696  * requests are placed on those queues when one half of the driver wants
5697  * the other half to do some work for it.
5698  *
5699  * The MAN background thread executes the man_bwork routine. Its sole
5700  * job is to process work requests placed on this work q. The MAN upper
5701  * write service routine is responsible for processing work requests posted
5702  * to the man_iwork_q->
5703  *
5704  * Both work queues are protected by the global mutex man_lock. The
5705  * man_bwork is signalged via the condvarman_bwork_q->q_cv. The man_uwsrv
5706  * routine is signaled by calling qenable (forcing man_uwsrv to run).
5707  */
5708 
5709 /*
5710  * man_bwork - Work thread for this device.  It is responsible for
5711  * performing operations which can't occur within the STREAMS framework.
5712  *
5713  * Locking:
5714  *	- Called holding no locks
5715  *	- Obtains the global mutex man_lock to remove work from
5716  *	  man_bwork_q, and post work to man_iwork_q->
5717  *	- Note that we do not want to hold any locks when making
5718  *	  any ldi_ calls.
5719  */
5720 void
5721 man_bwork()
5722 {
5723 	man_work_t	*wp;
5724 	int		done = 0;
5725 	callb_cpr_t	cprinfo;
5726 	int		wp_finished;
5727 
5728 	CALLB_CPR_INIT(&cprinfo, &man_lock, callb_generic_cpr,
5729 	    "mn_work_thrd");
5730 
5731 	MAN_DBG(MAN_CONFIG, ("man_bwork: enter"));
5732 
5733 	while (done == 0) {
5734 
5735 		mutex_enter(&man_lock);
5736 		/*
5737 		 * While there is nothing to do, sit in cv_wait.  If work
5738 		 * request is made, requester will signal.
5739 		 */
5740 		while (man_bwork_q->q_work == NULL) {
5741 
5742 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
5743 
5744 			cv_wait(&man_bwork_q->q_cv, &man_lock);
5745 
5746 			CALLB_CPR_SAFE_END(&cprinfo, &man_lock);
5747 		}
5748 
5749 		wp = man_bwork_q->q_work;
5750 		man_bwork_q->q_work = wp->mw_next;
5751 		wp->mw_next = NULL;
5752 		mutex_exit(&man_lock);
5753 
5754 		wp_finished = TRUE;
5755 
5756 		MAN_DBG(MAN_SWITCH, ("man_bwork: type %s",
5757 		    _mw_type[wp->mw_type]));
5758 
5759 		switch (wp->mw_type) {
5760 		case MAN_WORK_OPEN_CTL:
5761 			wp->mw_status = man_open_ctl();
5762 			break;
5763 
5764 		case MAN_WORK_CLOSE_CTL:
5765 			man_close_ctl();
5766 			break;
5767 
5768 		case MAN_WORK_CLOSE:
5769 		case MAN_WORK_CLOSE_STREAM:
5770 			man_bclose(&wp->mw_arg);
5771 			break;
5772 
5773 		case MAN_WORK_SWITCH:
5774 			man_bswitch(&wp->mw_arg, wp);
5775 			wp_finished = FALSE;
5776 			break;
5777 
5778 		case MAN_WORK_STOP:		/* man_bwork_stop() */
5779 			done = 1;
5780 			mutex_enter(&man_lock);
5781 			CALLB_CPR_EXIT(&cprinfo); /* Unlocks man_lock */
5782 			break;
5783 
5784 		default:
5785 			cmn_err(CE_WARN, "man_bwork: "
5786 			    "illegal work type(%d)", wp->mw_type);
5787 			break;
5788 		}
5789 
5790 		mutex_enter(&man_lock);
5791 
5792 		if (wp_finished) {
5793 			wp->mw_flags |= MAN_WFLAGS_DONE;
5794 			if (wp->mw_flags & MAN_WFLAGS_CVWAITER)
5795 				cv_signal(&wp->mw_cv);
5796 			else if (wp->mw_flags & MAN_WFLAGS_QWAITER)
5797 				qenable(wp->mw_q);
5798 			else
5799 				man_work_free(wp);
5800 		}
5801 
5802 		mutex_exit(&man_lock);
5803 	}
5804 
5805 	MAN_DBG(MAN_CONFIG, ("man_bwork: thread_exit"));
5806 
5807 	mutex_enter(&man_lock);
5808 	man_bwork_id = NULL;
5809 	mutex_exit(&man_lock);
5810 
5811 	thread_exit();
5812 }
5813 
5814 /*
5815  * man_open_ctl - Open the control stream.
5816  *
5817  *	returns	- success - 0
5818  *		- failure - errno code
5819  *
5820  * Mutex Locking Notes:
5821  *	We need a way to keep the CLONE_OPEN qwaiters in man_open from
5822  *	checking the man_config variables after the ldi_open call below
5823  *	returns from man_open, leaving the inner perimeter. So, we use the
5824  *	man_lock to synchronize the threads in man_open_ctl and man_open.  We
5825  *	hold man_lock across this call into man_open, which in general is a
5826  *	no-no. But, the STREAMs portion of the driver (other than open)
5827  *	doesn't use it. So, if ldi_open gets hijacked to run any part of
5828  *	the MAN streams driver, it wont end up recursively trying to acquire
5829  *	man_lock. Note that the non-CLONE_OPEN portion of man_open doesnt
5830  *	acquire it either, so again no recursive mutex.
5831  */
5832 static int
5833 man_open_ctl()
5834 {
5835 	int		status = 0;
5836 	ldi_handle_t	ctl_lh = NULL;
5837 	ldi_ident_t	li = NULL;
5838 
5839 	MAN_DBG(MAN_CONFIG, ("man_open_ctl: plumbing control stream\n"));
5840 
5841 	/*
5842 	 * Get eri driver loaded and kstats initialized. Is there a better
5843 	 * way to do this? - TBD.
5844 	 */
5845 	status = ldi_ident_from_mod(&modlinkage, &li);
5846 	if (status) {
5847 		cmn_err(CE_WARN,
5848 		    "man_open_ctl: ident alloc failed, error %d", status);
5849 		goto exit;
5850 	}
5851 
5852 	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
5853 	    kcred, &ctl_lh, li);
5854 	if (status) {
5855 		cmn_err(CE_WARN,
5856 		    "man_open_ctl: eri open failed, error %d", status);
5857 		ctl_lh = NULL;
5858 		goto exit;
5859 	}
5860 	(void) ldi_close(ctl_lh, NULL, kcred);
5861 	ctl_lh = NULL;
5862 
5863 	mutex_enter(&man_lock);
5864 
5865 	if (man_ctl_lh != NULL) {
5866 		mutex_exit(&man_lock);
5867 		goto exit;
5868 	}
5869 
5870 	ASSERT(man_ctl_wq == NULL);
5871 	mutex_exit(&man_lock);
5872 
5873 	status = ldi_open_by_name(DMAN_INT_PATH, FREAD | FWRITE | FNOCTTY,
5874 	    kcred, &ctl_lh, li);
5875 	if (status) {
5876 		cmn_err(CE_WARN,
5877 		    "man_open_ctl: man control dev open failed, "
5878 		    "error %d", status);
5879 		goto exit;
5880 	}
5881 
5882 	/*
5883 	 * Update global config state. TBD - dont need lock here, since
5884 	 * everyone is stuck in open until we finish. Only other modifier
5885 	 * is man_deconfigure via _fini, which returns EBUSY if there is
5886 	 * any open streams (other than control). Do need to signal qwaiters
5887 	 * on error.
5888 	 */
5889 	mutex_enter(&man_lock);
5890 	ASSERT(man_config_state == MAN_CONFIGURING);
5891 	ASSERT(man_ctl_lh == NULL);
5892 	man_ctl_lh = ctl_lh;
5893 	mutex_exit(&man_lock);
5894 
5895 exit:
5896 	if (li)
5897 		ldi_ident_release(li);
5898 
5899 	MAN_DBG(MAN_CONFIG, ("man_open_ctl: man_ctl_lh(0x%p) errno = %d\n",
5900 	    (void *)man_ctl_lh, status));
5901 
5902 	return (status);
5903 }
5904 
5905 /*
5906  * man_close_ctl - Close control stream, we are about to unload driver.
5907  *
5908  * Locking:
5909  *	- Called holding no locks.
5910  */
5911 static void
5912 man_close_ctl()
5913 {
5914 	ldi_handle_t tlh;
5915 
5916 	MAN_DBG(MAN_CONFIG, ("man_close_ctl: unplumbing control stream\n"));
5917 
5918 	mutex_enter(&man_lock);
5919 	if ((tlh = man_ctl_lh) != NULL)
5920 		man_ctl_lh = NULL;
5921 	mutex_exit(&man_lock);
5922 
5923 	if (tlh != NULL) {
5924 		(void) ldi_close(tlh, NULL, kcred);
5925 	}
5926 
5927 }
5928 
5929 /*
5930  * Close the lower streams. Get all the timers canceled, close the lower
5931  * stream and delete the dest array.
5932  *
5933  * Returns:
5934  *	0	Closed all streams.
5935  *	1	Couldn't close one or more streams, timers still running.
5936  *
5937  * Locking:
5938  *	- Called holding no locks.
5939  */
5940 static void
5941 man_bclose(man_adest_t *adp)
5942 {
5943 	int		i;
5944 	man_dest_t	*mdp;
5945 
5946 	man_cancel_timers(adp);
5947 
5948 	for (i = 0; i < adp->a_ndests; i++) {
5949 		mdp = &adp->a_mdp[i];
5950 
5951 		if (mdp->md_muxid != -1)
5952 			man_unplumb(mdp);
5953 	}
5954 
5955 	mutex_destroy(&mdp->md_lock);
5956 	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
5957 	adp->a_mdp = NULL;
5958 }
5959 
5960 /*
5961  * We want to close down all lower streams. Need to wait until all
5962  * timers and work related to these lower streams is quiesced.
5963  *
5964  * Returns 1 if lower streams are quiesced, 0 if we need to wait
5965  * a bit longer.
5966  */
5967 static void
5968 man_cancel_timers(man_adest_t *adp)
5969 {
5970 	man_dest_t	*mdp;
5971 	int		cnt;
5972 	int		i;
5973 
5974 	mdp = adp->a_mdp;
5975 	cnt = adp->a_ndests;
5976 
5977 	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: mdp(0x%p) cnt %d",
5978 	    (void *)mdp, cnt));
5979 
5980 	for (i = 0; i < cnt; i++) {
5981 
5982 		if (mdp[i].md_lc_timer_id != 0) {
5983 			(void) quntimeout(man_ctl_wq, mdp[i].md_lc_timer_id);
5984 			mdp[i].md_lc_timer_id = 0;
5985 		}
5986 
5987 		if (mdp[i].md_bc_id != 0) {
5988 			qunbufcall(man_ctl_wq, mdp[i].md_bc_id);
5989 			mdp[i].md_bc_id = 0;
5990 		}
5991 	}
5992 
5993 	MAN_DBG(MAN_SWITCH, ("man_cancel_timers: returns"));
5994 }
5995 
5996 /*
5997  * A failover is started at start of day, when the driver detects a
5998  * link failure (see man_linkcheck_timer), or when DR detaches
5999  * the IO board containing the current active link between SC and
6000  * domain (see man_dr_detach, man_iwork, and man_do_dr_detach). A
6001  * MAN_WORK_SWITCH work request containing all the lower streams that
6002  * should be switched is posted on the man_bwork_q-> This work request is
6003  * processed here. Once all lower streams have been switched to an
6004  * alternate path, the MAN_WORK_SWITCH work request is passed back to
6005  * man_iwork_q where it is processed within the inner perimeter of the
6006  * STREAMS framework (see man_iswitch).
6007  *
6008  * Note that when the switch fails for whatever reason, we just hand
6009  * back the lower streams untouched and let another failover happen.
6010  * Hopefully we will sooner or later succeed at the failover.
6011  */
6012 static void
6013 man_bswitch(man_adest_t *adp, man_work_t *wp)
6014 {
6015 	man_dest_t	*tdp;
6016 	man_t		*manp;
6017 	int		i;
6018 	int		status = 0;
6019 
6020 	/*
6021 	 * Make a temporary copy of dest array, updating device to the
6022 	 * alternate and try to open all lower streams. bgthread can sleep.
6023 	 */
6024 
6025 	tdp = man_kzalloc(sizeof (man_dest_t) * adp->a_ndests,
6026 	    KM_SLEEP);
6027 	bcopy(adp->a_mdp, tdp, sizeof (man_dest_t) * adp->a_ndests);
6028 
6029 	/*
6030 	 * Before we switch to the new path, lets sync the kstats.
6031 	 */
6032 	mutex_enter(&man_lock);
6033 
6034 	manp = ddi_get_soft_state(man_softstate, adp->a_man_ppa);
6035 	if (manp != NULL) {
6036 		man_update_path_kstats(manp);
6037 	} else
6038 		status = ENODEV;
6039 
6040 	mutex_exit(&man_lock);
6041 
6042 	if (status != 0)
6043 		goto exit;
6044 
6045 	for (i = 0; i < adp->a_ndests; i++) {
6046 
6047 		tdp[i].md_device = adp->a_st_dev;
6048 		tdp[i].md_muxid = -1;
6049 
6050 		if (man_plumb(&tdp[i]))
6051 			break;
6052 	}
6053 
6054 	/*
6055 	 * Didn't plumb everyone, unplumb new lower stuff and return.
6056 	 */
6057 	if (i < adp->a_ndests) {
6058 		int	j;
6059 
6060 		for (j = 0; j <= i; j++)
6061 			man_unplumb(&tdp[j]);
6062 		status = EAGAIN;
6063 		goto exit;
6064 	}
6065 
6066 	if (man_is_on_domain && man_dossc_switch(adp->a_st_dev.mdev_exp_id)) {
6067 		/*
6068 		 * If we cant set new path on the SSC, then fail the
6069 		 * failover.
6070 		 */
6071 		for (i = 0; i < adp->a_ndests; i++)
6072 			man_unplumb(&tdp[i]);
6073 		status = EAGAIN;
6074 		goto exit;
6075 	}
6076 
6077 	man_kfree(adp->a_mdp, sizeof (man_dest_t) * adp->a_ndests);
6078 	adp->a_mdp = tdp;
6079 
6080 exit:
6081 	if (status)
6082 		man_kfree(tdp, sizeof (man_dest_t) * adp->a_ndests);
6083 
6084 
6085 	MAN_DBG(MAN_SWITCH, ("man_bswitch: returns %d", status));
6086 
6087 	/*
6088 	 * Hand processed switch request back to man_iwork for
6089 	 * processing in man_iswitch.
6090 	 */
6091 	wp->mw_status = status;
6092 
6093 	mutex_enter(&man_lock);
6094 	man_work_add(man_iwork_q, wp);
6095 	mutex_exit(&man_lock);
6096 
6097 }
6098 
6099 /*
6100  * man_plumb - Configure a lower stream for this destination.
6101  *
6102  * Locking:
6103  * 	- Called holding no locks.
6104  *
6105  * Returns:
6106  *	- success - 0
6107  *	- failure - error code of failure
6108  */
6109 static int
6110 man_plumb(man_dest_t *mdp)
6111 {
6112 	int		status;
6113 	int		muxid;
6114 	ldi_handle_t	lh;
6115 	ldi_ident_t	li = NULL;
6116 
6117 	MAN_DBG(MAN_SWITCH, ("man_plumb: mdp(0x%p) %s%d exp(%d)",
6118 	    (void *)mdp, ddi_major_to_name(mdp->md_device.mdev_major),
6119 	    mdp->md_device.mdev_ppa, mdp->md_device.mdev_exp_id));
6120 
6121 	/*
6122 	 * Control stream should already be open.
6123 	 */
6124 	if (man_ctl_lh == NULL) {
6125 		status = EAGAIN;
6126 		goto exit;
6127 	}
6128 
6129 	mutex_enter(&man_lock);
6130 	ASSERT(man_ctl_wq != NULL);
6131 	status = ldi_ident_from_stream(man_ctl_wq, &li);
6132 	if (status != 0) {
6133 		cmn_err(CE_WARN,
6134 		    "man_plumb: ident alloc failed, error %d", status);
6135 		goto exit;
6136 	}
6137 	mutex_exit(&man_lock);
6138 
6139 	/*
6140 	 * previously opens were done by a dev_t of makedev(clone_major,
6141 	 * mdev_major) which should always map to /devices/pseudo/clone@0:eri
6142 	 */
6143 	ASSERT(strcmp(ERI_IDNAME,
6144 	    ddi_major_to_name(mdp->md_device.mdev_major)) == 0);
6145 
6146 	status = ldi_open_by_name(ERI_PATH, FREAD | FWRITE | FNOCTTY,
6147 	    kcred, &lh, li);
6148 	if (status) {
6149 		cmn_err(CE_WARN,
6150 		    "man_plumb: eri open failed, error %d", status);
6151 		goto exit;
6152 	}
6153 
6154 	/*
6155 	 * Link netdev under MAN.
6156 	 */
6157 	ASSERT(mdp->md_muxid == -1);
6158 
6159 	status = ldi_ioctl(man_ctl_lh, I_PLINK, (intptr_t)lh,
6160 	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
6161 	if (status) {
6162 		cmn_err(CE_WARN,
6163 		    "man_plumb: ldi_ioctl(I_PLINK) failed, error %d", status);
6164 		(void) ldi_close(lh, NULL, kcred);
6165 		goto exit;
6166 
6167 	}
6168 	mdp->md_muxid = muxid;
6169 	mdp->md_wq = man_linkrec_find(muxid);
6170 	/*
6171 	 * If we can't find the linkrec then return an
6172 	 * error. It will be automatically unplumbed on failure.
6173 	 */
6174 	if (mdp->md_wq == NULL)
6175 		status = EAGAIN;
6176 
6177 	(void) ldi_close(lh, NULL, kcred);
6178 exit:
6179 	if (li)
6180 		ldi_ident_release(li);
6181 
6182 	MAN_DBG(MAN_SWITCH, ("man_plumb: exit\n"));
6183 
6184 	return (status);
6185 }
6186 
6187 /*
6188  * man_unplumb - tear down the STREAMs framework for the lower multiplexor.
6189  *
6190  *	mdp - destination struct of interest
6191  *
6192  *	returns	- success - 0
6193  *		- failure - return error from ldi_ioctl
6194  */
6195 static void
6196 man_unplumb(man_dest_t *mdp)
6197 {
6198 	int	status, rval;
6199 
6200 	MAN_DBG(MAN_SWITCH, ("man_unplumb: mdp"));
6201 	MAN_DBGCALL(MAN_SWITCH, man_print_mdp(mdp));
6202 
6203 	if (mdp->md_muxid == -1)
6204 		return;
6205 
6206 	ASSERT(man_ctl_lh != NULL);
6207 
6208 	/*
6209 	 * I_PUNLINK causes the multiplexor resources to be freed.
6210 	 */
6211 	status = ldi_ioctl(man_ctl_lh, I_PUNLINK, (intptr_t)mdp->md_muxid,
6212 	    FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &rval);
6213 	if (status) {
6214 		cmn_err(CE_WARN, "man_unplumb: ldi_ioctl(I_PUNLINK) failed"
6215 		    " errno %d\n", status);
6216 	}
6217 	/*
6218 	 * Delete linkrec if it exists.
6219 	 */
6220 	(void) man_linkrec_find(mdp->md_muxid);
6221 	mdp->md_muxid = -1;
6222 
6223 }
6224 
6225 /*
6226  * The routines below deal with paths and pathgroups. These data structures
6227  * are used to track the physical devices connecting the domain and SSC.
6228  * These devices make up the lower streams of the MAN multiplexor. The
6229  * routines all expect the man_lock to be held.
6230  *
6231  * A pathgroup consists of all paths that connect a particular domain and the
6232  * SSC. The concept of a pathgroup id (pg_id) is used to uniquely identify
6233  * a pathgroup.  For Domains, there is just one pathgroup, that connecting
6234  * the domain to the SSC (pg_id == 0). On the SSC, there is one pathgroup per
6235  * domain. The pg_id field corresponds to the domain tags A-R. A pg_id of
6236  * 0 means domain tag A, a pg_id of 1 means domain B, etc.
6237  *
6238  * The path data structure identifies one path between the SSC and a domain.
6239  * It describes the information for the path: the major and minor number of
6240  * the physical device; kstat pointers; and ethernet address of the
6241  * other end of the path.
6242  *
 * The pathgroups are anchored at man_pg_head and are protected by the
 * inner perimeter. The routines are only called by the STREAMs
 * portion of the driver.
6246  */
6247 
6248 /*
6249  * Update man instance pathgroup info. Exclusive inner perimeter assures
6250  * this code is single threaded. man_refcnt assures man_t wont detach
6251  * while we are playing with man_pg stuff.
6252  *
6253  * Returns 0 on success, errno on failure.
6254  */
6255 int
6256 man_pg_cmd(mi_path_t *mip, man_work_t *waiter_wp)
6257 {
6258 	int		status = 0;
6259 	man_t		*manp;
6260 
6261 	if (mip->mip_ndevs < 0) {
6262 		status = EINVAL;
6263 		cmn_err(CE_WARN, "man_pg_cmd: EINVAL: mip_ndevs %d",
6264 		    mip->mip_ndevs);
6265 		goto exit;
6266 	}
6267 
6268 	ASSERT(MUTEX_HELD(&man_lock));
6269 	manp = ddi_get_soft_state(man_softstate, mip->mip_man_ppa);
6270 	if (manp == NULL) {
6271 		status = ENODEV;
6272 		goto exit;
6273 	}
6274 
6275 	MAN_DBG(MAN_PATH, ("man_pg_cmd: mip"));
6276 	MAN_DBGCALL(MAN_PATH, man_print_mip(mip));
6277 
6278 	MAN_DBG(MAN_PATH, ("\tman_t"));
6279 	MAN_DBGCALL(MAN_PATH, man_print_man(manp));
6280 
6281 	switch (mip->mip_cmd) {
6282 	case MI_PATH_ASSIGN:
6283 		status = man_pg_assign(&manp->man_pg, mip, FALSE);
6284 		break;
6285 
6286 	case MI_PATH_ADD:
6287 		status = man_pg_assign(&manp->man_pg, mip, TRUE);
6288 		break;
6289 
6290 	case MI_PATH_UNASSIGN:
6291 		status = man_pg_unassign(&manp->man_pg, mip);
6292 		break;
6293 
6294 	case MI_PATH_ACTIVATE:
6295 		status = man_pg_activate(manp, mip, waiter_wp);
6296 		break;
6297 
6298 	case MI_PATH_READ:
6299 		status = man_pg_read(manp->man_pg, mip);
6300 		break;
6301 
6302 	default:
6303 		status = EINVAL;
6304 		cmn_err(CE_NOTE, "man_pg_cmd: invalid command");
6305 		break;
6306 	}
6307 
6308 exit:
6309 	MAN_DBG(MAN_PATH, ("man_pg_cmd: returns %d", status));
6310 
6311 	return (status);
6312 }
6313 
6314 /*
6315  * Assign paths to a pathgroup. If pathgroup doesnt exists, create it.
6316  * If path doesnt exist, create it. If ethernet address of existing
6317  * pathgroup different, change it. If an existing path is not in the new
6318  * list, remove it.  If anything changed, send PATH_UPDATE request to
6319  * man_iwork to update all man_dest_t's.
6320  *
6321  * 	mplpp	- man pathgroup list point to point.
6322  *	mip	- new/updated pathgroup info to assign.
6323  */
6324 static int
6325 man_pg_assign(man_pg_t **mplpp, mi_path_t *mip, int add_only)
6326 {
6327 	man_pg_t	*mpg;
6328 	man_path_t	*mp;
6329 	man_path_t	*add_paths = NULL;
6330 	int		cnt;
6331 	int		i;
6332 	int		first_pass = TRUE;
6333 	int		status = 0;
6334 
6335 	ASSERT(MUTEX_HELD(&man_lock));
6336 
6337 	cnt = mip->mip_ndevs;
6338 	if (cnt == 0) {
6339 		status = EINVAL;
6340 		cmn_err(CE_NOTE, "man_pg_assign: mip_ndevs == 0");
6341 		goto exit;
6342 	}
6343 
6344 	/*
6345 	 * Assure the devices to be assigned are not assigned to some other
6346 	 * pathgroup.
6347 	 */
6348 	for (i = 0; i < cnt; i++) {
6349 		mpg = man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL);
6350 
6351 		if (mpg == NULL)
6352 			continue;
6353 
6354 		if ((mpg->mpg_man_ppa != mip->mip_man_ppa) ||
6355 		    (mpg->mpg_pg_id != mip->mip_pg_id)) {
6356 			/*
6357 			 * Already assigned to some other man instance
6358 			 * or pathgroup.
6359 			 */
6360 			status = EEXIST;
6361 			goto exit;
6362 		}
6363 	}
6364 
6365 	/*
6366 	 * Find pathgroup, or allocate new one if it doesnt exist and
6367 	 * add it to list at mplpp. Result is that mpg points to
6368 	 * pathgroup to modify.
6369 	 */
6370 	mpg = man_find_pg_by_id(*mplpp, mip->mip_pg_id);
6371 	if (mpg == NULL) {
6372 
6373 		status = man_pg_create(mplpp, &mpg, mip);
6374 		if (status)
6375 			goto exit;
6376 
6377 	} else if (ether_cmp(&mip->mip_eaddr, &mpg->mpg_dst_eaddr) != 0) {
6378 
6379 		cmn_err(CE_WARN, "man_pg_assign: ethernet address mismatch");
6380 		cmn_err(CE_CONT, "existing %s",
6381 		    ether_sprintf(&mpg->mpg_dst_eaddr));
6382 		cmn_err(CE_CONT, "new %s",
6383 		    ether_sprintf(&mip->mip_eaddr));
6384 
6385 		status = EINVAL;
6386 		goto exit;
6387 	}
6388 
6389 	/*
6390 	 * Create list of new paths to add to pathgroup.
6391 	 */
6392 	for (i = 0; i < cnt; i++) {
6393 
6394 		if (man_find_path_by_dev(*mplpp, &mip->mip_devs[i], NULL))
6395 			continue;	/* Already exists in this pathgroup */
6396 
6397 		mp = man_kzalloc(sizeof (man_path_t), KM_NOSLEEP);
6398 		if (mp == NULL) {
6399 			status = ENOMEM;
6400 			goto exit;
6401 		}
6402 
6403 		mp->mp_device = mip->mip_devs[i];
6404 		mp->mp_device.mdev_state = MDEV_ASSIGNED;
6405 
6406 		MAN_DBG(MAN_PATH, ("man_pg_assign: assigning mdp"));
6407 		MAN_DBGCALL(MAN_PATH, man_print_dev(&mp->mp_device));
6408 
6409 		status = man_path_kstat_init(mp);
6410 		if (status) {
6411 			man_kfree(mp, sizeof (man_path_t));
6412 			goto exit;
6413 		}
6414 
6415 		man_path_insert(&add_paths, mp);
6416 	}
6417 
6418 	/*
6419 	 * man_dr_attach passes only the path which is being DRd in.
6420 	 * So just add the path and don't worry about removing paths.
6421 	 */
6422 	if (add_only == TRUE)
6423 		goto exit;
6424 
6425 
6426 	/*
6427 	 * Check if any paths we want to remove are ACTIVE. If not,
6428 	 * do a second pass and remove them.
6429 	 */
6430 again:
6431 	mp = mpg->mpg_pathp;
6432 	while (mp != NULL) {
6433 		int		in_new_list;
6434 		man_path_t	*rp;
6435 
6436 		rp = NULL;
6437 		in_new_list = FALSE;
6438 
6439 		for (i = 0; i < cnt; i++) {
6440 			if (mp->mp_device.mdev_ppa ==
6441 			    mip->mip_devs[i].mdev_ppa) {
6442 
6443 				in_new_list = TRUE;
6444 				break;
6445 			}
6446 		}
6447 
6448 		if (!in_new_list) {
6449 			if (first_pass) {
6450 				if (mp->mp_device.mdev_state & MDEV_ACTIVE) {
6451 					status = EBUSY;
6452 					goto exit;
6453 				}
6454 			} else {
6455 				rp = mp;
6456 			}
6457 		}
6458 		mp = mp->mp_next;
6459 
6460 		if (rp != NULL)
6461 			man_path_remove(&mpg->mpg_pathp, rp);
6462 	}
6463 
6464 	if (first_pass == TRUE) {
6465 		first_pass = FALSE;
6466 		goto again;
6467 	}
6468 
6469 exit:
6470 	if (status == 0) {
6471 		if (add_paths)
6472 			man_path_merge(&mpg->mpg_pathp, add_paths);
6473 	} else {
6474 		while (add_paths != NULL) {
6475 			mp = add_paths;
6476 			add_paths = mp->mp_next;
6477 			mp->mp_next = NULL;
6478 
6479 			man_path_kstat_uninit(mp);
6480 			man_kfree(mp, sizeof (man_path_t));
6481 		}
6482 	}
6483 
6484 	return (status);
6485 }
6486 
6487 /*
6488  * Remove all paths from a pathgroup (domain shutdown). If there is an
6489  * active path in the group, shut down all destinations referencing it
6490  * first.
6491  */
6492 static int
6493 man_pg_unassign(man_pg_t **plpp, mi_path_t *mip)
6494 {
6495 	man_pg_t	*mpg;
6496 	man_pg_t	*tpg;
6497 	man_pg_t	*tppg;
6498 	man_path_t	*mp = NULL;
6499 	int		status = 0;
6500 
6501 	ASSERT(MUTEX_HELD(&man_lock));
6502 
6503 	/*
6504 	 * Check for existence of pathgroup.
6505 	 */
6506 	if ((mpg = man_find_pg_by_id(*plpp, mip->mip_pg_id)) == NULL)
6507 		goto exit;
6508 
6509 	if (man_find_active_path(mpg->mpg_pathp) != NULL) {
6510 		status = man_remove_dests(mpg);
6511 		if (status)
6512 			goto exit;
6513 	}
6514 
6515 	/*
6516 	 * Free all the paths for this pathgroup.
6517 	 */
6518 	while (mpg->mpg_pathp) {
6519 		mp = mpg->mpg_pathp;
6520 		mpg->mpg_pathp = mp->mp_next;
6521 		mp->mp_next = NULL;
6522 
6523 		man_path_kstat_uninit(mp);
6524 		man_kfree(mp, sizeof (man_path_t));
6525 	}
6526 
6527 	/*
6528 	 * Remove this pathgroup from the list, and free it.
6529 	 */
6530 	tpg = tppg = *plpp;
6531 	if (tpg == mpg) {
6532 		*plpp = tpg->mpg_next;
6533 		goto free_pg;
6534 	}
6535 
6536 	for (tpg = tpg->mpg_next; tpg != NULL; tpg = tpg->mpg_next) {
6537 		if (tpg == mpg)
6538 			break;
6539 		tppg = tpg;
6540 	}
6541 
6542 	ASSERT(tpg != NULL);
6543 
6544 	tppg->mpg_next = tpg->mpg_next;
6545 	tpg->mpg_next = NULL;
6546 
6547 free_pg:
6548 	man_kfree(tpg, sizeof (man_pg_t));
6549 
6550 exit:
6551 	return (status);
6552 
6553 }
6554 
6555 /*
6556  * Set a new active path. This is done via man_ioctl so we are
6557  * exclusive in the inner perimeter.
6558  */
6559 static int
6560 man_pg_activate(man_t *manp, mi_path_t *mip, man_work_t *waiter_wp)
6561 {
6562 	man_pg_t	*mpg1;
6563 	man_pg_t	*mpg2;
6564 	man_pg_t	*plp;
6565 	man_path_t	*mp;
6566 	man_path_t	*ap;
6567 	int		status = 0;
6568 
6569 	ASSERT(MUTEX_HELD(&man_lock));
6570 	MAN_DBG(MAN_PATH, ("man_pg_activate: dev"));
6571 	MAN_DBGCALL(MAN_PATH, man_print_dev(mip->mip_devs));
6572 
6573 	if (mip->mip_ndevs != 1) {
6574 		status = EINVAL;
6575 		goto exit;
6576 	}
6577 
6578 	plp = manp->man_pg;
6579 	mpg1 = man_find_pg_by_id(plp, mip->mip_pg_id);
6580 	if (mpg1 == NULL) {
6581 		status = EINVAL;
6582 		goto exit;
6583 	}
6584 
6585 	mpg2 = man_find_path_by_dev(plp, mip->mip_devs, &mp);
6586 	if (mpg2 == NULL) {
6587 		status = ENODEV;
6588 		goto exit;
6589 	}
6590 
6591 	if (mpg1 != mpg2) {
6592 		status = EINVAL;
6593 		goto exit;
6594 	}
6595 
6596 	ASSERT(mp->mp_device.mdev_ppa == mip->mip_devs->mdev_ppa);
6597 
6598 	if (mpg1->mpg_flags & MAN_PG_SWITCHING) {
6599 		status = EAGAIN;
6600 		goto exit;
6601 	}
6602 
6603 	ap = man_find_active_path(mpg1->mpg_pathp);
6604 	if (ap == NULL) {
6605 		/*
6606 		 * This is the first time a path has been activated for
6607 		 * this pathgroup. Initialize all upper streams dest
6608 		 * structure for this pathgroup so autoswitch will find
6609 		 * them.
6610 		 */
6611 		mp->mp_device.mdev_state |= MDEV_ACTIVE;
6612 		man_add_dests(mpg1);
6613 		goto exit;
6614 	}
6615 
6616 	/*
6617 	 * Path already active, nothing to do.
6618 	 */
6619 	if (ap == mp)
6620 		goto exit;
6621 
6622 	/*
6623 	 * Try to autoswitch to requested device. Set flags and refcnt.
6624 	 * Cleared in man_iswitch when SWITCH completes.
6625 	 */
6626 	manp->man_refcnt++;
6627 	mpg1->mpg_flags |= MAN_PG_SWITCHING;
6628 
6629 	/*
6630 	 * Switch to path specified.
6631 	 */
6632 	status = man_autoswitch(mpg1, mip->mip_devs, waiter_wp);
6633 
6634 	if (status != 0) {
6635 		/*
6636 		 * man_iswitch not going to run, clean up.
6637 		 */
6638 		manp->man_refcnt--;
6639 		mpg1->mpg_flags &= ~MAN_PG_SWITCHING;
6640 
6641 		if (status == ENODEV) {
6642 			/*
6643 			 * Device not plumbed isn't really an error. Change
6644 			 * active device setting here, since man_iswitch isn't
6645 			 * going to be run to do it.
6646 			 */
6647 			status = 0;
6648 			ap->mp_device.mdev_state &= ~MDEV_ACTIVE;
6649 			mp->mp_device.mdev_state |= MDEV_ACTIVE;
6650 		}
6651 	}
6652 
6653 exit:
6654 	MAN_DBG(MAN_PATH, ("man_pg_activate: returns %d", status));
6655 
6656 	return (status);
6657 }
6658 
6659 static int
6660 man_pg_read(man_pg_t *plp, mi_path_t *mip)
6661 {
6662 	man_pg_t	*mpg;
6663 	man_path_t	*mp;
6664 	int		cnt;
6665 	int		status = 0;
6666 
6667 	ASSERT(MUTEX_HELD(&man_lock));
6668 
6669 	if ((mpg = man_find_pg_by_id(plp, mip->mip_pg_id)) == NULL) {
6670 		status = ENODEV;
6671 		goto exit;
6672 	}
6673 
6674 	cnt = 0;
6675 	for (mp = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6676 		bcopy(&mp->mp_device, &mip->mip_devs[cnt], sizeof (man_dev_t));
6677 		if (cnt == mip->mip_ndevs)
6678 			break;
6679 		cnt++;
6680 	}
6681 
6682 	MAN_DBG(MAN_PATH, ("man_pg_read: pg(0x%p) id(%d) found %d paths",
6683 	    (void *)mpg, mpg->mpg_pg_id, cnt));
6684 
6685 	mip->mip_ndevs = cnt;
6686 
6687 	/*
6688 	 * TBD - What should errno be if user buffer too small ?
6689 	 */
6690 	if (mp != NULL) {
6691 		status = ENOMEM;
6692 	}
6693 
6694 exit:
6695 
6696 	return (status);
6697 }
6698 
6699 /*
6700  * return existing pathgroup, or create it. TBD - Need to update
6701  * all of destinations if we added a pathgroup. Also, need to update
6702  * all of man_strup if we add a path.
6703  *
6704  * 	mplpp	- man pathgroup list point to pointer.
6705  * 	mpgp	- returns newly created man pathgroup.
6706  *	mip	- info to fill in mpgp.
6707  */
6708 static int
6709 man_pg_create(man_pg_t **mplpp, man_pg_t **mpgp, mi_path_t *mip)
6710 {
6711 	man_pg_t	*mpg;
6712 	man_pg_t	*tpg;
6713 	int		status = 0;
6714 
6715 	ASSERT(MUTEX_HELD(&man_lock));
6716 
6717 	if (ether_cmp(&mip->mip_eaddr, &zero_ether_addr) == 0) {
6718 		cmn_err(CE_NOTE, "man_ioctl: man_pg_create: ether"
6719 		    " addresss not set!");
6720 		status = EINVAL;
6721 		goto exit;
6722 	}
6723 
6724 	mpg = man_kzalloc(sizeof (man_pg_t), KM_NOSLEEP);
6725 	if (mpg == NULL) {
6726 		status = ENOMEM;
6727 		goto exit;
6728 	}
6729 
6730 	mpg->mpg_flags = MAN_PG_IDLE;
6731 	mpg->mpg_pg_id = mip->mip_pg_id;
6732 	mpg->mpg_man_ppa = mip->mip_man_ppa;
6733 	ether_copy(&mip->mip_eaddr, &mpg->mpg_dst_eaddr);
6734 
6735 	MAN_DBG(MAN_PATH, ("man_pg_create: new mpg"));
6736 	MAN_DBGCALL(MAN_PATH, man_print_mpg(mpg));
6737 
6738 	tpg = *mplpp;
6739 	if (tpg == NULL) {
6740 		*mplpp = mpg;
6741 	} else {
6742 		while (tpg->mpg_next != NULL)
6743 			tpg = tpg->mpg_next;
6744 		tpg->mpg_next = mpg;
6745 	}
6746 
6747 exit:
6748 	*mpgp = mpg;
6749 
6750 	return (status);
6751 }
6752 
6753 /*
6754  * Return pointer to pathgroup containing mdevp, null otherwise. Also,
6755  * if a path pointer is passed in, set it to matching path in pathgroup.
6756  *
6757  * Called holding man_lock.
6758  */
6759 static man_pg_t *
6760 man_find_path_by_dev(man_pg_t *plp, man_dev_t *mdevp, man_path_t **mpp)
6761 {
6762 	man_pg_t	*mpg;
6763 	man_path_t	*mp;
6764 
6765 	ASSERT(MUTEX_HELD(&man_lock));
6766 	for (mpg = plp; mpg != NULL; mpg = mpg->mpg_next) {
6767 		for (mp  = mpg->mpg_pathp; mp != NULL; mp = mp->mp_next) {
6768 			if (mp->mp_device.mdev_major == mdevp->mdev_major &&
6769 			    mp->mp_device.mdev_ppa == mdevp->mdev_ppa) {
6770 
6771 				if (mpp != NULL)
6772 					*mpp = mp;
6773 				return (mpg);
6774 			}
6775 		}
6776 	}
6777 
6778 	return (NULL);
6779 }
6780 
6781 /*
6782  * Return pointer to pathgroup assigned to destination, null if not found.
6783  *
6784  * Called holding man_lock.
6785  */
6786 static man_pg_t *
6787 man_find_pg_by_id(man_pg_t *mpg, int pg_id)
6788 {
6789 	ASSERT(MUTEX_HELD(&man_lock));
6790 	for (; mpg != NULL; mpg = mpg->mpg_next) {
6791 		if (mpg->mpg_pg_id == pg_id)
6792 			return (mpg);
6793 	}
6794 
6795 	return (NULL);
6796 }
6797 
6798 static man_path_t *
6799 man_find_path_by_ppa(man_path_t *mplist, int ppa)
6800 {
6801 	man_path_t	*mp;
6802 
6803 	ASSERT(MUTEX_HELD(&man_lock));
6804 	for (mp = mplist; mp != NULL; mp = mp->mp_next) {
6805 		if (mp->mp_device.mdev_ppa == ppa)
6806 			return (mp);
6807 	}
6808 
6809 	return (NULL);
6810 }
6811 
6812 static man_path_t *
6813 man_find_active_path(man_path_t *mplist)
6814 {
6815 	man_path_t	*mp;
6816 
6817 	ASSERT(MUTEX_HELD(&man_lock));
6818 	for (mp = mplist; mp != NULL; mp = mp->mp_next)
6819 		if (mp->mp_device.mdev_state & MDEV_ACTIVE)
6820 			return (mp);
6821 
6822 	return (NULL);
6823 }
6824 
6825 /*
6826  * Try and find an alternate path.
6827  */
6828 static man_path_t *
6829 man_find_alternate_path(man_path_t *mlp)
6830 {
6831 	man_path_t	*ap;		/* Active path */
6832 	man_path_t	*np;		/* New alternate path */
6833 	man_path_t	*fp = NULL;	/* LRU failed path */
6834 
6835 	ASSERT(MUTEX_HELD(&man_lock));
6836 	ap = man_find_active_path(mlp);
6837 
6838 	/*
6839 	 * Find a non-failed path, or the lru failed path and switch to it.
6840 	 */
6841 	for (np = mlp; np != NULL; np = np->mp_next) {
6842 		if (np == ap)
6843 			continue;
6844 
6845 		if (np->mp_device.mdev_state == MDEV_ASSIGNED)
6846 			goto exit;
6847 
6848 		if (np->mp_device.mdev_state & MDEV_FAILED) {
6849 			if (fp == NULL)
6850 				fp = np;
6851 			else
6852 				if (fp->mp_lru > np->mp_lru)
6853 						fp = np;
6854 		}
6855 	}
6856 
6857 	/*
6858 	 * Nowhere to switch to.
6859 	 */
6860 	if (np == NULL && (np =  fp) == NULL)
6861 		goto exit;
6862 
6863 exit:
6864 	return (np);
6865 }
6866 
6867 /*
6868  * Assumes caller has verified existence.
6869  */
6870 static void
6871 man_path_remove(man_path_t **lpp, man_path_t *mp)
6872 {
6873 	man_path_t	*tp;
6874 	man_path_t	*tpp;
6875 
6876 	ASSERT(MUTEX_HELD(&man_lock));
6877 	MAN_DBG(MAN_PATH, ("man_path_remove: removing path"));
6878 	MAN_DBGCALL(MAN_PATH, man_print_path(mp));
6879 
6880 	tp = tpp = *lpp;
6881 	if (tp == mp) {
6882 		*lpp = tp->mp_next;
6883 		goto exit;
6884 	}
6885 
6886 	for (tp = tp->mp_next; tp != NULL; tp = tp->mp_next) {
6887 		if (tp == mp)
6888 			break;
6889 		tpp = tp;
6890 	}
6891 
6892 	ASSERT(tp != NULL);
6893 
6894 	tpp->mp_next = tp->mp_next;
6895 	tp->mp_next = NULL;
6896 
6897 exit:
6898 	man_path_kstat_uninit(tp);
6899 	man_kfree(tp, sizeof (man_path_t));
6900 
6901 }
6902 
6903 /*
6904  * Insert path into list, ascending order by ppa.
6905  */
6906 static void
6907 man_path_insert(man_path_t **lpp, man_path_t *mp)
6908 {
6909 	man_path_t	*tp;
6910 	man_path_t	*tpp;
6911 
6912 	ASSERT(MUTEX_HELD(&man_lock));
6913 	if (*lpp == NULL) {
6914 		*lpp = mp;
6915 		return;
6916 	}
6917 
6918 	tp = tpp = *lpp;
6919 	if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa) {
6920 		mp->mp_next = tp;
6921 		*lpp = mp;
6922 		return;
6923 	}
6924 
6925 	for (tp = tp->mp_next; tp != NULL; tp =  tp->mp_next) {
6926 		if (tp->mp_device.mdev_ppa > mp->mp_device.mdev_ppa)
6927 			break;
6928 		tpp = tp;
6929 	}
6930 
6931 	if (tp == NULL) {
6932 		tpp->mp_next = mp;
6933 	} else {
6934 		tpp->mp_next = mp;
6935 		mp->mp_next = tp;
6936 	}
6937 }
6938 
6939 /*
6940  * Merge npp into lpp, ascending order by ppa. Assumes no
6941  * duplicates in either list.
6942  */
6943 static void
6944 man_path_merge(man_path_t **lpp, man_path_t *np)
6945 {
6946 	man_path_t	*tmp;
6947 
6948 	ASSERT(MUTEX_HELD(&man_lock));
6949 	while (np != NULL) {
6950 		tmp = np;
6951 		np = np->mp_next;
6952 		tmp->mp_next = NULL;
6953 
6954 		man_path_insert(lpp, tmp);
6955 	}
6956 
6957 }
6958 
6959 static int
6960 man_path_kstat_init(man_path_t *mpp)
6961 {
6962 
6963 	kstat_named_t	*dev_knp;
6964 	int		status = 0;
6965 
6966 	ASSERT(MUTEX_HELD(&man_lock));
6967 	MAN_DBG(MAN_PATH, ("man_path_kstat_init: mpp(0x%p)\n", (void *)mpp));
6968 
6969 	/*
6970 	 * Create named kstats for accounting purposes.
6971 	 */
6972 	dev_knp = man_kzalloc(MAN_NUMSTATS * sizeof (kstat_named_t),
6973 	    KM_NOSLEEP);
6974 	if (dev_knp == NULL) {
6975 		status = ENOMEM;
6976 		goto exit;
6977 	}
6978 	man_kstat_named_init(dev_knp, MAN_NUMSTATS);
6979 	mpp->mp_last_knp = dev_knp;
6980 
6981 exit:
6982 
6983 	MAN_DBG(MAN_PATH, ("man_path_kstat_init: returns %d\n", status));
6984 
6985 	return (status);
6986 }
6987 
6988 static void
6989 man_path_kstat_uninit(man_path_t *mp)
6990 {
6991 	ASSERT(MUTEX_HELD(&man_lock));
6992 	man_kfree(mp->mp_last_knp, MAN_NUMSTATS * sizeof (kstat_named_t));
6993 }
6994 
6995 /*
6996  * man_work_alloc - allocate and initiate a work request structure
6997  *
6998  *	type - type of request to allocate
6999  *	returns	- success - ptr to an initialized work structure
7000  *		- failure - NULL
7001  */
7002 man_work_t *
7003 man_work_alloc(int type, int kmflag)
7004 {
7005 	man_work_t	*wp;
7006 
7007 	wp = man_kzalloc(sizeof (man_work_t), kmflag);
7008 	if (wp == NULL)
7009 		goto exit;
7010 
7011 	cv_init(&wp->mw_cv, NULL, CV_DRIVER, NULL); \
7012 	wp->mw_type = type;
7013 
7014 exit:
7015 	return (wp);
7016 }
7017 
7018 /*
7019  * man_work_free - deallocate a work request structure
7020  *
7021  *	wp - ptr to work structure to be freed
7022  */
7023 void
7024 man_work_free(man_work_t *wp)
7025 {
7026 	cv_destroy(&wp->mw_cv);
7027 	man_kfree((void *)wp, sizeof (man_work_t));
7028 }
7029 
7030 /*
7031  * Post work to a work queue.  The man_bwork sleeps on
7032  * man_bwork_q->q_cv, and work requesters may sleep on mw_cv.
7033  * The man_lock is used to protect both cv's.
7034  */
7035 void
7036 man_work_add(man_workq_t *q, man_work_t *wp)
7037 {
7038 	man_work_t	*lp = q->q_work;
7039 
7040 	if (lp) {
7041 		while (lp->mw_next != NULL)
7042 			lp = lp->mw_next;
7043 
7044 		lp->mw_next = wp;
7045 
7046 	} else {
7047 		q->q_work = wp;
7048 	}
7049 
7050 	/*
7051 	 * cv_signal for man_bwork_q, qenable for man_iwork_q
7052 	 */
7053 	if (q == man_bwork_q) {
7054 		cv_signal(&q->q_cv);
7055 
7056 	} else {	/* q == man_iwork_q */
7057 
7058 		if (man_ctl_wq != NULL)
7059 			qenable(man_ctl_wq);
7060 	}
7061 
7062 }
7063 
7064 /* <<<<<<<<<<<<<<<<<<<<<<< NDD SUPPORT FUNCTIONS	>>>>>>>>>>>>>>>>>>> */
7065 /*
7066  * ndd support functions to get/set parameters
7067  */
7068 
7069 /*
7070  * Register each element of the parameter array with the
7071  * named dispatch handler. Each element is loaded using
7072  * nd_load()
7073  *
7074  * 	cnt	- the number of elements present in the parameter array
7075  */
7076 static int
7077 man_param_register(param_t *manpa, int cnt)
7078 {
7079 	int	i;
7080 	ndgetf_t getp;
7081 	ndsetf_t setp;
7082 	int	status = B_TRUE;
7083 
7084 	MAN_DBG(MAN_CONFIG, ("man_param_register: manpa(0x%p) cnt %d\n",
7085 	    (void *)manpa, cnt));
7086 
7087 	getp = man_param_get;
7088 
7089 	for (i = 0; i < cnt; i++, manpa++) {
7090 		switch (man_param_display[i]) {
7091 		case MAN_NDD_GETABLE:
7092 			setp = NULL;
7093 			break;
7094 
7095 		case MAN_NDD_SETABLE:
7096 			setp = man_param_set;
7097 			break;
7098 
7099 		default:
7100 			continue;
7101 		}
7102 
7103 		if (!nd_load(&man_ndlist, manpa->param_name, getp,
7104 		    setp, (caddr_t)manpa)) {
7105 
7106 			(void) man_nd_free(&man_ndlist);
7107 			status = B_FALSE;
7108 			goto exit;
7109 		}
7110 	}
7111 
7112 	if (!nd_load(&man_ndlist, "man_pathgroups_report",
7113 	    man_pathgroups_report, NULL, NULL)) {
7114 
7115 		(void) man_nd_free(&man_ndlist);
7116 		status = B_FALSE;
7117 		goto exit;
7118 	}
7119 
7120 	if (!nd_load(&man_ndlist, "man_set_active_path",
7121 	    NULL, man_set_active_path, NULL)) {
7122 
7123 		(void) man_nd_free(&man_ndlist);
7124 		status = B_FALSE;
7125 		goto exit;
7126 	}
7127 
7128 	if (!nd_load(&man_ndlist, "man_get_hostinfo",
7129 	    man_get_hostinfo, NULL, NULL)) {
7130 
7131 		(void) man_nd_free(&man_ndlist);
7132 		status = B_FALSE;
7133 		goto exit;
7134 	}
7135 
7136 exit:
7137 
7138 	MAN_DBG(MAN_CONFIG, ("man_param_register: returns %d\n", status));
7139 
7140 	return (status);
7141 }
7142 
7143 static void
7144 man_nd_getset(queue_t *wq, mblk_t *mp)
7145 {
7146 
7147 	if (!nd_getset(wq, man_ndlist, mp))
7148 		miocnak(wq, mp, 0, ENOENT);
7149 	else
7150 		qreply(wq, mp);
7151 }
7152 
7153 /*ARGSUSED*/
7154 static int
7155 man_pathgroups_report(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7156 {
7157 
7158 	man_t		*manp;
7159 	man_pg_t	*mpg;
7160 	int		i;
7161 	char		pad[] = "                 "; /* 17 spaces */
7162 	int		pad_end;
7163 
7164 
7165 	MAN_DBG(MAN_PATH, ("man_pathgroups_report: wq(0x%p) mp(0x%p)"
7166 	    " caddr 0x%p", (void *)wq, (void *)mp, (void *)cp));
7167 
7168 	(void) mi_mpprintf(mp, "MAN Pathgroup report: (* == failed)");
7169 	(void) mi_mpprintf(mp, "====================================="
7170 	    "==========================================");
7171 
7172 	mutex_enter(&man_lock);
7173 
7174 	for (i = 0; i < 2; i++) {
7175 		manp = ddi_get_soft_state(man_softstate, i);
7176 		if (manp == NULL)
7177 			continue;
7178 
7179 	(void) mi_mpprintf(mp,
7180 	    "Interface\tDestination\t\tActive Path\tAlternate Paths");
7181 	(void) mi_mpprintf(mp, "---------------------------------------"
7182 	    "----------------------------------------");
7183 
7184 		for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7185 
7186 			(void) mi_mpprintf(mp, "%s%d\t\t",
7187 			    ddi_major_to_name(manp->man_meta_major),
7188 			    manp->man_meta_ppa);
7189 
7190 			if (man_is_on_domain) {
7191 				(void) mi_mpprintf_nr(mp, "Master SSC\t");
7192 				man_preport(mpg->mpg_pathp, mp);
7193 			} else {
7194 				if (i == 0) {
7195 					pad_end = 17 - strlen(ether_sprintf(
7196 					    &mpg->mpg_dst_eaddr));
7197 					if (pad_end < 0 || pad_end > 16)
7198 					pad_end = 0;
7199 					pad[pad_end] = '\0';
7200 
7201 					(void) mi_mpprintf_nr(mp, "%c %s%s",
7202 					    mpg->mpg_pg_id + 'A',
7203 					    ether_sprintf(&mpg->mpg_dst_eaddr),
7204 					    pad);
7205 
7206 					pad[pad_end] = ' ';
7207 				} else {
7208 					(void) mi_mpprintf_nr(mp,
7209 					    "Other SSC\t");
7210 				}
7211 				man_preport(mpg->mpg_pathp, mp);
7212 			}
7213 			(void) mi_mpprintf_nr(mp, "\n");
7214 		}
7215 	}
7216 
7217 	mutex_exit(&man_lock);
7218 	MAN_DBG(MAN_PATH, ("man_pathgroups_report: returns"));
7219 
7220 	return (0);
7221 }
7222 
7223 static void
7224 man_preport(man_path_t *plist, mblk_t *mp)
7225 {
7226 	man_path_t	*ap;
7227 
7228 	ap = man_find_active_path(plist);
7229 	/*
7230 	 * Active path
7231 	 */
7232 	if (ap != NULL) {
7233 		(void) mi_mpprintf_nr(mp, "\t%s%d\t\t",
7234 		    ddi_major_to_name(ap->mp_device.mdev_major),
7235 		    ap->mp_device.mdev_ppa);
7236 	} else {
7237 		(void) mi_mpprintf_nr(mp, "None \t");
7238 	}
7239 
7240 	/*
7241 	 * Alternate Paths.
7242 	 */
7243 	while (plist != NULL) {
7244 		(void) mi_mpprintf_nr(mp, "%s%d exp %d",
7245 		    ddi_major_to_name(plist->mp_device.mdev_major),
7246 		    plist->mp_device.mdev_ppa,
7247 		    plist->mp_device.mdev_exp_id);
7248 		if (plist->mp_device.mdev_state & MDEV_FAILED)
7249 			(void) mi_mpprintf_nr(mp, "*");
7250 		plist = plist->mp_next;
7251 		if (plist)
7252 			(void) mi_mpprintf_nr(mp, ", ");
7253 	}
7254 }
7255 
7256 /*
7257  * NDD request to set active path. Calling context is man_ioctl, so we are
7258  * exclusive in the inner perimeter.
7259  *
7260  *	Syntax is "ndd -set /dev/dman <man ppa> <pg_id> <phys ppa>"
7261  */
7262 /* ARGSUSED3 */
7263 static int
7264 man_set_active_path(queue_t *wq, mblk_t *mp, char *value, caddr_t cp,
7265     cred_t *cr)
7266 {
7267 	char		*end, *meta_ppap, *phys_ppap, *pg_idp;
7268 	int		meta_ppa;
7269 	int		phys_ppa;
7270 	int		pg_id;
7271 	man_t		*manp;
7272 	man_pg_t	*mpg;
7273 	man_path_t	*np;
7274 	mi_path_t	mpath;
7275 	int		status = 0;
7276 
7277 	MAN_DBG(MAN_PATH, ("man_set_active_path: wq(0x%p) mp(0x%p)"
7278 	    " args %s", (void *)wq, (void *)mp, value));
7279 
7280 	meta_ppap = value;
7281 
7282 	if ((pg_idp = strchr(value, ' ')) == NULL) {
7283 		status = EINVAL;
7284 		goto exit;
7285 	}
7286 
7287 	*pg_idp++ = '\0';
7288 
7289 	if ((phys_ppap = strchr(pg_idp, ' ')) == NULL) {
7290 		status = EINVAL;
7291 		goto exit;
7292 	}
7293 
7294 	*phys_ppap++ = '\0';
7295 
7296 	meta_ppa = (int)mi_strtol(meta_ppap, &end, 10);
7297 	pg_id = (int)mi_strtol(pg_idp, &end, 10);
7298 	phys_ppa = (int)mi_strtol(phys_ppap, &end, 10);
7299 
7300 	mutex_enter(&man_lock);
7301 	manp = ddi_get_soft_state(man_softstate, meta_ppa);
7302 	if (manp == NULL || manp->man_pg == NULL) {
7303 		status = EINVAL;
7304 		mutex_exit(&man_lock);
7305 		goto exit;
7306 	}
7307 
7308 	mpg = man_find_pg_by_id(manp->man_pg, pg_id);
7309 	if (mpg == NULL) {
7310 		status = EINVAL;
7311 		mutex_exit(&man_lock);
7312 		goto exit;
7313 	}
7314 
7315 	np = man_find_path_by_ppa(mpg->mpg_pathp, phys_ppa);
7316 
7317 	if (np == NULL) {
7318 		status = EINVAL;
7319 		mutex_exit(&man_lock);
7320 		goto exit;
7321 	}
7322 
7323 	mpath.mip_cmd = MI_PATH_ACTIVATE;
7324 	mpath.mip_pg_id = pg_id;
7325 	mpath.mip_man_ppa = meta_ppa;
7326 	mpath.mip_devs[0] = np->mp_device;
7327 	mpath.mip_ndevs = 1;
7328 
7329 	status = man_pg_cmd(&mpath, NULL);
7330 	mutex_exit(&man_lock);
7331 
7332 exit:
7333 
7334 	MAN_DBG(MAN_PATH, ("man_set_active_path: returns %d", status));
7335 
7336 	return (status);
7337 }
7338 
7339 /*
7340  * Dump out the contents of the IOSRAM handoff structure. Note that if
7341  * anything changes here, you must make sure that the sysinit script
7342  * stays in sync with this output.
7343  */
7344 /* ARGSUSED */
7345 static int
7346 man_get_hostinfo(queue_t *wq, mblk_t *mp, caddr_t cp, cred_t *cr)
7347 {
7348 	manc_t	manc;
7349 	char	*ipaddr;
7350 	char	ipv6addr[INET6_ADDRSTRLEN];
7351 	int	i;
7352 	int	status;
7353 
7354 	if (!man_is_on_domain)
7355 		return (0);
7356 
7357 	if (status = man_get_iosram(&manc)) {
7358 		return (status);
7359 	}
7360 
7361 	mi_mpprintf(mp, "manc_magic = 0x%x", manc.manc_magic);
7362 	mi_mpprintf(mp, "manc_version = 0%d", manc.manc_version);
7363 	mi_mpprintf(mp, "manc_csum = 0x%x", manc.manc_csum);
7364 
7365 	if (manc.manc_ip_type == AF_INET) {
7366 		in_addr_t	netnum;
7367 
7368 		mi_mpprintf(mp, "manc_ip_type = AF_INET");
7369 
7370 		ipaddr = man_inet_ntoa(manc.manc_dom_ipaddr);
7371 		mi_mpprintf(mp, "manc_dom_ipaddr = %s", ipaddr);
7372 
7373 		ipaddr = man_inet_ntoa(manc.manc_dom_ip_netmask);
7374 		mi_mpprintf(mp, "manc_dom_ip_netmask = %s", ipaddr);
7375 
7376 		netnum = manc.manc_dom_ipaddr & manc.manc_dom_ip_netmask;
7377 		ipaddr = man_inet_ntoa(netnum);
7378 		mi_mpprintf(mp, "manc_dom_ip_netnum = %s", ipaddr);
7379 
7380 		ipaddr = man_inet_ntoa(manc.manc_sc_ipaddr);
7381 		mi_mpprintf(mp, "manc_sc_ipaddr = %s", ipaddr);
7382 
7383 	} else if (manc.manc_ip_type == AF_INET6) {
7384 
7385 		mi_mpprintf(mp, "manc_ip_type = AF_INET6");
7386 
7387 		(void) inet_ntop(AF_INET6, (void *)&manc.manc_dom_ipv6addr,
7388 		    ipv6addr, INET6_ADDRSTRLEN);
7389 		mi_mpprintf(mp, "manc_dom_ipv6addr = %s", ipv6addr);
7390 
7391 		mi_mpprintf(mp, "manc_dom_ipv6_netmask = %d",
7392 		    manc.manc_dom_ipv6_netmask.s6_addr[0]);
7393 
7394 		(void) inet_ntop(AF_INET6, (void *)&manc.manc_sc_ipv6addr,
7395 		    ipv6addr, INET6_ADDRSTRLEN);
7396 		mi_mpprintf(mp, "manc_sc_ipv6addr = %s", ipv6addr);
7397 
7398 	} else {
7399 
7400 		mi_mpprintf(mp, "manc_ip_type = NONE");
7401 	}
7402 
7403 	mi_mpprintf(mp, "manc_dom_eaddr = %s",
7404 	    ether_sprintf(&manc.manc_dom_eaddr));
7405 	mi_mpprintf(mp, "manc_sc_eaddr = %s",
7406 	    ether_sprintf(&manc.manc_sc_eaddr));
7407 
7408 	mi_mpprintf(mp, "manc_iob_bitmap = 0x%x\tio boards = ",
7409 	    manc.manc_iob_bitmap);
7410 	for (i = 0; i < MAN_MAX_EXPANDERS; i++) {
7411 		if ((manc.manc_iob_bitmap >> i) & 0x1) {
7412 			mi_mpprintf_nr(mp, "%d.1, ", i);
7413 		}
7414 	}
7415 	mi_mpprintf(mp, "manc_golden_iob = %d", manc.manc_golden_iob);
7416 
7417 	return (0);
7418 }
7419 
/*
 * Format the four bytes of an IPv4 address, taken in memory order, as
 * a dotted decimal string.
 *
 *	returns	- pointer to a static buffer that is overwritten by the
 *		  next call
 */
static char *
man_inet_ntoa(in_addr_t in)
{
	static char	dotted[18];
	unsigned char	*octet = (unsigned char *)&in;

	(void) sprintf(dotted, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);

	return (dotted);
}
7430 
7431 /*
7432  * parameter value. cp points to the required parameter.
7433  */
7434 /* ARGSUSED */
7435 static int
7436 man_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
7437 {
7438 	param_t	*manpa = (param_t *)cp;
7439 
7440 	(void) mi_mpprintf(mp, "%u", manpa->param_val);
7441 	return (0);
7442 }
7443 
7444 /*
7445  * Sets the man parameter to the value in the param_register using
7446  * nd_load().
7447  */
7448 /* ARGSUSED */
7449 static int
7450 man_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
7451 {
7452 	char *end;
7453 	size_t new_value;
7454 	param_t	*manpa = (param_t *)cp;
7455 
7456 	new_value = mi_strtol(value, &end, 10);
7457 
7458 	if (end == value || new_value < manpa->param_min ||
7459 	    new_value > manpa->param_max) {
7460 			return (EINVAL);
7461 	}
7462 
7463 	manpa->param_val = new_value;
7464 
7465 	return (0);
7466 
7467 }
7468 
7469 /*
7470  * Free the Named Dispatch Table by calling man_nd_free
7471  */
7472 static void
7473 man_param_cleanup()
7474 {
7475 	if (man_ndlist != NULL)
7476 		nd_free(&man_ndlist);
7477 }
7478 
7479 /*
7480  * Free the table pointed to by 'ndp'
7481  */
7482 static void
7483 man_nd_free(caddr_t *nd_pparam)
7484 {
7485 	ND	*nd;
7486 
7487 	if ((nd = (ND *)(*nd_pparam)) != NULL) {
7488 		if (nd->nd_tbl)
7489 			mi_free((char *)nd->nd_tbl);
7490 		mi_free((char *)nd);
7491 		*nd_pparam = NULL;
7492 	}
7493 }
7494 
7495 
7496 /*
7497  * man_kstat_update - update the statistics for a meta-interface.
7498  *
7499  *	ksp - kstats struct
7500  *	rw - flag indicating whether stats are to be read or written.
7501  *
7502  *	returns	0
7503  *
7504  * The destination specific kstat information is protected by the
7505  * perimeter lock, so we submit a work request to get the stats
7506  * updated (see man_do_kstats()), and then collect the results
7507  * when cv_signal'd. Note that we are doing cv_timedwait_sig()
7508  * as a precautionary measure only.
7509  */
7510 static int
7511 man_kstat_update(kstat_t *ksp, int rw)
7512 {
7513 	man_t			*manp;		/* per instance data */
7514 	man_work_t		*wp;
7515 	int			status = 0;
7516 	kstat_named_t		*knp;
7517 	kstat_named_t		*man_knp;
7518 	int			i;
7519 
7520 	MAN_DBG(MAN_KSTAT, ("man_kstat_update: %s\n", rw ? "KSTAT_WRITE" :
7521 	    "KSTAT_READ"));
7522 
7523 	mutex_enter(&man_lock);
7524 	manp = (man_t *)ksp->ks_private;
7525 	manp->man_refcnt++;
7526 
7527 	/*
7528 	 * If the driver has been configured, get kstats updated by inner
7529 	 * perimeter prior to retrieving.
7530 	 */
7531 	if (man_config_state == MAN_CONFIGURED) {
7532 		clock_t wait_status;
7533 
7534 		man_update_path_kstats(manp);
7535 		wp = man_work_alloc(MAN_WORK_KSTAT_UPDATE, KM_SLEEP);
7536 		wp->mw_arg.a_man_ppa = manp->man_meta_ppa;
7537 		wp->mw_flags = MAN_WFLAGS_CVWAITER;
7538 		man_work_add(man_iwork_q, wp);
7539 
7540 		wait_status = cv_timedwait_sig(&wp->mw_cv, &man_lock,
7541 		    ddi_get_lbolt() + drv_usectohz(manp->man_kstat_waittime));
7542 
7543 		if (wp->mw_flags & MAN_WFLAGS_DONE) {
7544 			status = wp->mw_status;
7545 			man_work_free(wp);
7546 		} else {
7547 			ASSERT(wait_status <= 0);
7548 			wp->mw_flags &= ~MAN_WFLAGS_CVWAITER;
7549 			if (wait_status == 0)
7550 				status = EINTR;
7551 			else {
7552 				MAN_DBG(MAN_KSTAT, ("man_kstat_update: "
7553 				    "timedout, returning stale stats."));
7554 				status = 0;
7555 			}
7556 		}
7557 		if (status)
7558 			goto exit;
7559 	}
7560 
7561 	knp = (kstat_named_t *)ksp->ks_data;
7562 	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7563 
7564 	if (rw == KSTAT_READ) {
7565 		for (i = 0; i < MAN_NUMSTATS; i++) {
7566 			knp[i].value.ui64 = man_knp[i].value.ui64;
7567 		}
7568 	} else {
7569 		for (i = 0; i < MAN_NUMSTATS; i++) {
7570 			man_knp[i].value.ui64 = knp[i].value.ui64;
7571 		}
7572 	}
7573 
7574 exit:
7575 	manp->man_refcnt--;
7576 	mutex_exit(&man_lock);
7577 
7578 	MAN_DBG(MAN_KSTAT, ("man_kstat_update: returns %d", status));
7579 
7580 	return (status);
7581 }
7582 
7583 /*
7584  * Sum destination kstats for all active paths for a given instance of the
7585  * MAN driver. Called with perimeter lock.
7586  */
7587 static void
7588 man_do_kstats(man_work_t *wp)
7589 {
7590 	man_t		*manp;
7591 	man_pg_t	*mpg;
7592 	man_path_t	*mp;
7593 
7594 	MAN_DBG(MAN_KSTAT, ("man_do_kstats:"));
7595 
7596 	mutex_enter(&man_lock);
7597 	/*
7598 	 * Sync mp_last_knp for each path associated with the MAN instance.
7599 	 */
7600 	manp = (man_t *)ddi_get_soft_state(man_softstate,
7601 	    wp->mw_arg.a_man_ppa);
7602 	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7603 
7604 		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7605 
7606 		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7607 
7608 			MAN_DBG(MAN_KSTAT, ("\tkstat: path"));
7609 			MAN_DBGCALL(MAN_KSTAT, man_print_path(mp));
7610 
7611 			/*
7612 			 * We just to update the destination statistics here.
7613 			 */
7614 			man_sum_dests_kstats(mp->mp_last_knp, mpg);
7615 		}
7616 	}
7617 	mutex_exit(&man_lock);
7618 	MAN_DBG(MAN_KSTAT, ("man_do_kstats: returns"));
7619 }
7620 
7621 /*
7622  * Sum device kstats for all active paths for a given instance of the
7623  * MAN driver. Called with man_lock.
7624  */
7625 static void
7626 man_update_path_kstats(man_t *manp)
7627 {
7628 	kstat_named_t	*man_knp;
7629 	man_pg_t	*mpg;
7630 	man_path_t	*mp;
7631 
7632 	ASSERT(MUTEX_HELD(&man_lock));
7633 	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats:"));
7634 
7635 	man_knp = (kstat_named_t *)manp->man_ksp->ks_data;
7636 
7637 	for (mpg = manp->man_pg; mpg != NULL; mpg = mpg->mpg_next) {
7638 
7639 		ASSERT(mpg->mpg_man_ppa == manp->man_meta_ppa);
7640 
7641 		if ((mp = man_find_active_path(mpg->mpg_pathp)) != NULL) {
7642 
7643 			man_update_dev_kstats(man_knp, mp);
7644 
7645 		}
7646 	}
7647 	MAN_DBG(MAN_KSTAT, ("man_update_path_kstats: returns"));
7648 }
7649 
7650 /*
7651  * Update the device kstats.
7652  * As man_kstat_update() is called with kstat_chain_lock held,
7653  * we can safely update the statistics from the underlying driver here.
7654  */
7655 static void
7656 man_update_dev_kstats(kstat_named_t *man_knp, man_path_t *mp)
7657 {
7658 	kstat_t		*dev_ksp;
7659 	major_t		major;
7660 	int		instance;
7661 	char		buf[KSTAT_STRLEN];
7662 
7663 
7664 	major = mp->mp_device.mdev_major;
7665 	instance = mp->mp_device.mdev_ppa;
7666 	(void) sprintf(buf, "%s%d", ddi_major_to_name(major), instance);
7667 
7668 	dev_ksp = kstat_hold_byname(ddi_major_to_name(major), instance, buf,
7669 	    ALL_ZONES);
7670 	if (dev_ksp != NULL) {
7671 
7672 		KSTAT_ENTER(dev_ksp);
7673 		KSTAT_UPDATE(dev_ksp, KSTAT_READ);
7674 		man_sum_kstats(man_knp, dev_ksp, mp->mp_last_knp);
7675 		KSTAT_EXIT(dev_ksp);
7676 		kstat_rele(dev_ksp);
7677 
7678 	} else {
7679 		MAN_DBG(MAN_KSTAT,
7680 		    ("man_update_dev_kstats: no kstat data found for %s(%d,%d)",
7681 		    buf, major, instance));
7682 	}
7683 }
7684 
7685 static void
7686 man_sum_dests_kstats(kstat_named_t *knp, man_pg_t *mpg)
7687 {
7688 	int		i;
7689 	int		flags;
7690 	char		*statname;
7691 	manstr_t	*msp;
7692 	man_dest_t	*mdp;
7693 	uint64_t	switches = 0;
7694 	uint64_t	linkfails = 0;
7695 	uint64_t	linkstales = 0;
7696 	uint64_t	icmpv4probes = 0;
7697 	uint64_t	icmpv6probes = 0;
7698 
7699 	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: mpg 0x%p", (void *)mpg));
7700 
7701 	for (msp = man_strup; msp != NULL; msp = msp->ms_next) {
7702 
7703 		if (!man_str_uses_pg(msp, mpg))
7704 			continue;
7705 
7706 		mdp = &msp->ms_dests[mpg->mpg_pg_id];
7707 
7708 		switches += mdp->md_switches;
7709 		linkfails += mdp->md_linkfails;
7710 		linkstales += mdp->md_linkstales;
7711 		icmpv4probes += mdp->md_icmpv4probes;
7712 		icmpv6probes += mdp->md_icmpv6probes;
7713 	}
7714 
7715 	for (i = 0; i < MAN_NUMSTATS; i++) {
7716 
7717 		statname = man_kstat_info[i].mk_name;
7718 		flags = man_kstat_info[i].mk_flags;
7719 
7720 		if (!(flags & MK_NOT_PHYSICAL))
7721 			continue;
7722 
7723 		if (strcmp(statname, "man_switches") == 0) {
7724 			knp[i].value.ui64 = switches;
7725 		} else if (strcmp(statname, "man_link_fails") == 0) {
7726 			knp[i].value.ui64 = linkfails;
7727 		} else if (strcmp(statname, "man_link_stales") == 0) {
7728 			knp[i].value.ui64 = linkstales;
7729 		} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7730 			knp[i].value.ui64 = icmpv4probes;
7731 		} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7732 			knp[i].value.ui64 = icmpv6probes;
7733 		}
7734 	}
7735 
7736 	MAN_DBG(MAN_KSTAT, ("man_sum_dests_kstats: returns"));
7737 }
7738 
7739 /*
7740  * Initialize MAN named kstats in the space provided.
7741  */
7742 static void
7743 man_kstat_named_init(kstat_named_t *knp, int num_stats)
7744 {
7745 	int	i;
7746 
7747 	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: knp(0x%p) num_stats = %d",
7748 	    (void *)knp, num_stats));
7749 
7750 	for (i = 0; i < num_stats; i++) {
7751 		kstat_named_init(&knp[i], man_kstat_info[i].mk_name,
7752 		    man_kstat_info[i].mk_type);
7753 	}
7754 
7755 	MAN_DBG(MAN_KSTAT, ("man_kstat_named_init: returns"));
7756 
7757 }
7758 
7759 /*
7760  * man_kstat_byname - get a kernel stat value from its structure
7761  *
7762  *	ksp - kstat_t structure to play with
7763  *	s   - string to match names with
7764  *	res - in/out result data pointer
7765  *
7766  *	returns	- success - 1 (found)
7767  *		- failure - 0 (not found)
7768  */
7769 static int
7770 man_kstat_byname(kstat_t *ksp, char *s, kstat_named_t *res)
7771 {
7772 	int		found = 0;
7773 
7774 	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: GETTING %s\n", s));
7775 
7776 	if (ksp->ks_type == KSTAT_TYPE_NAMED) {
7777 		kstat_named_t *knp;
7778 
7779 		for (knp = KSTAT_NAMED_PTR(ksp);
7780 		    (caddr_t)knp < ((caddr_t)ksp->ks_data+ksp->ks_data_size);
7781 		    knp++) {
7782 
7783 			if (strcmp(s, knp->name) == NULL) {
7784 
7785 				res->data_type = knp->data_type;
7786 				res->value = knp->value;
7787 				found++;
7788 
7789 				MAN_DBG(MAN_KSTAT2, ("\t%s: %d\n", knp->name,
7790 				    (int)knp->value.ul));
7791 			}
7792 		}
7793 	} else {
7794 		MAN_DBG(MAN_KSTAT2, ("\tbad kstats type %d\n", ksp->ks_type));
7795 	}
7796 
7797 	/*
7798 	 * if getting a value but couldn't find the namestring, result = 0.
7799 	 */
7800 	if (!found) {
7801 		/*
7802 		 * a reasonable default
7803 		 */
7804 		res->data_type = KSTAT_DATA_ULONG;
7805 		res->value.l = 0;
7806 		MAN_DBG(MAN_KSTAT2, ("\tcouldn't find, using defaults\n"));
7807 	}
7808 
7809 	MAN_DBG(MAN_KSTAT2, ("man_kstat_byname: returns\n"));
7810 
7811 	return (found);
7812 }
7813 
7814 
7815 /*
7816  *
7817  * Accumulate MAN driver kstats from the incremental values of the underlying
7818  * physical interfaces.
7819  *
7820  * Parameters:
7821  *	sum_knp		- The named kstat area to put cumulative value,
7822  *			  NULL if we just want to sync next two params.
7823  *	phys_ksp	- Physical interface kstat_t pointer. Contains
7824  *			  more current counts.
7825  * 	phys_last_knp	- counts from the last time we were called for this
7826  *			  physical interface. Note that the name kstats
7827  *			  pointed to are actually in MAN format, but they
7828  *			  hold the mirrored physical devices last read
7829  *			  kstats.
7830  * Basic algorithm is:
7831  *
7832  * 	for each named kstat variable {
7833  *	    sum_knp[i] += (phys_ksp->ksp_data[i] - phys_last_knp[i]);
7834  *	    phys_last_knp[i] = phys_ksp->ksp_data[i];
7835  *	}
7836  *
7837  */
7838 static void
7839 man_sum_kstats(kstat_named_t *sum_knp, kstat_t *phys_ksp,
7840 	kstat_named_t *phys_last_knp)
7841 {
7842 	char		*physname;
7843 	char		*physalias;
7844 	char		*statname;
7845 	kstat_named_t	phys_kn_entry;
7846 	uint64_t	delta64;
7847 	int		i;
7848 
7849 	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: sum_knp(0x%p) phys_ksp(0x%p)"
7850 	    " phys_last_knp(0x%p)\n", (void *)sum_knp, (void *)phys_ksp,
7851 	    (void *)phys_last_knp));
7852 
7853 	/*
7854 	 * Now for each entry in man_kstat_info, sum the named kstat.
7855 	 * Not that all MAN specific kstats will end up !found.
7856 	 */
7857 	for (i = 0; i < MAN_NUMSTATS; i++) {
7858 		int	found = 0;
7859 		int	flags = 0;
7860 
7861 		delta64 = 0;
7862 
7863 		statname = man_kstat_info[i].mk_name;
7864 		physname = man_kstat_info[i].mk_physname;
7865 		physalias = man_kstat_info[i].mk_physalias;
7866 		flags = man_kstat_info[i].mk_flags;
7867 
7868 		/*
7869 		 * Update MAN private kstats.
7870 		 */
7871 		if (flags & MK_NOT_PHYSICAL) {
7872 
7873 			kstat_named_t	*knp = phys_last_knp;
7874 
7875 			if (sum_knp == NULL)
7876 				continue;
7877 
7878 			if (strcmp(statname, "man_switches") == 0) {
7879 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7880 			} else if (strcmp(statname, "man_link_fails") == 0) {
7881 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7882 			} else if (strcmp(statname, "man_link_stales") == 0) {
7883 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7884 			} else if (strcmp(statname, "man_icmpv4_probes") == 0) {
7885 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7886 			} else if (strcmp(statname, "man_icmpv6_probes") == 0) {
7887 				sum_knp[i].value.ui64 = knp[i].value.ui64;
7888 			}
7889 
7890 			continue;	/* phys_ksp doesnt have this stat */
7891 		}
7892 
7893 		/*
7894 		 * first try it by the "official" name
7895 		 */
7896 		if (phys_ksp) {
7897 			if (man_kstat_byname(phys_ksp, physname,
7898 			    &phys_kn_entry)) {
7899 
7900 				found = 1;
7901 
7902 			} else if ((physalias) && (man_kstat_byname(phys_ksp,
7903 			    physalias, &phys_kn_entry))) {
7904 
7905 				found = 1;
7906 			}
7907 		}
7908 
7909 		if (!found) {
7910 			/*
7911 			 * clear up the "last" value, no change to the sum
7912 			 */
7913 			phys_last_knp[i].value.ui64 = 0;
7914 			continue;
7915 		}
7916 
7917 		/*
7918 		 * at this point, we should have the good underlying
7919 		 * kstat value stored in phys_kn_entry
7920 		 */
7921 		if (flags & MK_NOT_COUNTER) {
7922 			/*
7923 			 * it isn't a counter, so store the value and
7924 			 * move on (e.g. ifspeed)
7925 			 */
7926 			phys_last_knp[i].value = phys_kn_entry.value;
7927 			continue;
7928 		}
7929 
7930 		switch (phys_kn_entry.data_type) {
7931 		case KSTAT_DATA_UINT32:
7932 
7933 			/*
7934 			 * this handles 32-bit wrapping
7935 			 */
7936 			if (phys_kn_entry.value.ui32 <
7937 			    phys_last_knp[i].value.ui32) {
7938 
7939 				/*
7940 				 * we've wrapped!
7941 				 */
7942 				delta64 += (UINT_MAX -
7943 				    phys_last_knp[i].value.ui32);
7944 				phys_last_knp[i].value.ui32 = 0;
7945 			}
7946 
7947 			delta64 += phys_kn_entry.value.ui32 -
7948 			    phys_last_knp[i].value.ui32;
7949 			phys_last_knp[i].value.ui32 = phys_kn_entry.value.ui32;
7950 			break;
7951 
7952 		default:
7953 			/*
7954 			 * must be a 64-bit value, we ignore 64-bit
7955 			 * wraps, since they shouldn't ever happen
7956 			 * within the life of a machine (if we assume
7957 			 * machines don't stay up for more than a few
7958 			 * hundred years without a reboot...)
7959 			 */
7960 			delta64 = phys_kn_entry.value.ui64 -
7961 			    phys_last_knp[i].value.ui64;
7962 			phys_last_knp[i].value.ui64 = phys_kn_entry.value.ui64;
7963 		}
7964 
7965 		if (sum_knp != NULL) {
7966 			/*
7967 			 * now we need to save the value
7968 			 */
7969 			switch (sum_knp[i].data_type) {
7970 			case KSTAT_DATA_UINT32:
7971 				/* trunk down to 32 bits, possibly lossy */
7972 				sum_knp[i].value.ui32 += (uint32_t)delta64;
7973 				break;
7974 
7975 			default:
7976 				sum_knp[i].value.ui64 += delta64;
7977 				break;
7978 			}
7979 		}
7980 	}
7981 
7982 	MAN_DBG(MAN_KSTAT, ("man_sum_kstats: returns\n"));
7983 }
7984 
7985 
7986 #if defined(DEBUG)
7987 
7988 
/*
 * Names used by man_print_msp() for ms_flags.  Entry 0 stands for "no
 * flags"; entry n + 1 names bit n.
 */
static char *_ms_flags[] = {
	"NONE",		/* no bit set */
	"FAST",		/* bit 0,  0x1 */
	"RAW",		/* bit 1,  0x2 */
	"ALLPHYS",	/* bit 2,  0x4 */
	"ALLMULTI",	/* bit 3,  0x8 */
	"ALLSAP",	/* bit 4,  0x10 */
	"CKSUM",	/* bit 5,  0x20 */
	"MULTI",	/* bit 6,  0x40 */
	"SERLPBK",	/* bit 7,  0x80 */
	"MACLPBK",	/* bit 8,  0x100 */
	"CLOSING",	/* bit 9,  0x200 */
	"CLOSE_DONE",	/* bit 10, 0x400 */
	"CONTROL"	/* bit 11, 0x800 */
};
8004 
8005 static void
8006 man_print_msp(manstr_t *msp)
8007 {
8008 	char	buf[512];
8009 	char	prbuf[512];
8010 	uint_t	flags;
8011 	int	i;
8012 
8013 	cmn_err(CE_CONT, "\tmsp(0x%p)\n", (void *)msp);
8014 
8015 	if (msp == NULL)
8016 		return;
8017 
8018 	cmn_err(CE_CONT, "\t%s%d SAP(0x%x):\n",
8019 	    ddi_major_to_name(msp->ms_meta_maj), msp->ms_meta_ppa,
8020 	    msp->ms_sap);
8021 
8022 	buf[0] = '\0';
8023 	prbuf[0] = '\0';
8024 	flags = msp->ms_flags;
8025 	for (i = 0; i < A_CNT(_ms_flags); i++) {
8026 		if ((flags >> i) & 0x1) {
8027 			sprintf(buf, " %s |", _ms_flags[i+1]);
8028 			strcat(prbuf, buf);
8029 		}
8030 	}
8031 	prbuf[strlen(prbuf) - 1] = '\0';
8032 	cmn_err(CE_CONT, "\tms_flags: %s\n", prbuf);
8033 
8034 	cmn_err(CE_CONT, "\tms_dlpistate: %s\n", dss[msp->ms_dlpistate]);
8035 
8036 	cmn_err(CE_CONT, "\tms_dl_mp: 0x%p\n", (void *)msp->ms_dl_mp);
8037 
8038 	cmn_err(CE_CONT, "\tms_manp: 0x%p\n", (void *)msp->ms_manp);
8039 
8040 	cmn_err(CE_CONT, "\tms_dests: 0x%p\n", (void *)msp->ms_dests);
8041 
8042 }
8043 
/*
 * Names used by man_print_mdp() for md_state.  Entry 0 is printed for a
 * state of 0; entry n + 1 names bit n.
 */
static char *_md_state[] = {
	"NOTPRESENT",		/* state == 0x0 */
	"INITIALIZING",		/* bit 0, 0x1 */
	"READY",		/* bit 1, 0x2 */
	"PLUMBING",		/* bit 2, 0x4 */
	"CLOSING"		/* bit 3, 0x8 */
};
8051 
8052 static void
8053 man_print_mdp(man_dest_t *mdp)
8054 {
8055 	uint_t		state;
8056 	int		i;
8057 	char		buf[64];
8058 	char		prbuf[512];
8059 
8060 	buf[0] = '\0';
8061 	prbuf[0] = '\0';
8062 
8063 	cmn_err(CE_CONT, "\tmdp(0x%p)\n", (void *)mdp);
8064 
8065 	if (mdp == NULL)
8066 		return;
8067 
8068 	cmn_err(CE_CONT, "\tmd_pg_id: %d\n", mdp->md_pg_id);
8069 	cmn_err(CE_CONT, "\tmd_dst_eaddr: %s\n",
8070 	    ether_sprintf(&mdp->md_dst_eaddr));
8071 	cmn_err(CE_CONT, "\tmd_src_eaddr: %s\n",
8072 	    ether_sprintf(&mdp->md_src_eaddr));
8073 	cmn_err(CE_CONT, "\tmd_dlpistate: %s", dss[mdp->md_dlpistate]);
8074 	cmn_err(CE_CONT, "\tmd_muxid: 0x%u", mdp->md_muxid);
8075 	cmn_err(CE_CONT, "\tmd_rcvcnt %lu md_lastrcvcnt %lu", mdp->md_rcvcnt,
8076 	    mdp->md_lastrcvcnt);
8077 
8078 	/*
8079 	 * Print out state as text.
8080 	 */
8081 	state = mdp->md_state;
8082 
8083 	if (state == 0) {
8084 		strcat(prbuf, _md_state[0]);
8085 	} else {
8086 
8087 		for (i = 0; i < A_CNT(_md_state); i++) {
8088 			if ((state >> i) & 0x1)  {
8089 				sprintf(buf, " %s |", _md_state[i+1]);
8090 				strcat(prbuf, buf);
8091 			}
8092 		}
8093 		prbuf[strlen(prbuf) -1] = '\0';
8094 	}
8095 	cmn_err(CE_CONT, "\tmd_state: %s", prbuf);
8096 
8097 	cmn_err(CE_CONT, "\tmd_device:\n");
8098 	man_print_dev(&mdp->md_device);
8099 
8100 }
8101 
8102 static void
8103 man_print_man(man_t *manp)
8104 {
8105 	char	buf[512];
8106 	char	prbuf[512];
8107 
8108 	buf[0] = '\0';
8109 	prbuf[0] = '\0';
8110 
8111 	if (manp == NULL)
8112 		return;
8113 
8114 	if (ddi_major_to_name(manp->man_meta_major)) {
8115 		sprintf(buf, "\t man_device: %s%d\n",
8116 		    ddi_major_to_name(manp->man_meta_major),
8117 		    manp->man_meta_ppa);
8118 	} else {
8119 		sprintf(buf, "\t major: %d", manp->man_meta_major);
8120 		sprintf(buf, "\t ppa: %d", manp->man_meta_ppa);
8121 	}
8122 
8123 	cmn_err(CE_CONT, "%s", buf);
8124 
8125 }
8126 
/*
 * Names used by man_print_dev() for mdev_state.  Entry 0 is printed for
 * a state of 0 (its two trailing blanks are trimmed off by
 * man_print_dev()); entry n + 1 names bit n.
 */
static char *_mdev_state[] = {
	"UNASSIGNED  ",		/* state == 0 */
	"ASSIGNED",		/* bit 0 */
	"ACTIVE",		/* bit 1 */
	"FAILED"		/* bit 2 */
};
8133 
8134 static void
8135 man_print_dev(man_dev_t *mdevp)
8136 {
8137 	char	buf[512];
8138 	char	prbuf[512];
8139 	int	i;
8140 	uint_t	state;
8141 
8142 	buf[0] = '\0';
8143 	prbuf[0] = '\0';
8144 
8145 	if (mdevp == NULL)
8146 		return;
8147 
8148 	if (mdevp->mdev_major == 0) {
8149 number:
8150 		sprintf(buf, "\t mdev_major: %d\n", mdevp->mdev_major);
8151 	} else if (ddi_major_to_name(mdevp->mdev_major)) {
8152 		sprintf(buf, "\t mdev_device: %s%d\n",
8153 		    ddi_major_to_name(mdevp->mdev_major),
8154 		    mdevp->mdev_ppa);
8155 	} else
8156 		goto number;
8157 
8158 	cmn_err(CE_CONT, "%s", buf);
8159 
8160 	cmn_err(CE_CONT, "\t mdev_exp_id: %d\n", mdevp->mdev_exp_id);
8161 
8162 	buf[0] = '\0';
8163 	prbuf[0] = '\0';
8164 	state = mdevp->mdev_state;
8165 
8166 	if (state == 0) {
8167 		strcat(prbuf, _mdev_state[0]);
8168 	} else {
8169 		for (i = 0; i < A_CNT(_mdev_state); i++) {
8170 			if ((state >> i) & 0x1) {
8171 				sprintf(buf, " %s |", _mdev_state[i+1]);
8172 				strcat(prbuf, buf);
8173 			}
8174 		}
8175 	}
8176 
8177 	prbuf[strlen(prbuf) - 2] = '\0';
8178 
8179 	cmn_err(CE_CONT, "\t mdev_state: %s\n", prbuf);
8180 
8181 }
8182 
/*
 * Command names printed by man_print_mip(); the table is indexed by the
 * mip_cmd field of an mi_path_t.
 */
static char *_mip_cmd[] = {
	"MI_PATH_READ",
	"MI_PATH_ASSIGN",
	"MI_PATH_ACTIVATE",
	"MI_PATH_DEACTIVATE",
	"MI_PATH_UNASSIGN"
};
8190 
8191 static void
8192 man_print_mtp(mi_time_t *mtp)
8193 {
8194 	cmn_err(CE_CONT, "\tmtp(0x%p)\n", (void *)mtp);
8195 
8196 	if (mtp == NULL)
8197 		return;
8198 
8199 	cmn_err(CE_CONT, "\tmtp_instance: %d\n", mtp->mtp_man_ppa);
8200 
8201 	cmn_err(CE_CONT, "\tmtp_time: %d\n", mtp->mtp_time);
8202 
8203 }
8204 
8205 static void
8206 man_print_mip(mi_path_t *mip)
8207 {
8208 	cmn_err(CE_CONT, "\tmip(0x%p)\n", (void *)mip);
8209 
8210 	if (mip == NULL)
8211 		return;
8212 
8213 	cmn_err(CE_CONT, "\tmip_pg_id: %d\n", mip->mip_pg_id);
8214 
8215 	cmn_err(CE_CONT, "\tmip_cmd: %s\n", _mip_cmd[mip->mip_cmd]);
8216 
8217 	cmn_err(CE_CONT, "\tmip_eaddr: %s\n", ether_sprintf(&mip->mip_eaddr));
8218 
8219 	cmn_err(CE_CONT, "\tmip_devs: 0x%p\n", (void *)mip->mip_devs);
8220 
8221 	cmn_err(CE_CONT, "\tmip_ndevs: %d\n", mip->mip_ndevs);
8222 
8223 }
8224 
8225 static void
8226 man_print_mpg(man_pg_t *mpg)
8227 {
8228 	cmn_err(CE_CONT, "\tmpg(0x%p)\n", (void *)mpg);
8229 
8230 	if (mpg == NULL)
8231 		return;
8232 
8233 	cmn_err(CE_CONT, "\tmpg_next: 0x%p\n", (void *)mpg->mpg_next);
8234 
8235 	cmn_err(CE_CONT, "\tmpg_pg_id: %d\n", mpg->mpg_pg_id);
8236 
8237 	cmn_err(CE_CONT, "\tmpg_man_ppa: %d\n", mpg->mpg_man_ppa);
8238 
8239 	cmn_err(CE_CONT, "\tmpg_dst_eaddr: %s\n",
8240 	    ether_sprintf(&mpg->mpg_dst_eaddr));
8241 
8242 	cmn_err(CE_CONT, "\tmpg_pathp: 0x%p\n", (void *)mpg->mpg_pathp);
8243 
8244 }
8245 
/*
 * Names used by man_print_work() to render the mw_flags field of a
 * man_work_t.  The trailing comment on each entry is the flag value the
 * name is meant to stand for.
 *
 * NOTE(review): man_print_work() decodes mw_flags one bit at a time, so
 * "DONE" would be expected to correspond to a single bit rather than
 * 0x3 -- confirm against the flag definitions.
 */
static char *_mw_flags[] = {
	"NOWAITER",		/* 0x0 */
	"CVWAITER",		/* 0x1 */
	"QWAITER",		/* 0x2 */
	"DONE"		/* 0x3 */
};
8252 
8253 static void
8254 man_print_work(man_work_t *wp)
8255 {
8256 	int 	i;
8257 
8258 	cmn_err(CE_CONT, "\twp(0x%p)\n\n", (void *)wp);
8259 
8260 	if (wp == NULL)
8261 		return;
8262 
8263 	cmn_err(CE_CONT, "\tmw_type: %s\n", _mw_type[wp->mw_type]);
8264 
8265 	cmn_err(CE_CONT, "\tmw_flags: ");
8266 	for (i = 0; i < A_CNT(_mw_flags); i++) {
8267 		if ((wp->mw_flags >> i) & 0x1)
8268 			cmn_err(CE_CONT, "%s", _mw_flags[i]);
8269 	}
8270 	cmn_err(CE_CONT, "\n");
8271 
8272 	cmn_err(CE_CONT, "\twp_status: %d\n", wp->mw_status);
8273 
8274 	cmn_err(CE_CONT, "\twp_arg: 0x%p\n", (void *)&wp->mw_arg);
8275 
8276 	cmn_err(CE_CONT, "\tmw_next: 0x%p\n", (void *)wp->mw_next);
8277 
8278 	cmn_err(CE_CONT, "\twp_q: 0x%p", (void *)wp->mw_q);
8279 
8280 }
8281 
8282 static void
8283 man_print_path(man_path_t *mp)
8284 {
8285 	cmn_err(CE_CONT, "\tmp(0x%p)\n\n", (void *)mp);
8286 
8287 	if (mp == NULL)
8288 		return;
8289 
8290 	cmn_err(CE_CONT, "\tmp_device:");
8291 	man_print_dev(&mp->mp_device);
8292 
8293 	cmn_err(CE_CONT, "\tmp_next: 0x%p\n", (void *)mp->mp_next);
8294 
8295 	cmn_err(CE_CONT, "\tmp_last_knp: 0x%p\n", (void *)mp->mp_last_knp);
8296 
8297 	cmn_err(CE_CONT, "\tmp_lru: 0x%lx", mp->mp_lru);
8298 
8299 }
8300 
8301 void *
8302 man_dbg_kzalloc(int line, size_t size, int kmflags)
8303 {
8304 	void *tmp;
8305 
8306 	tmp = kmem_zalloc(size, kmflags);
8307 	MAN_DBG(MAN_KMEM, ("0x%p %lu\tzalloc'd @ %d\n", (void *)tmp,
8308 	    size, line));
8309 
8310 	return (tmp);
8311 
8312 }
8313 
8314 void
8315 man_dbg_kfree(int line, void *buf, size_t size)
8316 {
8317 
8318 	MAN_DBG(MAN_KMEM, ("0x%p %lu\tfree'd @ %d\n", (void *)buf, size, line));
8319 
8320 	kmem_free(buf, size);
8321 
8322 }
8323 
8324 #endif  /* DEBUG */
8325