xref: /titanic_44/usr/src/uts/sun4v/io/vsw.c (revision d62bc4badc1c1f1549c961cfb8b420e650e1272b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 
76 /*
77  * Function prototypes.
78  */
79 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
80 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
81 static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
82 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
83 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
92 static	int vsw_read_mdprops(vsw_t *vswp);
93 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
94 
95 /* Mac driver related routines */
96 static int vsw_mac_register(vsw_t *);
97 static int vsw_mac_unregister(vsw_t *);
98 static int vsw_m_stat(void *, uint_t, uint64_t *);
99 static void vsw_m_stop(void *arg);
100 static int vsw_m_start(void *arg);
101 static int vsw_m_unicst(void *arg, const uint8_t *);
102 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
103 static int vsw_m_promisc(void *arg, boolean_t);
104 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
105 static uint_t vsw_rx_softintr(caddr_t arg1, caddr_t arg2);
106 void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
107     mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags);
108 
109 /*
110  * Functions imported from other files.
111  */
112 extern void vsw_setup_switching_timeout(void *arg);
113 extern void vsw_stop_switching_timeout(vsw_t *vswp);
114 extern int vsw_setup_switching(vsw_t *);
115 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
116 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
117 extern void vsw_del_mcst_vsw(vsw_t *);
118 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
119 extern int vsw_detach_ports(vsw_t *vswp);
120 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
121 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
122 extern	int vsw_port_attach(vsw_t *vswp, int p_instance,
123 	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
124 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
125 extern int vsw_mac_attach(vsw_t *vswp);
126 extern void vsw_mac_detach(vsw_t *vswp);
127 extern int vsw_mac_open(vsw_t *vswp);
128 extern void vsw_mac_close(vsw_t *vswp);
129 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
130 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
131 extern void vsw_reconfig_hw(vsw_t *);
132 extern void vsw_unset_addrs(vsw_t *vswp);
133 extern void vsw_set_addrs(vsw_t *vswp);
134 
135 
136 /*
137  * Internal tunables.
138  */
139 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
140 int	vsw_wretries = 100;		/* # of write attempts */
141 int	vsw_desc_delay = 0;		/* delay in us */
142 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
143 int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
144 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
145 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
146 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
147 int	vsw_ldc_tx_max_failures = 40;	/* Max ldc tx failures */
148 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
149 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
150 
151 
152 /*
153  * External tunables.
154  */
155 /*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
158  */
159 boolean_t vsw_multi_ring_enable = B_FALSE;
160 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
161 
162 /*
163  * Max number of mblks received in one receive operation.
164  */
165 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
166 
167 /*
168  * Tunables for three different pools, that is, the size and
169  * number of mblks for each pool.
170  */
171 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
172 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
173 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
174 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
175 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
176 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
177 
/*
 * MAC callbacks
 *
 * Table of entry points handed to the MAC layer; vsw_mac_register()
 * stores its address in macp->m_callbacks before calling mac_register().
 * The initializer is positional, so the order of the entries must follow
 * the member order of mac_callbacks_t (declared in <sys/mac.h>, not
 * visible here).
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,		/* presumably the optional-callback flags - confirm */
	vsw_m_stat,	/* statistics, read from the underlying device */
	vsw_m_start,	/* marks the interface up */
	vsw_m_stop,	/* marks the interface down */
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,	/* always fails; address comes from the MD */
	vsw_m_tx,
	NULL,		/* remaining entry points are not provided */
	NULL,
	NULL
};
194 
/*
 * Character/block entry points, referenced from vsw_ops below.
 * open and close are nulldev; every data-path entry point is nodev.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
215 
/*
 * Device operations vector. It is referenced by vswmodldrv and is also
 * the argument given to mac_init_ops()/mac_fini_ops() in _init()/_fini().
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};
229 
/*
 * Driver linkage structure: ties the generic driver module operations
 * to this driver's description string and its dev_ops vector. It is
 * referenced from the modlinkage structure used by _init/_fini/_info.
 */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
236 
/*
 * Take / drop all three locks of an LDC channel structure.
 *
 * LDC_ENTER_LOCK acquires ldc_cblock, then ldc_rxlock, then ldc_txlock;
 * LDC_EXIT_LOCK releases them in the reverse order.
 *
 * Each macro expands to a single statement (do { } while (0)) so that it
 * is safe as the body of an unbraced if/else or loop; the previous
 * expansion was three separate statements, of which only the first would
 * have been controlled by the condition. Invoke them like a function
 * call, followed by a semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	_NOTE(CONSTCOND) } while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	_NOTE(CONSTCOND) } while (0)
245 
246 /* Driver soft state ptr  */
247 static void	*vsw_state;
248 
249 /*
250  * Linked list of "vsw_t" structures - one per instance.
251  */
252 vsw_t		*vsw_head = NULL;
253 krwlock_t	vsw_rw;
254 
255 /*
256  * Property names
257  */
258 static char vdev_propname[] = "virtual-device";
259 static char vsw_propname[] = "virtual-network-switch";
260 static char physdev_propname[] = "vsw-phys-dev";
261 static char smode_propname[] = "vsw-switch-mode";
262 static char macaddr_propname[] = "local-mac-address";
263 static char remaddr_propname[] = "remote-mac-address";
264 static char ldcids_propname[] = "ldc-ids";
265 static char chan_propname[] = "channel-endpoint";
266 static char id_propname[] = "id";
267 static char reg_propname[] = "reg";
268 
269 /*
270  * Matching criteria passed to the MDEG to register interest
271  * in changes to 'virtual-device-port' nodes identified by their
272  * 'id' property.
273  */
274 static md_prop_match_t vport_prop_match[] = {
275 	{ MDET_PROP_VAL,    "id"   },
276 	{ MDET_LIST_END,    NULL    }
277 };
278 
279 static mdeg_node_match_t vport_match = { "virtual-device-port",
280 						vport_prop_match };
281 
282 /*
283  * Matching criteria passed to the MDEG to register interest
284  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
285  * by their 'name' and 'cfg-handle' properties.
286  */
287 static md_prop_match_t vdev_prop_match[] = {
288 	{ MDET_PROP_STR,    "name"   },
289 	{ MDET_PROP_VAL,    "cfg-handle" },
290 	{ MDET_LIST_END,    NULL    }
291 };
292 
293 static mdeg_node_match_t vdev_match = { "virtual-device",
294 						vdev_prop_match };
295 
296 
/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,    "name",		vsw_propname },
	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
	{ MDET_LIST_END,    NULL,		NULL	}
};

/*
 * Store 'val' in the value slot of entry [1] of a copy of the template
 * above, i.e. the "cfg-handle" entry.
 *
 * NOTE(review): the expansion already ends in a semicolon, so the usual
 * "MACRO(...);" call form yields an extra empty statement and the macro
 * cannot be used as the body of an unbraced if/else.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
310 
311 #ifdef	DEBUG
312 /*
313  * Print debug messages - set to 0x1f to enable all msgs
314  * or 0x0 to turn all off.
315  */
316 int vswdbg = 0x0;
317 
318 /*
319  * debug levels:
320  * 0x01:	Function entry/exit tracing
321  * 0x02:	Internal function messages
322  * 0x04:	Verbose internal messages
323  * 0x08:	Warning messages
324  * 0x10:	Error messages
325  */
326 
327 void
328 vswdebug(vsw_t *vswp, const char *fmt, ...)
329 {
330 	char buf[512];
331 	va_list ap;
332 
333 	va_start(ap, fmt);
334 	(void) vsprintf(buf, fmt, ap);
335 	va_end(ap);
336 
337 	if (vswp == NULL)
338 		cmn_err(CE_CONT, "%s\n", buf);
339 	else
340 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
341 }
342 
343 #endif	/* DEBUG */
344 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info();
 * it lists the single driver linkage structure of this module.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
350 
351 int
352 _init(void)
353 {
354 	int status;
355 
356 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
357 
358 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
359 	if (status != 0) {
360 		return (status);
361 	}
362 
363 	mac_init_ops(&vsw_ops, DRV_NAME);
364 	status = mod_install(&modlinkage);
365 	if (status != 0) {
366 		ddi_soft_state_fini(&vsw_state);
367 	}
368 	return (status);
369 }
370 
371 int
372 _fini(void)
373 {
374 	int status;
375 
376 	status = mod_remove(&modlinkage);
377 	if (status != 0)
378 		return (status);
379 	mac_fini_ops(&vsw_ops);
380 	ddi_soft_state_fini(&vsw_state);
381 
382 	rw_destroy(&vsw_rw);
383 
384 	return (status);
385 }
386 
387 int
388 _info(struct modinfo *modinfop)
389 {
390 	return (mod_info(&modlinkage, modinfop));
391 }
392 
/*
 * attach(9E) entry point.
 *
 * For DDI_ATTACH: allocates the per-instance soft state, initialises its
 * locks, reads the MD properties, creates the unicast and multicast
 * forwarding hash tables and the control-message taskq, optionally adds
 * the receive soft interrupt, sets up the switching mode, registers with
 * the MAC layer and the MDEG, and finally links the instance onto the
 * global vsw_head list.
 *
 * DDI_RESUME succeeds without doing anything; every other command fails.
 *
 * Each completed stage sets a bit in 'progress'; on failure the code at
 * vsw_attach_fail undoes only the stages whose bit is set and then frees
 * the soft state.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vsw_t		*vswp;
	int		instance;
	char		hashname[MAXNAMELEN];
	char		qname[TASKQ_NAMELEN];
	/* one bit per completed attach stage, consumed at vsw_attach_fail */
	enum		{ PROG_init = 0x00,
				PROG_locks = 0x01,
				PROG_readmd = 0x02,
				PROG_fdb = 0x04,
				PROG_mfdb = 0x08,
				PROG_taskq = 0x10,
				PROG_rx_softint = 0x20,
				PROG_swmode = 0x40,
				PROG_macreg = 0x80,
				PROG_mdreg = 0x100}
			progress;

	progress = PROG_init;
	int		rv;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	case DDI_PM_RESUME:
	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
		return (DDI_FAILURE);
	}
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
		/*
		 * progress is still PROG_init here, so the failure path
		 * touches nothing through vswp and only frees the soft state.
		 */
		goto vsw_attach_fail;
	}

	vswp->dip = dip;
	vswp->instance = instance;
	ddi_set_driver_private(dip, (caddr_t)vswp);

	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);

	progress |= PROG_locks;

	rv = vsw_read_mdprops(vswp);
	if (rv != 0)
		goto vsw_attach_fail;

	/* recorded only; no cleanup step below is keyed on PROG_readmd */
	progress |= PROG_readmd;

	/* setup the unicast forwarding database  */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
	    vswp->instance);
	D2(vswp, "creating unicast hash table (%s)...", hashname);
	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_fdb;

	/* setup the multicast forwarding database */
	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
	    vswp->instance);
	D2(vswp, "creating multicast hash table %s)...", hashname);
	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
	    mod_hash_null_valdtor, sizeof (void *));

	progress |= PROG_mfdb;

	/*
	 * Create the taskq which will process all the VIO
	 * control messages.
	 */
	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
		    vswp->instance);
		goto vsw_attach_fail;
	}

	progress |= PROG_taskq;

	/*
	 * If LDC receive thread is enabled, then we need a
	 * soft-interrupt to deliver the packets to the upper layers.
	 * This applies only to the packets that need to be sent up
	 * the stack, but not to the packets that are sent out via
	 * the physical interface.
	 */
	if (vsw_ldc_rxthr_enabled) {
		vswp->rx_mhead = vswp->rx_mtail = NULL;
		vswp->soft_pri = PIL_4;
		vswp->rx_softint = B_TRUE;

		rv = ddi_intr_add_softint(vswp->dip, &vswp->soft_handle,
		    vswp->soft_pri, vsw_rx_softintr, (void *)vswp);
		if (rv != DDI_SUCCESS) {
			cmn_err(CE_WARN, "!vsw%d: add_softint failed rv(%d)",
			    vswp->instance, rv);
			/*
			 * PROG_rx_softint is not set yet, so the failure
			 * path leaves the (uninitialised) soft_lock alone.
			 */
			goto vsw_attach_fail;
		}

		/*
		 * Initialize the soft_lock with the same priority as
		 * the soft interrupt to protect from the soft interrupt.
		 */
		mutex_init(&vswp->soft_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(vswp->soft_pri));
		progress |= PROG_rx_softint;
	} else {
		vswp->rx_softint = B_FALSE;
	}

	/* prevent auto-detaching */
	/* failure to set the property is only logged; attach carries on */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
		    "instance %u", DDI_NO_AUTODETACH, instance);
	}

	/*
	 * Setup the required switching mode,
	 * based on the mdprops that we read earlier.
	 */
	rv = vsw_setup_switching(vswp);
	if (rv == EAGAIN) {
		/*
		 * Unable to setup switching mode;
		 * as the error is EAGAIN, schedule a timeout to retry.
		 */
		mutex_enter(&vswp->swtmout_lock);

		vswp->swtmout_enabled = B_TRUE;
		vswp->swtmout_id =
		    timeout(vsw_setup_switching_timeout, vswp,
		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));

		mutex_exit(&vswp->swtmout_lock);
	} else if (rv != 0) {
		goto vsw_attach_fail;
	}

	/* set both on success and when the EAGAIN retry has been scheduled */
	progress |= PROG_swmode;

	/* Register with mac layer as a provider */
	rv = vsw_mac_register(vswp);
	if (rv != 0)
		goto vsw_attach_fail;

	progress |= PROG_macreg;

	/*
	 * Now we have everything setup, register an interest in
	 * specific MD nodes.
	 *
	 * The callback is invoked in 2 cases, firstly if upon mdeg
	 * registration there are existing nodes which match our specified
	 * criteria, and secondly if the MD is changed (and again, there
	 * are nodes which we are interested in present within it). Note
	 * that our callback will be invoked even if our specified nodes
	 * have not actually changed.
	 *
	 */
	rv = vsw_mdeg_register(vswp);
	if (rv != 0)
		goto vsw_attach_fail;

	progress |= PROG_mdreg;

	/* publish the fully set up instance at the head of the global list */
	WRITE_ENTER(&vsw_rw);
	vswp->next = vsw_head;
	vsw_head = vswp;
	RW_EXIT(&vsw_rw);

	ddi_report_dev(vswp->dip);
	return (DDI_SUCCESS);

vsw_attach_fail:
	/* undo only the stages whose progress bit was recorded above */
	DERR(NULL, "vsw_attach: failed");

	if (progress & PROG_rx_softint) {
		(void) ddi_intr_remove_softint(vswp->soft_handle);
		mutex_destroy(&vswp->soft_lock);
	}

	/*
	 * No failure exit follows the point where PROG_mdreg is set, so
	 * within this function this branch is never taken.
	 */
	if (progress & PROG_mdreg) {
		vsw_mdeg_unregister(vswp);
		(void) vsw_detach_ports(vswp);
	}

	if (progress & PROG_macreg)
		(void) vsw_mac_unregister(vswp);

	if (progress & PROG_swmode) {
		vsw_stop_switching_timeout(vswp);
		mutex_enter(&vswp->mac_lock);
		vsw_mac_detach(vswp);
		vsw_mac_close(vswp);
		mutex_exit(&vswp->mac_lock);
	}

	if (progress & PROG_taskq)
		ddi_taskq_destroy(vswp->taskq_p);

	if (progress & PROG_mfdb)
		mod_hash_destroy_hash(vswp->mfdb);

	if (progress & PROG_fdb)
		mod_hash_destroy_hash(vswp->fdb);

	if (progress & PROG_locks) {
		/* destroyed in the reverse order of their initialisation */
		rw_destroy(&vswp->plist.lockrw);
		rw_destroy(&vswp->mfdbrw);
		rw_destroy(&vswp->if_lockrw);
		mutex_destroy(&vswp->swtmout_lock);
		mutex_destroy(&vswp->mca_lock);
		mutex_destroy(&vswp->mac_lock);
		mutex_destroy(&vswp->hw_lock);
	}

	ddi_soft_state_free(vsw_state, instance);
	return (DDI_FAILURE);
}
631 
/*
 * detach(9E) entry point.
 *
 * Only DDI_DETACH is supported; any other command fails. Tears the
 * instance down: stops the switching-setup timeout, unregisters from the
 * MAC layer, removes the receive soft interrupt, unregisters from the
 * MDEG, detaches the ports, closes the physical device, destroys the
 * remaining mblk pools, taskq and hash tables, unlinks the instance from
 * the global vsw_head list and frees the soft state.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if there is no soft state for the
 * instance or one of the teardown steps fails.
 *
 * NOTE(review): several DDI_FAILURE returns occur after earlier steps
 * have already released resources (see the notes in the body), leaving
 * the instance only partially torn down.
 */
static int
vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vio_mblk_pool_t		*poolp, *npoolp;
	vsw_t			**vswpp, *vswp;
	int 			instance;

	instance = ddi_get_instance(dip);
	vswp = ddi_get_soft_state(vsw_state, instance);

	if (vswp == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	/* Stop any pending timeout to setup switching mode. */
	vsw_stop_switching_timeout(vswp);

	if (vswp->if_state & VSW_IF_REG) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
			    "MAC layer", vswp->instance);
			return (DDI_FAILURE);
		}
	}

	/*
	 * Destroy/free up the receive thread related structures.
	 */
	if (vswp->rx_softint == B_TRUE) {
		(void) ddi_intr_remove_softint(vswp->soft_handle);
		mutex_destroy(&vswp->soft_lock);
		/* discard any packets still queued for the soft interrupt */
		if (vswp->rx_mhead != NULL) {
			freemsgchain(vswp->rx_mhead);
			vswp->rx_mhead = vswp->rx_mtail = NULL;
		}
	}

	vsw_mdeg_unregister(vswp);

	/* remove mac layer callback */
	mutex_enter(&vswp->mac_lock);
	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
		vswp->mrh = NULL;
	}
	mutex_exit(&vswp->mac_lock);

	/*
	 * NOTE(review): on this failure return the soft interrupt, the
	 * MDEG registration and the MAC registration have already been
	 * removed above and are not restored.
	 */
	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to detach ports",
		    vswp->instance);
		return (DDI_FAILURE);
	}

	rw_destroy(&vswp->if_lockrw);

	mutex_destroy(&vswp->hw_lock);

	/*
	 * Now that the ports have been deleted, stop and close
	 * the physical device.
	 */
	mutex_enter(&vswp->mac_lock);

	vsw_mac_detach(vswp);
	vsw_mac_close(vswp);

	mutex_exit(&vswp->mac_lock);

	mutex_destroy(&vswp->mac_lock);
	mutex_destroy(&vswp->swtmout_lock);

	/*
	 * Destroy any free pools that may still exist.
	 */
	/*
	 * The list head is advanced before each destroy attempt and put
	 * back if the attempt fails, so the failed pool stays on the list.
	 *
	 * NOTE(review): the failure return below happens after if_lockrw,
	 * hw_lock, mac_lock and swtmout_lock have been destroyed.
	 */
	poolp = vswp->rxh;
	while (poolp != NULL) {
		npoolp = vswp->rxh = poolp->nextp;
		if (vio_destroy_mblks(poolp) != 0) {
			vswp->rxh = poolp;
			return (DDI_FAILURE);
		}
		poolp = npoolp;
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);

	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so its safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	mod_hash_destroy_hash(vswp->fdb);
	vswp->fdb = NULL;

	/* the multicast table is destroyed with its rwlock held as writer */
	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);
	/* unlink this instance from the global list, if it is on it */
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}
777 
778 static int
779 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
780 {
781 	_NOTE(ARGUNUSED(dip))
782 
783 	vsw_t	*vswp = NULL;
784 	dev_t	dev = (dev_t)arg;
785 	int	instance;
786 
787 	instance = getminor(dev);
788 
789 	switch (infocmd) {
790 	case DDI_INFO_DEVT2DEVINFO:
791 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
792 			*result = NULL;
793 			return (DDI_FAILURE);
794 		}
795 		*result = vswp->dip;
796 		return (DDI_SUCCESS);
797 
798 	case DDI_INFO_DEVT2INSTANCE:
799 		*result = (void *)(uintptr_t)instance;
800 		return (DDI_SUCCESS);
801 
802 	default:
803 		*result = NULL;
804 		return (DDI_FAILURE);
805 	}
806 }
807 
808 /*
809  * Get the value of the "vsw-phys-dev" property in the specified
810  * node. This property is the name of the physical device that
811  * the virtual switch will use to talk to the outside world.
812  *
813  * Note it is valid for this property to be NULL (but the property
814  * itself must exist). Callers of this routine should verify that
815  * the value returned is what they expected (i.e. either NULL or non NULL).
816  *
817  * On success returns value of the property in region pointed to by
818  * the 'name' argument, and with return value of 0. Otherwise returns 1.
819  */
820 static int
821 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
822 {
823 	int		len = 0;
824 	int		instance;
825 	char		*physname = NULL;
826 	char		*dev;
827 	const char	*dev_name;
828 	char		myname[MAXNAMELEN];
829 
830 	dev_name = ddi_driver_name(vswp->dip);
831 	instance = ddi_get_instance(vswp->dip);
832 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
833 
834 	if (md_get_prop_data(mdp, node, physdev_propname,
835 	    (uint8_t **)(&physname), &len) != 0) {
836 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
837 		    "device(s) from MD", vswp->instance);
838 		return (1);
839 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
840 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
841 		    vswp->instance, physname);
842 		return (1);
843 	} else if (strcmp(myname, physname) == 0) {
844 		/*
845 		 * Prevent the vswitch from opening itself as the
846 		 * network device.
847 		 */
848 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
849 		    vswp->instance, physname);
850 		return (1);
851 	} else {
852 		(void) strncpy(name, physname, strlen(physname) + 1);
853 		D2(vswp, "%s: using first device specified (%s)",
854 		    __func__, physname);
855 	}
856 
857 #ifdef DEBUG
858 	/*
859 	 * As a temporary measure to aid testing we check to see if there
860 	 * is a vsw.conf file present. If there is we use the value of the
861 	 * vsw_physname property in the file as the name of the physical
862 	 * device, overriding the value from the MD.
863 	 *
864 	 * There may be multiple devices listed, but for the moment
865 	 * we just use the first one.
866 	 */
867 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
868 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
869 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
870 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
871 			    vswp->instance, dev);
872 			ddi_prop_free(dev);
873 			return (1);
874 		} else {
875 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
876 			    "config file", vswp->instance, dev);
877 
878 			(void) strncpy(name, dev, strlen(dev) + 1);
879 		}
880 
881 		ddi_prop_free(dev);
882 	}
883 #endif
884 
885 	return (0);
886 }
887 
888 /*
889  * Read the 'vsw-switch-mode' property from the specified MD node.
890  *
891  * Returns 0 on success and the number of modes found in 'found',
892  * otherwise returns 1.
893  */
894 static int
895 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
896 						uint8_t *modes, int *found)
897 {
898 	int		len = 0;
899 	int		smode_num = 0;
900 	char		*smode = NULL;
901 	char		*curr_mode = NULL;
902 
903 	D1(vswp, "%s: enter", __func__);
904 
905 	/*
906 	 * Get the switch-mode property. The modes are listed in
907 	 * decreasing order of preference, i.e. prefered mode is
908 	 * first item in list.
909 	 */
910 	len = 0;
911 	smode_num = 0;
912 	if (md_get_prop_data(mdp, node, smode_propname,
913 	    (uint8_t **)(&smode), &len) != 0) {
914 		/*
915 		 * Unable to get switch-mode property from MD, nothing
916 		 * more we can do.
917 		 */
918 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
919 		    " from the MD", vswp->instance);
920 		*found = 0;
921 		return (1);
922 	}
923 
924 	curr_mode = smode;
925 	/*
926 	 * Modes of operation:
927 	 * 'switched'	 - layer 2 switching, underlying HW in
928 	 *			programmed mode.
929 	 * 'promiscuous' - layer 2 switching, underlying HW in
930 	 *			promiscuous mode.
931 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
932 	 *			in non-promiscuous mode.
933 	 */
934 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
935 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
936 		if (strcmp(curr_mode, "switched") == 0) {
937 			modes[smode_num++] = VSW_LAYER2;
938 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
939 			modes[smode_num++] = VSW_LAYER2_PROMISC;
940 		} else if (strcmp(curr_mode, "routed") == 0) {
941 			modes[smode_num++] = VSW_LAYER3;
942 		} else {
943 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
944 			    "setting to default switched mode",
945 			    vswp->instance, curr_mode);
946 			modes[smode_num++] = VSW_LAYER2;
947 		}
948 		curr_mode += strlen(curr_mode) + 1;
949 	}
950 	*found = smode_num;
951 
952 	D2(vswp, "%s: %d modes found", __func__, smode_num);
953 
954 	D1(vswp, "%s: exit", __func__);
955 
956 	return (0);
957 }
958 
959 /*
960  * Register with the MAC layer as a network device, so we
961  * can be plumbed if necessary.
962  */
963 static int
964 vsw_mac_register(vsw_t *vswp)
965 {
966 	mac_register_t	*macp;
967 	int		rv;
968 
969 	D1(vswp, "%s: enter", __func__);
970 
971 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
972 		return (EINVAL);
973 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
974 	macp->m_driver = vswp;
975 	macp->m_dip = vswp->dip;
976 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
977 	macp->m_callbacks = &vsw_m_callbacks;
978 	macp->m_min_sdu = 0;
979 	macp->m_max_sdu = ETHERMTU;
980 	rv = mac_register(macp, &vswp->if_mh);
981 	mac_free(macp);
982 	if (rv != 0) {
983 		/*
984 		 * Treat this as a non-fatal error as we may be
985 		 * able to operate in some other mode.
986 		 */
987 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
988 		    "a provider with MAC layer", vswp->instance);
989 		return (rv);
990 	}
991 
992 	vswp->if_state |= VSW_IF_REG;
993 
994 	D1(vswp, "%s: exit", __func__);
995 
996 	return (rv);
997 }
998 
999 static int
1000 vsw_mac_unregister(vsw_t *vswp)
1001 {
1002 	int		rv = 0;
1003 
1004 	D1(vswp, "%s: enter", __func__);
1005 
1006 	WRITE_ENTER(&vswp->if_lockrw);
1007 
1008 	if (vswp->if_state & VSW_IF_REG) {
1009 		rv = mac_unregister(vswp->if_mh);
1010 		if (rv != 0) {
1011 			DWARN(vswp, "%s: unable to unregister from MAC "
1012 			    "framework", __func__);
1013 
1014 			RW_EXIT(&vswp->if_lockrw);
1015 			D1(vswp, "%s: fail exit", __func__);
1016 			return (rv);
1017 		}
1018 
1019 		/* mark i/f as down and unregistered */
1020 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1021 	}
1022 	RW_EXIT(&vswp->if_lockrw);
1023 
1024 	D1(vswp, "%s: exit", __func__);
1025 
1026 	return (rv);
1027 }
1028 
1029 static int
1030 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1031 {
1032 	vsw_t			*vswp = (vsw_t *)arg;
1033 
1034 	D1(vswp, "%s: enter", __func__);
1035 
1036 	mutex_enter(&vswp->mac_lock);
1037 	if (vswp->mh == NULL) {
1038 		mutex_exit(&vswp->mac_lock);
1039 		return (EINVAL);
1040 	}
1041 
1042 	/* return stats from underlying device */
1043 	*val = mac_stat_get(vswp->mh, stat);
1044 
1045 	mutex_exit(&vswp->mac_lock);
1046 
1047 	return (0);
1048 }
1049 
1050 static void
1051 vsw_m_stop(void *arg)
1052 {
1053 	vsw_t		*vswp = (vsw_t *)arg;
1054 
1055 	D1(vswp, "%s: enter", __func__);
1056 
1057 	WRITE_ENTER(&vswp->if_lockrw);
1058 	vswp->if_state &= ~VSW_IF_UP;
1059 	RW_EXIT(&vswp->if_lockrw);
1060 
1061 	mutex_enter(&vswp->hw_lock);
1062 
1063 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1064 
1065 	if (vswp->recfg_reqd)
1066 		vsw_reconfig_hw(vswp);
1067 
1068 	mutex_exit(&vswp->hw_lock);
1069 
1070 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1071 }
1072 
1073 static int
1074 vsw_m_start(void *arg)
1075 {
1076 	vsw_t		*vswp = (vsw_t *)arg;
1077 
1078 	D1(vswp, "%s: enter", __func__);
1079 
1080 	WRITE_ENTER(&vswp->if_lockrw);
1081 
1082 	vswp->if_state |= VSW_IF_UP;
1083 
1084 	if (vswp->switching_setup_done == B_FALSE) {
1085 		/*
1086 		 * If the switching mode has not been setup yet, just
1087 		 * return. The unicast address will be programmed
1088 		 * after the physical device is successfully setup by the
1089 		 * timeout handler.
1090 		 */
1091 		RW_EXIT(&vswp->if_lockrw);
1092 		return (0);
1093 	}
1094 
1095 	/* if in layer2 mode, program unicast address. */
1096 	if (vswp->mh != NULL) {
1097 		mutex_enter(&vswp->hw_lock);
1098 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1099 		mutex_exit(&vswp->hw_lock);
1100 	}
1101 
1102 	RW_EXIT(&vswp->if_lockrw);
1103 
1104 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1105 	return (0);
1106 }
1107 
1108 /*
1109  * Change the local interface address.
1110  *
1111  * Note: we don't support this entry point. The local
1112  * mac address of the switch can only be changed via its
1113  * MD node properties.
1114  */
1115 static int
1116 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1117 {
1118 	_NOTE(ARGUNUSED(arg, macaddr))
1119 
1120 	return (DDI_FAILURE);
1121 }
1122 
1123 static int
1124 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1125 {
1126 	vsw_t		*vswp = (vsw_t *)arg;
1127 	mcst_addr_t	*mcst_p = NULL;
1128 	uint64_t	addr = 0x0;
1129 	int		i, ret = 0;
1130 
1131 	D1(vswp, "%s: enter", __func__);
1132 
1133 	/*
1134 	 * Convert address into form that can be used
1135 	 * as hash table key.
1136 	 */
1137 	for (i = 0; i < ETHERADDRL; i++) {
1138 		addr = (addr << 8) | mca[i];
1139 	}
1140 
1141 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1142 
1143 	if (add) {
1144 		D2(vswp, "%s: adding multicast", __func__);
1145 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1146 			/*
1147 			 * Update the list of multicast addresses
1148 			 * contained within the vsw_t structure to
1149 			 * include this new one.
1150 			 */
1151 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1152 			if (mcst_p == NULL) {
1153 				DERR(vswp, "%s unable to alloc mem", __func__);
1154 				(void) vsw_del_mcst(vswp,
1155 				    VSW_LOCALDEV, addr, NULL);
1156 				return (1);
1157 			}
1158 			mcst_p->addr = addr;
1159 			ether_copy(mca, &mcst_p->mca);
1160 
1161 			/*
1162 			 * Call into the underlying driver to program the
1163 			 * address into HW.
1164 			 */
1165 			mutex_enter(&vswp->mac_lock);
1166 			if (vswp->mh != NULL) {
1167 				ret = mac_multicst_add(vswp->mh, mca);
1168 				if (ret != 0) {
1169 					cmn_err(CE_WARN, "!vsw%d: unable to "
1170 					    "add multicast address",
1171 					    vswp->instance);
1172 					mutex_exit(&vswp->mac_lock);
1173 					(void) vsw_del_mcst(vswp,
1174 					    VSW_LOCALDEV, addr, NULL);
1175 					kmem_free(mcst_p, sizeof (*mcst_p));
1176 					return (ret);
1177 				}
1178 				mcst_p->mac_added = B_TRUE;
1179 			}
1180 			mutex_exit(&vswp->mac_lock);
1181 
1182 			mutex_enter(&vswp->mca_lock);
1183 			mcst_p->nextp = vswp->mcap;
1184 			vswp->mcap = mcst_p;
1185 			mutex_exit(&vswp->mca_lock);
1186 		} else {
1187 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1188 			    "address", vswp->instance);
1189 		}
1190 		return (ret);
1191 	}
1192 
1193 	D2(vswp, "%s: removing multicast", __func__);
1194 	/*
1195 	 * Remove the address from the hash table..
1196 	 */
1197 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1198 
1199 		/*
1200 		 * ..and then from the list maintained in the
1201 		 * vsw_t structure.
1202 		 */
1203 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1204 		ASSERT(mcst_p != NULL);
1205 
1206 		mutex_enter(&vswp->mac_lock);
1207 		if (vswp->mh != NULL && mcst_p->mac_added) {
1208 			(void) mac_multicst_remove(vswp->mh, mca);
1209 			mcst_p->mac_added = B_FALSE;
1210 		}
1211 		mutex_exit(&vswp->mac_lock);
1212 		kmem_free(mcst_p, sizeof (*mcst_p));
1213 	}
1214 
1215 	D1(vswp, "%s: exit", __func__);
1216 
1217 	return (0);
1218 }
1219 
1220 static int
1221 vsw_m_promisc(void *arg, boolean_t on)
1222 {
1223 	vsw_t		*vswp = (vsw_t *)arg;
1224 
1225 	D1(vswp, "%s: enter", __func__);
1226 
1227 	WRITE_ENTER(&vswp->if_lockrw);
1228 	if (on)
1229 		vswp->if_state |= VSW_IF_PROMISC;
1230 	else
1231 		vswp->if_state &= ~VSW_IF_PROMISC;
1232 	RW_EXIT(&vswp->if_lockrw);
1233 
1234 	D1(vswp, "%s: exit", __func__);
1235 
1236 	return (0);
1237 }
1238 
1239 static mblk_t *
1240 vsw_m_tx(void *arg, mblk_t *mp)
1241 {
1242 	vsw_t		*vswp = (vsw_t *)arg;
1243 
1244 	D1(vswp, "%s: enter", __func__);
1245 
1246 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1247 
1248 	D1(vswp, "%s: exit", __func__);
1249 
1250 	return (NULL);
1251 }
1252 
1253 /*
1254  * Register for machine description (MD) updates.
1255  *
1256  * Returns 0 on success, 1 on failure.
1257  */
1258 static int
1259 vsw_mdeg_register(vsw_t *vswp)
1260 {
1261 	mdeg_prop_spec_t	*pspecp;
1262 	mdeg_node_spec_t	*inst_specp;
1263 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1264 	size_t			templatesz;
1265 	int			rv;
1266 
1267 	D1(vswp, "%s: enter", __func__);
1268 
1269 	/*
1270 	 * Allocate and initialize a per-instance copy
1271 	 * of the global property spec array that will
1272 	 * uniquely identify this vsw instance.
1273 	 */
1274 	templatesz = sizeof (vsw_prop_template);
1275 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1276 
1277 	bcopy(vsw_prop_template, pspecp, templatesz);
1278 
1279 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1280 
1281 	/* initialize the complete prop spec structure */
1282 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1283 	inst_specp->namep = "virtual-device";
1284 	inst_specp->specp = pspecp;
1285 
1286 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1287 	    vswp->regprop);
1288 	/*
1289 	 * Register an interest in 'virtual-device' nodes with a
1290 	 * 'name' property of 'virtual-network-switch'
1291 	 */
1292 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1293 	    (void *)vswp, &mdeg_hdl);
1294 	if (rv != MDEG_SUCCESS) {
1295 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1296 		    __func__, rv);
1297 		goto mdeg_reg_fail;
1298 	}
1299 
1300 	/*
1301 	 * Register an interest in 'vsw-port' nodes.
1302 	 */
1303 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1304 	    (void *)vswp, &mdeg_port_hdl);
1305 	if (rv != MDEG_SUCCESS) {
1306 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1307 		(void) mdeg_unregister(mdeg_hdl);
1308 		goto mdeg_reg_fail;
1309 	}
1310 
1311 	/* save off data that will be needed later */
1312 	vswp->inst_spec = inst_specp;
1313 	vswp->mdeg_hdl = mdeg_hdl;
1314 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1315 
1316 	D1(vswp, "%s: exit", __func__);
1317 	return (0);
1318 
1319 mdeg_reg_fail:
1320 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1321 	    vswp->instance);
1322 	kmem_free(pspecp, templatesz);
1323 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1324 
1325 	vswp->mdeg_hdl = NULL;
1326 	vswp->mdeg_port_hdl = NULL;
1327 
1328 	return (1);
1329 }
1330 
1331 static void
1332 vsw_mdeg_unregister(vsw_t *vswp)
1333 {
1334 	D1(vswp, "vsw_mdeg_unregister: enter");
1335 
1336 	if (vswp->mdeg_hdl != NULL)
1337 		(void) mdeg_unregister(vswp->mdeg_hdl);
1338 
1339 	if (vswp->mdeg_port_hdl != NULL)
1340 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1341 
1342 	if (vswp->inst_spec != NULL) {
1343 		if (vswp->inst_spec->specp != NULL) {
1344 			(void) kmem_free(vswp->inst_spec->specp,
1345 			    sizeof (vsw_prop_template));
1346 			vswp->inst_spec->specp = NULL;
1347 		}
1348 
1349 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1350 		vswp->inst_spec = NULL;
1351 	}
1352 
1353 	D1(vswp, "vsw_mdeg_unregister: exit");
1354 }
1355 
1356 /*
1357  * Mdeg callback invoked for the vsw node itself.
1358  */
1359 static int
1360 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1361 {
1362 	vsw_t		*vswp;
1363 	md_t		*mdp;
1364 	mde_cookie_t	node;
1365 	uint64_t	inst;
1366 	char		*node_name = NULL;
1367 
1368 	if (resp == NULL)
1369 		return (MDEG_FAILURE);
1370 
1371 	vswp = (vsw_t *)cb_argp;
1372 
1373 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1374 	    " : prev matched %d", __func__, resp->added.nelem,
1375 	    resp->removed.nelem, resp->match_curr.nelem,
1376 	    resp->match_prev.nelem);
1377 
1378 	/*
1379 	 * We get an initial callback for this node as 'added'
1380 	 * after registering with mdeg. Note that we would have
1381 	 * already gathered information about this vsw node by
1382 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1383 	 * So, there is a window where the properties of this
1384 	 * node might have changed when we get this initial 'added'
1385 	 * callback. We handle this as if an update occured
1386 	 * and invoke the same function which handles updates to
1387 	 * the properties of this vsw-node if any.
1388 	 *
1389 	 * A non-zero 'match' value indicates that the MD has been
1390 	 * updated and that a virtual-network-switch node is
1391 	 * present which may or may not have been updated. It is
1392 	 * up to the clients to examine their own nodes and
1393 	 * determine if they have changed.
1394 	 */
1395 	if (resp->added.nelem != 0) {
1396 
1397 		if (resp->added.nelem != 1) {
1398 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1399 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1400 			return (MDEG_FAILURE);
1401 		}
1402 
1403 		mdp = resp->added.mdp;
1404 		node = resp->added.mdep[0];
1405 
1406 	} else if (resp->match_curr.nelem != 0) {
1407 
1408 		if (resp->match_curr.nelem != 1) {
1409 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1410 			    "invalid: %d\n", vswp->instance,
1411 			    resp->match_curr.nelem);
1412 			return (MDEG_FAILURE);
1413 		}
1414 
1415 		mdp = resp->match_curr.mdp;
1416 		node = resp->match_curr.mdep[0];
1417 
1418 	} else {
1419 		return (MDEG_FAILURE);
1420 	}
1421 
1422 	/* Validate name and instance */
1423 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1424 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1425 		return (MDEG_FAILURE);
1426 	}
1427 
1428 	/* is this a virtual-network-switch? */
1429 	if (strcmp(node_name, vsw_propname) != 0) {
1430 		DERR(vswp, "%s: Invalid node name: %s\n",
1431 		    __func__, node_name);
1432 		return (MDEG_FAILURE);
1433 	}
1434 
1435 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1436 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1437 		    __func__);
1438 		return (MDEG_FAILURE);
1439 	}
1440 
1441 	/* is this the right instance of vsw? */
1442 	if (inst != vswp->regprop) {
1443 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1444 		    __func__, inst);
1445 		return (MDEG_FAILURE);
1446 	}
1447 
1448 	vsw_update_md_prop(vswp, mdp, node);
1449 
1450 	return (MDEG_SUCCESS);
1451 }
1452 
1453 /*
1454  * Mdeg callback invoked for changes to the vsw-port nodes
1455  * under the vsw node.
1456  */
1457 static int
1458 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1459 {
1460 	vsw_t		*vswp;
1461 	int		idx;
1462 	md_t		*mdp;
1463 	mde_cookie_t	node;
1464 	uint64_t	inst;
1465 
1466 	if ((resp == NULL) || (cb_argp == NULL))
1467 		return (MDEG_FAILURE);
1468 
1469 	vswp = (vsw_t *)cb_argp;
1470 
1471 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1472 	    " : prev matched %d", __func__, resp->added.nelem,
1473 	    resp->removed.nelem, resp->match_curr.nelem,
1474 	    resp->match_prev.nelem);
1475 
1476 	/* process added ports */
1477 	for (idx = 0; idx < resp->added.nelem; idx++) {
1478 		mdp = resp->added.mdp;
1479 		node = resp->added.mdep[idx];
1480 
1481 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1482 
1483 		if (vsw_port_add(vswp, mdp, &node) != 0) {
1484 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1485 			    "(0x%lx)", vswp->instance, node);
1486 		}
1487 	}
1488 
1489 	/* process removed ports */
1490 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1491 		mdp = resp->removed.mdp;
1492 		node = resp->removed.mdep[idx];
1493 
1494 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1495 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1496 			    __func__, id_propname, idx);
1497 			continue;
1498 		}
1499 
1500 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1501 
1502 		if (vsw_port_detach(vswp, inst) != 0) {
1503 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1504 			    vswp->instance, inst);
1505 		}
1506 	}
1507 
1508 	/*
1509 	 * Currently no support for updating already active ports.
1510 	 * So, ignore the match_curr and match_priv arrays for now.
1511 	 */
1512 
1513 	D1(vswp, "%s: exit", __func__);
1514 
1515 	return (MDEG_SUCCESS);
1516 }
1517 
1518 /*
1519  * Scan the machine description for this instance of vsw
1520  * and read its properties. Called only from vsw_attach().
1521  * Returns: 0 on success, 1 on failure.
1522  */
1523 static int
1524 vsw_read_mdprops(vsw_t *vswp)
1525 {
1526 	md_t		*mdp = NULL;
1527 	mde_cookie_t	rootnode;
1528 	mde_cookie_t	*listp = NULL;
1529 	uint64_t	inst;
1530 	uint64_t	cfgh;
1531 	char		*name;
1532 	int		rv = 1;
1533 	int		num_nodes = 0;
1534 	int		num_devs = 0;
1535 	int		listsz = 0;
1536 	int		i;
1537 
1538 	/*
1539 	 * In each 'virtual-device' node in the MD there is a
1540 	 * 'cfg-handle' property which is the MD's concept of
1541 	 * an instance number (this may be completely different from
1542 	 * the device drivers instance #). OBP reads that value and
1543 	 * stores it in the 'reg' property of the appropriate node in
1544 	 * the device tree. We first read this reg property and use this
1545 	 * to compare against the 'cfg-handle' property of vsw nodes
1546 	 * in MD to get to this specific vsw instance and then read
1547 	 * other properties that we are interested in.
1548 	 * We also cache the value of 'reg' property and use it later
1549 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1550 	 */
1551 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1552 	    DDI_PROP_DONTPASS, reg_propname, -1);
1553 	if (inst == -1) {
1554 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1555 		    "OBP device tree", vswp->instance, reg_propname);
1556 		return (rv);
1557 	}
1558 
1559 	vswp->regprop = inst;
1560 
1561 	if ((mdp = md_get_handle()) == NULL) {
1562 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1563 		return (rv);
1564 	}
1565 
1566 	num_nodes = md_node_count(mdp);
1567 	ASSERT(num_nodes > 0);
1568 
1569 	listsz = num_nodes * sizeof (mde_cookie_t);
1570 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1571 
1572 	rootnode = md_root_node(mdp);
1573 
1574 	/* search for all "virtual_device" nodes */
1575 	num_devs = md_scan_dag(mdp, rootnode,
1576 	    md_find_name(mdp, vdev_propname),
1577 	    md_find_name(mdp, "fwd"), listp);
1578 	if (num_devs <= 0) {
1579 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1580 		goto vsw_readmd_exit;
1581 	}
1582 
1583 	/*
1584 	 * Now loop through the list of virtual-devices looking for
1585 	 * devices with name "virtual-network-switch" and for each
1586 	 * such device compare its instance with what we have from
1587 	 * the 'reg' property to find the right node in MD and then
1588 	 * read all its properties.
1589 	 */
1590 	for (i = 0; i < num_devs; i++) {
1591 
1592 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1593 			DWARN(vswp, "%s: name property not found\n",
1594 			    __func__);
1595 			goto vsw_readmd_exit;
1596 		}
1597 
1598 		/* is this a virtual-network-switch? */
1599 		if (strcmp(name, vsw_propname) != 0)
1600 			continue;
1601 
1602 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1603 			DWARN(vswp, "%s: cfg-handle property not found\n",
1604 			    __func__);
1605 			goto vsw_readmd_exit;
1606 		}
1607 
1608 		/* is this the required instance of vsw? */
1609 		if (inst != cfgh)
1610 			continue;
1611 
1612 		/* now read all properties of this vsw instance */
1613 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1614 		break;
1615 	}
1616 
1617 vsw_readmd_exit:
1618 
1619 	kmem_free(listp, listsz);
1620 	(void) md_fini_handle(mdp);
1621 	return (rv);
1622 }
1623 
1624 /*
1625  * Read the initial start-of-day values from the specified MD node.
1626  */
1627 static int
1628 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1629 {
1630 	int		i;
1631 	uint64_t 	macaddr = 0;
1632 
1633 	D1(vswp, "%s: enter", __func__);
1634 
1635 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1636 		return (1);
1637 	}
1638 
1639 	/* mac address for vswitch device itself */
1640 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1641 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1642 		    vswp->instance);
1643 		return (1);
1644 	}
1645 
1646 	vsw_save_lmacaddr(vswp, macaddr);
1647 
1648 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1649 		cmn_err(CE_WARN, "vsw%d: Unable to read %s property from "
1650 		    "MD, defaulting to programmed mode", vswp->instance,
1651 		    smode_propname);
1652 
1653 		for (i = 0; i < NUM_SMODES; i++)
1654 			vswp->smode[i] = VSW_LAYER2;
1655 
1656 		vswp->smode_num = NUM_SMODES;
1657 	} else {
1658 		ASSERT(vswp->smode_num != 0);
1659 	}
1660 
1661 	D1(vswp, "%s: exit", __func__);
1662 	return (0);
1663 }
1664 
1665 /*
1666  * Check to see if the relevant properties in the specified node have
1667  * changed, and if so take the appropriate action.
1668  *
1669  * If any of the properties are missing or invalid we don't take
1670  * any action, as this function should only be invoked when modifications
1671  * have been made to what we assume is a working configuration, which
1672  * we leave active.
1673  *
1674  * Note it is legal for this routine to be invoked even if none of the
1675  * properties in the port node within the MD have actually changed.
1676  */
1677 static void
1678 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1679 {
1680 	char		physname[LIFNAMSIZ];
1681 	char		drv[LIFNAMSIZ];
1682 	uint_t		ddi_instance;
1683 	uint8_t		new_smode[NUM_SMODES];
1684 	int		i, smode_num = 0;
1685 	uint64_t 	macaddr = 0;
1686 	enum		{MD_init = 0x1,
1687 				MD_physname = 0x2,
1688 				MD_macaddr = 0x4,
1689 				MD_smode = 0x8} updated;
1690 	int		rv;
1691 
1692 	updated = MD_init;
1693 
1694 	D1(vswp, "%s: enter", __func__);
1695 
1696 	/*
1697 	 * Check if name of physical device in MD has changed.
1698 	 */
1699 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1700 		/*
1701 		 * Do basic sanity check on new device name/instance,
1702 		 * if its non NULL. It is valid for the device name to
1703 		 * have changed from a non NULL to a NULL value, i.e.
1704 		 * the vsw is being changed to 'routed' mode.
1705 		 */
1706 		if ((strlen(physname) != 0) &&
1707 		    (ddi_parse(physname, drv,
1708 		    &ddi_instance) != DDI_SUCCESS)) {
1709 			cmn_err(CE_WARN, "!vsw%d: new device name %s is not"
1710 			    " a valid device name/instance",
1711 			    vswp->instance, physname);
1712 			goto fail_reconf;
1713 		}
1714 
1715 		if (strcmp(physname, vswp->physname)) {
1716 			D2(vswp, "%s: device name changed from %s to %s",
1717 			    __func__, vswp->physname, physname);
1718 
1719 			updated |= MD_physname;
1720 		} else {
1721 			D2(vswp, "%s: device name unchanged at %s",
1722 			    __func__, vswp->physname);
1723 		}
1724 	} else {
1725 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1726 		    "device from updated MD.", vswp->instance);
1727 		goto fail_reconf;
1728 	}
1729 
1730 	/*
1731 	 * Check if MAC address has changed.
1732 	 */
1733 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1734 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1735 		    vswp->instance);
1736 		goto fail_reconf;
1737 	} else {
1738 		uint64_t maddr = macaddr;
1739 		READ_ENTER(&vswp->if_lockrw);
1740 		for (i = ETHERADDRL - 1; i >= 0; i--) {
1741 			if (vswp->if_addr.ether_addr_octet[i]
1742 			    != (macaddr & 0xFF)) {
1743 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
1744 				    __func__, i,
1745 				    vswp->if_addr.ether_addr_octet[i],
1746 				    (macaddr & 0xFF));
1747 				updated |= MD_macaddr;
1748 				macaddr = maddr;
1749 				break;
1750 			}
1751 			macaddr >>= 8;
1752 		}
1753 		RW_EXIT(&vswp->if_lockrw);
1754 		if (updated & MD_macaddr) {
1755 			vsw_save_lmacaddr(vswp, macaddr);
1756 		}
1757 	}
1758 
1759 	/*
1760 	 * Check if switching modes have changed.
1761 	 */
1762 	if (vsw_get_md_smodes(vswp, mdp, node,
1763 	    new_smode, &smode_num)) {
1764 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
1765 		    vswp->instance, smode_propname);
1766 		goto fail_reconf;
1767 	} else {
1768 		ASSERT(smode_num != 0);
1769 		if (smode_num != vswp->smode_num) {
1770 			D2(vswp, "%s: number of modes changed from %d to %d",
1771 			    __func__, vswp->smode_num, smode_num);
1772 		}
1773 
1774 		for (i = 0; i < smode_num; i++) {
1775 			if (new_smode[i] != vswp->smode[i]) {
1776 				D2(vswp, "%s: mode changed from %d to %d",
1777 				    __func__, vswp->smode[i], new_smode[i]);
1778 				updated |= MD_smode;
1779 				break;
1780 			}
1781 		}
1782 	}
1783 
1784 	/*
1785 	 * Now make any changes which are needed...
1786 	 */
1787 
1788 	if (updated & (MD_physname | MD_smode)) {
1789 
1790 		/*
1791 		 * Stop any pending timeout to setup switching mode.
1792 		 */
1793 		vsw_stop_switching_timeout(vswp);
1794 
1795 		/*
1796 		 * Remove unicst, mcst addrs of vsw interface
1797 		 * and ports from the physdev.
1798 		 */
1799 		vsw_unset_addrs(vswp);
1800 
1801 		/*
1802 		 * Stop, detach and close the old device..
1803 		 */
1804 		mutex_enter(&vswp->mac_lock);
1805 
1806 		vsw_mac_detach(vswp);
1807 		vsw_mac_close(vswp);
1808 
1809 		mutex_exit(&vswp->mac_lock);
1810 
1811 		/*
1812 		 * Update phys name.
1813 		 */
1814 		if (updated & MD_physname) {
1815 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
1816 			    vswp->instance, vswp->physname, physname);
1817 			(void) strncpy(vswp->physname,
1818 			    physname, strlen(physname) + 1);
1819 		}
1820 
1821 		/*
1822 		 * Update array with the new switch mode values.
1823 		 */
1824 		if (updated & MD_smode) {
1825 			for (i = 0; i < smode_num; i++)
1826 				vswp->smode[i] = new_smode[i];
1827 
1828 			vswp->smode_num = smode_num;
1829 			vswp->smode_idx = 0;
1830 		}
1831 
1832 		/*
1833 		 * ..and attach, start the new device.
1834 		 */
1835 		rv = vsw_setup_switching(vswp);
1836 		if (rv == EAGAIN) {
1837 			/*
1838 			 * Unable to setup switching mode.
1839 			 * As the error is EAGAIN, schedule a timeout to retry
1840 			 * and return. Programming addresses of ports and
1841 			 * vsw interface will be done when the timeout handler
1842 			 * completes successfully.
1843 			 */
1844 			mutex_enter(&vswp->swtmout_lock);
1845 
1846 			vswp->swtmout_enabled = B_TRUE;
1847 			vswp->swtmout_id =
1848 			    timeout(vsw_setup_switching_timeout, vswp,
1849 			    (vsw_setup_switching_delay *
1850 			    drv_usectohz(MICROSEC)));
1851 
1852 			mutex_exit(&vswp->swtmout_lock);
1853 
1854 			return;
1855 
1856 		} else if (rv) {
1857 			goto fail_update;
1858 		}
1859 
1860 		/*
1861 		 * program unicst, mcst addrs of vsw interface
1862 		 * and ports in the physdev.
1863 		 */
1864 		vsw_set_addrs(vswp);
1865 
1866 	} else if (updated & MD_macaddr) {
1867 		/*
1868 		 * We enter here if only MD_macaddr is exclusively updated.
1869 		 * If MD_physname and/or MD_smode are also updated, then
1870 		 * as part of that, we would have implicitly processed
1871 		 * MD_macaddr update (above).
1872 		 */
1873 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
1874 		    vswp->instance, macaddr);
1875 
1876 		READ_ENTER(&vswp->if_lockrw);
1877 		if (vswp->if_state & VSW_IF_UP) {
1878 
1879 			mutex_enter(&vswp->hw_lock);
1880 			/*
1881 			 * Remove old mac address of vsw interface
1882 			 * from the physdev
1883 			 */
1884 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1885 			/*
1886 			 * Program new mac address of vsw interface
1887 			 * in the physdev
1888 			 */
1889 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1890 			mutex_exit(&vswp->hw_lock);
1891 			if (rv != 0) {
1892 				cmn_err(CE_NOTE,
1893 				    "!vsw%d: failed to program interface "
1894 				    "unicast address\n", vswp->instance);
1895 			}
1896 			/*
1897 			 * Notify the MAC layer of the changed address.
1898 			 */
1899 			mac_unicst_update(vswp->if_mh,
1900 			    (uint8_t *)&vswp->if_addr);
1901 
1902 		}
1903 		RW_EXIT(&vswp->if_lockrw);
1904 
1905 	}
1906 
1907 	return;
1908 
1909 fail_reconf:
1910 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
1911 	return;
1912 
1913 fail_update:
1914 	cmn_err(CE_WARN, "!vsw%d: update of configuration failed",
1915 	    vswp->instance);
1916 }
1917 
1918 /*
1919  * Add a new port to the system.
1920  *
1921  * Returns 0 on success, 1 on failure.
1922  */
1923 int
1924 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
1925 {
1926 	uint64_t		ldc_id;
1927 	uint8_t			*addrp;
1928 	int			i, addrsz;
1929 	int			num_nodes = 0, nchan = 0;
1930 	int			listsz = 0;
1931 	mde_cookie_t		*listp = NULL;
1932 	struct ether_addr	ea;
1933 	uint64_t		macaddr;
1934 	uint64_t		inst = 0;
1935 	vsw_port_t		*port;
1936 
1937 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
1938 		DWARN(vswp, "%s: prop(%s) not found", __func__,
1939 		    id_propname);
1940 		return (1);
1941 	}
1942 
1943 	/*
1944 	 * Find the channel endpoint node(s) (which should be under this
1945 	 * port node) which contain the channel id(s).
1946 	 */
1947 	if ((num_nodes = md_node_count(mdp)) <= 0) {
1948 		DERR(vswp, "%s: invalid number of nodes found (%d)",
1949 		    __func__, num_nodes);
1950 		return (1);
1951 	}
1952 
1953 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
1954 
1955 	/* allocate enough space for node list */
1956 	listsz = num_nodes * sizeof (mde_cookie_t);
1957 	listp = kmem_zalloc(listsz, KM_SLEEP);
1958 
1959 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
1960 	    md_find_name(mdp, "fwd"), listp);
1961 
1962 	if (nchan <= 0) {
1963 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
1964 		kmem_free(listp, listsz);
1965 		return (1);
1966 	}
1967 
1968 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
1969 
1970 	/* use property from first node found */
1971 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
1972 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
1973 		    id_propname);
1974 		kmem_free(listp, listsz);
1975 		return (1);
1976 	}
1977 
1978 	/* don't need list any more */
1979 	kmem_free(listp, listsz);
1980 
1981 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
1982 
1983 	/* read mac-address property */
1984 	if (md_get_prop_data(mdp, *node, remaddr_propname,
1985 	    &addrp, &addrsz)) {
1986 		DWARN(vswp, "%s: prop(%s) not found",
1987 		    __func__, remaddr_propname);
1988 		return (1);
1989 	}
1990 
1991 	if (addrsz < ETHERADDRL) {
1992 		DWARN(vswp, "%s: invalid address size", __func__);
1993 		return (1);
1994 	}
1995 
1996 	macaddr = *((uint64_t *)addrp);
1997 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
1998 
1999 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2000 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2001 		macaddr >>= 8;
2002 	}
2003 
2004 	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
2005 		DERR(vswp, "%s: failed to attach port", __func__);
2006 		return (1);
2007 	}
2008 
2009 	port = vsw_lookup_port(vswp, (int)inst);
2010 
2011 	/* just successfuly created the port, so it should exist */
2012 	ASSERT(port != NULL);
2013 
2014 	return (0);
2015 }
2016 
2017 /*
2018  * vsw_mac_rx -- A common function to send packets to the interface.
2019  * By default this function check if the interface is UP or not, the
2020  * rest of the behaviour depends on the flags as below:
2021  *
2022  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2023  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2024  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2025  */
2026 void
2027 vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh,
2028     mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags)
2029 {
2030 	int trigger = 0;
2031 
2032 	D1(vswp, "%s:enter\n", __func__);
2033 	READ_ENTER(&vswp->if_lockrw);
2034 	/* Check if the interface is up */
2035 	if (!(vswp->if_state & VSW_IF_UP)) {
2036 		RW_EXIT(&vswp->if_lockrw);
2037 		/* Free messages only if FREEMSG flag specified */
2038 		if (flags & VSW_MACRX_FREEMSG) {
2039 			freemsgchain(mp);
2040 		}
2041 		D1(vswp, "%s:exit\n", __func__);
2042 		return;
2043 	}
2044 	/*
2045 	 * If PROMISC flag is passed, then check if
2046 	 * the interface is in the PROMISC mode.
2047 	 * If not, drop the messages.
2048 	 */
2049 	if (flags & VSW_MACRX_PROMISC) {
2050 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2051 			RW_EXIT(&vswp->if_lockrw);
2052 			/* Free messages only if FREEMSG flag specified */
2053 			if (flags & VSW_MACRX_FREEMSG) {
2054 				freemsgchain(mp);
2055 			}
2056 			D1(vswp, "%s:exit\n", __func__);
2057 			return;
2058 		}
2059 	}
2060 	RW_EXIT(&vswp->if_lockrw);
2061 	/*
2062 	 * If COPYMSG flag is passed, then make a copy
2063 	 * of the message chain and send up the copy.
2064 	 */
2065 	if (flags & VSW_MACRX_COPYMSG) {
2066 		mp = copymsgchain(mp);
2067 		if (mp) {
2068 			mpt = mp;
2069 			/* find the tail */
2070 			while (mpt->b_next != NULL) {
2071 				mpt = mpt->b_next;
2072 			}
2073 		} else {
2074 			D1(vswp, "%s:exit\n", __func__);
2075 			return;
2076 		}
2077 	}
2078 
2079 	/*
2080 	 * If the softint is not enabled or the packets are
2081 	 * passed by the physical device, then the caller
2082 	 * is expected to be at the interrupt context. For
2083 	 * this case, mac_rx() directly.
2084 	 */
2085 	if ((vswp->rx_softint == B_FALSE) || (caller == VSW_PHYSDEV)) {
2086 		ASSERT(servicing_interrupt());
2087 		D3(vswp, "%s: sending up stack", __func__);
2088 		mac_rx(vswp->if_mh, mrh, mp);
2089 		D1(vswp, "%s:exit\n", __func__);
2090 		return;
2091 	}
2092 
2093 	/*
2094 	 * Here we may not be at the interrupt context, so
2095 	 * queue the packets and trigger a softint to post
2096 	 * the packets up the stack.
2097 	 */
2098 	mutex_enter(&vswp->soft_lock);
2099 	if (vswp->rx_mhead == NULL) {
2100 		vswp->rx_mhead = mp;
2101 		vswp->rx_mtail = mpt;
2102 		trigger = 1;
2103 	} else {
2104 		vswp->rx_mtail->b_next = mp;
2105 		vswp->rx_mtail = mpt;
2106 	}
2107 	mutex_exit(&vswp->soft_lock);
2108 	if (trigger) {
2109 		D3(vswp, "%s: triggering the softint", __func__);
2110 		(void) ddi_intr_trigger_softint(vswp->soft_handle, NULL);
2111 	}
2112 	D1(vswp, "%s:exit\n", __func__);
2113 }
2114 
2115 /*
2116  * vsw_rx_softintr -- vsw soft interrupt handler function.
2117  * Its job is to pickup the recieved packets that are queued
2118  * for the interface and send them up.
2119  *
2120  * NOTE: An interrupt handler is being used to handle the upper
2121  * layer(s) requirement to send up only at interrupt context.
2122  */
2123 /* ARGSUSED */
2124 static uint_t
2125 vsw_rx_softintr(caddr_t arg1, caddr_t arg2)
2126 {
2127 	mblk_t *mp;
2128 	vsw_t *vswp = (vsw_t *)arg1;
2129 
2130 	mutex_enter(&vswp->soft_lock);
2131 	mp = vswp->rx_mhead;
2132 	vswp->rx_mhead = vswp->rx_mtail = NULL;
2133 	mutex_exit(&vswp->soft_lock);
2134 	if (mp != NULL) {
2135 		READ_ENTER(&vswp->if_lockrw);
2136 		if (vswp->if_state & VSW_IF_UP) {
2137 			RW_EXIT(&vswp->if_lockrw);
2138 			mac_rx(vswp->if_mh, NULL, mp);
2139 		} else {
2140 			RW_EXIT(&vswp->if_lockrw);
2141 			freemsgchain(mp);
2142 		}
2143 	}
2144 	D1(vswp, "%s:exit\n", __func__);
2145 	return (DDI_INTR_CLAIMED);
2146 }
2147 
2148 /* copy mac address of vsw into soft state structure */
2149 static void
2150 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2151 {
2152 	int	i;
2153 
2154 	WRITE_ENTER(&vswp->if_lockrw);
2155 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2156 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2157 		macaddr >>= 8;
2158 	}
2159 	RW_EXIT(&vswp->if_lockrw);
2160 }
2161