xref: /illumos-gate/usr/src/uts/sun4v/io/vsw.c (revision 99dda20867d903eec23291ba1ecb18a82d70096b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/user.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/strsubr.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpu.h>
42 #include <sys/kmem.h>
43 #include <sys/conf.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ksynch.h>
47 #include <sys/stat.h>
48 #include <sys/kstat.h>
49 #include <sys/vtrace.h>
50 #include <sys/strsun.h>
51 #include <sys/dlpi.h>
52 #include <sys/ethernet.h>
53 #include <net/if.h>
54 #include <sys/varargs.h>
55 #include <sys/machsystm.h>
56 #include <sys/modctl.h>
57 #include <sys/modhash.h>
58 #include <sys/mac.h>
59 #include <sys/mac_ether.h>
60 #include <sys/taskq.h>
61 #include <sys/note.h>
62 #include <sys/mach_descrip.h>
63 #include <sys/mac.h>
64 #include <sys/mdeg.h>
65 #include <sys/ldc.h>
66 #include <sys/vsw_fdb.h>
67 #include <sys/vsw.h>
68 #include <sys/vio_mailbox.h>
69 #include <sys/vnet_mailbox.h>
70 #include <sys/vnet_common.h>
71 #include <sys/vio_util.h>
72 #include <sys/sdt.h>
73 #include <sys/atomic.h>
74 #include <sys/callb.h>
75 
76 /*
77  * Function prototypes.
78  */
79 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
80 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
81 static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
82 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
83 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
92 	mde_cookie_t node);
93 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
94 static	int vsw_read_mdprops(vsw_t *vswp);
95 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
96 
97 /* Mac driver related routines */
98 static int vsw_mac_register(vsw_t *);
99 static int vsw_mac_unregister(vsw_t *);
100 static int vsw_m_stat(void *, uint_t, uint64_t *);
101 static void vsw_m_stop(void *arg);
102 static int vsw_m_start(void *arg);
103 static int vsw_m_unicst(void *arg, const uint8_t *);
104 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
105 static int vsw_m_promisc(void *arg, boolean_t);
106 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
107 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
108     mblk_t *mp, vsw_macrx_flags_t flags);
109 
110 /*
111  * Functions imported from other files.
112  */
113 extern void vsw_setup_switching_timeout(void *arg);
114 extern void vsw_stop_switching_timeout(vsw_t *vswp);
115 extern int vsw_setup_switching(vsw_t *);
116 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
117 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
118 extern void vsw_del_mcst_vsw(vsw_t *);
119 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
120 extern int vsw_detach_ports(vsw_t *vswp);
121 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
122 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
123 extern	int vsw_port_attach(vsw_t *vswp, int p_instance,
124 	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
125 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
126 extern int vsw_mac_attach(vsw_t *vswp);
127 extern void vsw_mac_detach(vsw_t *vswp);
128 extern int vsw_mac_open(vsw_t *vswp);
129 extern void vsw_mac_close(vsw_t *vswp);
130 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
131 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
132 extern void vsw_reconfig_hw(vsw_t *);
133 extern void vsw_unset_addrs(vsw_t *vswp);
134 extern void vsw_set_addrs(vsw_t *vswp);
135 
136 
137 /*
138  * Internal tunables.
139  */
140 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
141 int	vsw_wretries = 100;		/* # of write attempts */
142 int	vsw_desc_delay = 0;		/* delay in us */
143 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
144 int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
145 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
146 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
147 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
148 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
149 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
150 
151 /*
152  * Workaround for a version handshake bug in obp's vnet.
153  * If vsw initiates version negotiation starting from the highest version,
154  * obp sends a nack and terminates version handshake. To workaround
155  * this, we do not initiate version handshake when the channel comes up.
156  * Instead, we wait for the peer to send its version info msg and go through
157  * the version protocol exchange. If we successfully negotiate a version,
158  * before sending the ack, we send our version info msg to the peer
159  * using the <major,minor> version that we are about to ack.
160  */
161 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
162 
163 /*
164  * In the absence of "priority-ether-types" property in MD, the following
165  * internal tunable can be set to specify a single priority ethertype.
166  */
167 uint64_t vsw_pri_eth_type = 0;
168 
169 /*
170  * Number of transmit priority buffers that are preallocated per device.
171  * This number is chosen to be a small value to throttle transmission
172  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
173  */
174 uint32_t vsw_pri_tx_nmblks = 64;
175 
176 /*
177  * External tunables.
178  */
/*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
 */
183 boolean_t vsw_multi_ring_enable = B_FALSE;
184 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
185 
186 /* Number of transmit descriptors -  must be power of 2 */
187 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
188 
189 /*
190  * Max number of mblks received in one receive operation.
191  */
192 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
193 
194 /*
195  * Tunables for three different pools, that is, the size and
196  * number of mblks for each pool.
197  */
198 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
199 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
200 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
201 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
202 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
203 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
204 
205 /*
206  * vsw_max_tx_qcount is the maximum # of packets that can be queued
207  * before the tx worker thread begins processing the queue. Its value
208  * is chosen to be 4x the default length of tx descriptor ring.
209  */
210 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
211 
/*
 * MAC callbacks
 *
 * Callback vector handed to the MAC layer by vsw_mac_register() through
 * the m_callbacks field. Entries are positional; the three trailing NULL
 * slots are callbacks this driver does not supply.
 *
 * NOTE(review): the meaning of each slot is fixed by mac_callbacks_t,
 * which is declared outside this file - confirm ordering against it
 * before adding or moving entries.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,		/* presumably the optional-callback flags; verify */
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};
228 
/*
 * Character/block entry points, referenced from vsw_ops below.
 * open and close are routed to nulldev, chpoll to nochpoll and
 * prop_op to ddi_prop_op; every other entry point is nodev.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
249 
/*
 * Device operations vector. It is referenced by vswmodldrv and is also
 * passed to mac_init_ops()/mac_fini_ops() in _init()/_fini().
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};
263 
/*
 * Driver linkage structure referenced from modlinkage: the module ops
 * for a device driver, a description string, and the dev_ops vector.
 */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};
270 
/*
 * Acquire (LDC_ENTER_LOCK) or release (LDC_EXIT_LOCK) the three
 * per-channel mutexes of an ldc. The locks are always taken in the order
 * ldc_cblock, ldc_rxlock, ldc_txlock and dropped in the reverse order.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement; this keeps all three lock operations together when the
 * macro is used as the body of an unbraced if/else. Callers supply the
 * terminating ';' as for an ordinary function call.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
		_NOTE(CONSTCOND)	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
		_NOTE(CONSTCOND)	\
	} while (0)
279 
280 /* Driver soft state ptr  */
281 static void	*vsw_state;
282 
283 /*
284  * Linked list of "vsw_t" structures - one per instance.
285  */
286 vsw_t		*vsw_head = NULL;
287 krwlock_t	vsw_rw;
288 
289 /*
290  * Property names
291  */
292 static char vdev_propname[] = "virtual-device";
293 static char vsw_propname[] = "virtual-network-switch";
294 static char physdev_propname[] = "vsw-phys-dev";
295 static char smode_propname[] = "vsw-switch-mode";
296 static char macaddr_propname[] = "local-mac-address";
297 static char remaddr_propname[] = "remote-mac-address";
298 static char ldcids_propname[] = "ldc-ids";
299 static char chan_propname[] = "channel-endpoint";
300 static char id_propname[] = "id";
301 static char reg_propname[] = "reg";
302 static char pri_types_propname[] = "priority-ether-types";
303 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 *
 * The property list is terminated by the MDET_LIST_END entry.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };
316 
/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
 *
 * The property list is terminated by the MDET_LIST_END entry.
 */
static md_prop_match_t vdev_prop_match[] = {
	{ MDET_PROP_STR,    "name"   },
	{ MDET_PROP_VAL,    "cfg-handle" },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vdev_match = { "virtual-device",
						vdev_prop_match };
330 
331 
/*
 * Specification of an MD node passed to the MDEG to filter any
 * 'vport' nodes that do not belong to the specified node. This
 * template is copied for each vsw instance and filled in with
 * the appropriate 'cfg-handle' value before being passed to the MDEG.
 *
 * The 'cfg-handle' entry must remain at index 1: that is the element
 * VSW_SET_MDEG_PROP_INST() writes to. Its value is NULL in the template
 * itself.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
	{ MDET_PROP_STR,    "name",		vsw_propname },
	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
	{ MDET_LIST_END,    NULL,		NULL	}
};
343 
/*
 * Store 'val' as the value of element [1] of an MDEG property
 * specification array (the 'cfg-handle' slot of a copy of
 * vsw_prop_template). The expansion is a single parenthesized
 * expression with no trailing ';', so the caller terminates it like
 * any other statement and it is safe in unbraced if/else bodies.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
345 
346 #ifdef	DEBUG
347 /*
348  * Print debug messages - set to 0x1f to enable all msgs
349  * or 0x0 to turn all off.
350  */
351 int vswdbg = 0x0;
352 
353 /*
354  * debug levels:
355  * 0x01:	Function entry/exit tracing
356  * 0x02:	Internal function messages
357  * 0x04:	Verbose internal messages
358  * 0x08:	Warning messages
359  * 0x10:	Error messages
360  */
361 
362 void
363 vswdebug(vsw_t *vswp, const char *fmt, ...)
364 {
365 	char buf[512];
366 	va_list ap;
367 
368 	va_start(ap, fmt);
369 	(void) vsprintf(buf, fmt, ap);
370 	va_end(ap);
371 
372 	if (vswp == NULL)
373 		cmn_err(CE_CONT, "%s\n", buf);
374 	else
375 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
376 }
377 
378 #endif	/* DEBUG */
379 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info(). It lists the single driver linkage
 * structure for this module, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&vswmodldrv,
	NULL
};
385 
386 int
387 _init(void)
388 {
389 	int status;
390 
391 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
392 
393 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
394 	if (status != 0) {
395 		return (status);
396 	}
397 
398 	mac_init_ops(&vsw_ops, DRV_NAME);
399 	status = mod_install(&modlinkage);
400 	if (status != 0) {
401 		ddi_soft_state_fini(&vsw_state);
402 	}
403 	return (status);
404 }
405 
406 int
407 _fini(void)
408 {
409 	int status;
410 
411 	status = mod_remove(&modlinkage);
412 	if (status != 0)
413 		return (status);
414 	mac_fini_ops(&vsw_ops);
415 	ddi_soft_state_fini(&vsw_state);
416 
417 	rw_destroy(&vsw_rw);
418 
419 	return (status);
420 }
421 
422 int
423 _info(struct modinfo *modinfop)
424 {
425 	return (mod_info(&modlinkage, modinfop));
426 }
427 
428 static int
429 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
430 {
431 	vsw_t		*vswp;
432 	int		instance;
433 	char		hashname[MAXNAMELEN];
434 	char		qname[TASKQ_NAMELEN];
435 	enum		{ PROG_init = 0x00,
436 				PROG_locks = 0x01,
437 				PROG_readmd = 0x02,
438 				PROG_fdb = 0x04,
439 				PROG_mfdb = 0x08,
440 				PROG_taskq = 0x10,
441 				PROG_swmode = 0x20,
442 				PROG_macreg = 0x40,
443 				PROG_mdreg = 0x80}
444 			progress;
445 
446 	progress = PROG_init;
447 	int		rv;
448 
449 	switch (cmd) {
450 	case DDI_ATTACH:
451 		break;
452 	case DDI_RESUME:
453 		/* nothing to do for this non-device */
454 		return (DDI_SUCCESS);
455 	case DDI_PM_RESUME:
456 	default:
457 		return (DDI_FAILURE);
458 	}
459 
460 	instance = ddi_get_instance(dip);
461 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
462 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
463 		return (DDI_FAILURE);
464 	}
465 	vswp = ddi_get_soft_state(vsw_state, instance);
466 
467 	if (vswp == NULL) {
468 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
469 		goto vsw_attach_fail;
470 	}
471 
472 	vswp->dip = dip;
473 	vswp->instance = instance;
474 	ddi_set_driver_private(dip, (caddr_t)vswp);
475 
476 	mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL);
477 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
478 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
479 	mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL);
480 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
481 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
482 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
483 
484 	progress |= PROG_locks;
485 
486 	rv = vsw_read_mdprops(vswp);
487 	if (rv != 0)
488 		goto vsw_attach_fail;
489 
490 	progress |= PROG_readmd;
491 
492 	/* setup the unicast forwarding database  */
493 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
494 	    vswp->instance);
495 	D2(vswp, "creating unicast hash table (%s)...", hashname);
496 	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
497 	    mod_hash_null_valdtor, sizeof (void *));
498 
499 	progress |= PROG_fdb;
500 
501 	/* setup the multicast fowarding database */
502 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
503 	    vswp->instance);
504 	D2(vswp, "creating multicast hash table %s)...", hashname);
505 	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
506 	    mod_hash_null_valdtor, sizeof (void *));
507 
508 	progress |= PROG_mfdb;
509 
510 	/*
511 	 * Create the taskq which will process all the VIO
512 	 * control messages.
513 	 */
514 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
515 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
516 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
517 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
518 		    vswp->instance);
519 		goto vsw_attach_fail;
520 	}
521 
522 	progress |= PROG_taskq;
523 
524 	/* prevent auto-detaching */
525 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
526 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
527 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
528 		    "instance %u", DDI_NO_AUTODETACH, instance);
529 	}
530 
531 	/*
532 	 * Setup the required switching mode,
533 	 * based on the mdprops that we read earlier.
534 	 */
535 	rv = vsw_setup_switching(vswp);
536 	if (rv == EAGAIN) {
537 		/*
538 		 * Unable to setup switching mode;
539 		 * as the error is EAGAIN, schedule a timeout to retry.
540 		 */
541 		mutex_enter(&vswp->swtmout_lock);
542 
543 		vswp->swtmout_enabled = B_TRUE;
544 		vswp->swtmout_id =
545 		    timeout(vsw_setup_switching_timeout, vswp,
546 		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
547 
548 		mutex_exit(&vswp->swtmout_lock);
549 	} else if (rv != 0) {
550 		goto vsw_attach_fail;
551 	}
552 
553 	progress |= PROG_swmode;
554 
555 	/* Register with mac layer as a provider */
556 	rv = vsw_mac_register(vswp);
557 	if (rv != 0)
558 		goto vsw_attach_fail;
559 
560 	progress |= PROG_macreg;
561 
562 	/*
563 	 * Now we have everything setup, register an interest in
564 	 * specific MD nodes.
565 	 *
566 	 * The callback is invoked in 2 cases, firstly if upon mdeg
567 	 * registration there are existing nodes which match our specified
568 	 * criteria, and secondly if the MD is changed (and again, there
569 	 * are nodes which we are interested in present within it. Note
570 	 * that our callback will be invoked even if our specified nodes
571 	 * have not actually changed).
572 	 *
573 	 */
574 	rv = vsw_mdeg_register(vswp);
575 	if (rv != 0)
576 		goto vsw_attach_fail;
577 
578 	progress |= PROG_mdreg;
579 
580 	WRITE_ENTER(&vsw_rw);
581 	vswp->next = vsw_head;
582 	vsw_head = vswp;
583 	RW_EXIT(&vsw_rw);
584 
585 	ddi_report_dev(vswp->dip);
586 	return (DDI_SUCCESS);
587 
588 vsw_attach_fail:
589 	DERR(NULL, "vsw_attach: failed");
590 
591 	if (progress & PROG_mdreg) {
592 		vsw_mdeg_unregister(vswp);
593 		(void) vsw_detach_ports(vswp);
594 	}
595 
596 	if (progress & PROG_macreg)
597 		(void) vsw_mac_unregister(vswp);
598 
599 	if (progress & PROG_swmode) {
600 		vsw_stop_switching_timeout(vswp);
601 		mutex_enter(&vswp->mac_lock);
602 		vsw_mac_detach(vswp);
603 		vsw_mac_close(vswp);
604 		mutex_exit(&vswp->mac_lock);
605 	}
606 
607 	if (progress & PROG_taskq)
608 		ddi_taskq_destroy(vswp->taskq_p);
609 
610 	if (progress & PROG_mfdb)
611 		mod_hash_destroy_hash(vswp->mfdb);
612 
613 	if (progress & PROG_fdb)
614 		mod_hash_destroy_hash(vswp->fdb);
615 
616 	if (progress & PROG_readmd) {
617 		if (VSW_PRI_ETH_DEFINED(vswp)) {
618 			kmem_free(vswp->pri_types,
619 			    sizeof (uint16_t) * vswp->pri_num_types);
620 		}
621 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
622 	}
623 
624 	if (progress & PROG_locks) {
625 		rw_destroy(&vswp->plist.lockrw);
626 		rw_destroy(&vswp->mfdbrw);
627 		rw_destroy(&vswp->if_lockrw);
628 		mutex_destroy(&vswp->swtmout_lock);
629 		mutex_destroy(&vswp->mca_lock);
630 		mutex_destroy(&vswp->mac_lock);
631 		mutex_destroy(&vswp->hw_lock);
632 	}
633 
634 	ddi_soft_state_free(vsw_state, instance);
635 	return (DDI_FAILURE);
636 }
637 
638 static int
639 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
640 {
641 	vio_mblk_pool_t		*poolp, *npoolp;
642 	vsw_t			**vswpp, *vswp;
643 	int 			instance;
644 
645 	instance = ddi_get_instance(dip);
646 	vswp = ddi_get_soft_state(vsw_state, instance);
647 
648 	if (vswp == NULL) {
649 		return (DDI_FAILURE);
650 	}
651 
652 	switch (cmd) {
653 	case DDI_DETACH:
654 		break;
655 	case DDI_SUSPEND:
656 	case DDI_PM_SUSPEND:
657 	default:
658 		return (DDI_FAILURE);
659 	}
660 
661 	D2(vswp, "detaching instance %d", instance);
662 
663 	/* Stop any pending timeout to setup switching mode. */
664 	vsw_stop_switching_timeout(vswp);
665 
666 	if (vswp->if_state & VSW_IF_REG) {
667 		if (vsw_mac_unregister(vswp) != 0) {
668 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
669 			    "MAC layer", vswp->instance);
670 			return (DDI_FAILURE);
671 		}
672 	}
673 
674 	vsw_mdeg_unregister(vswp);
675 
676 	/* remove mac layer callback */
677 	mutex_enter(&vswp->mac_lock);
678 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
679 		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
680 		vswp->mrh = NULL;
681 	}
682 	mutex_exit(&vswp->mac_lock);
683 
684 	if (vsw_detach_ports(vswp) != 0) {
685 		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
686 		    vswp->instance);
687 		return (DDI_FAILURE);
688 	}
689 
690 	rw_destroy(&vswp->if_lockrw);
691 
692 	mutex_destroy(&vswp->hw_lock);
693 
694 	/*
695 	 * Now that the ports have been deleted, stop and close
696 	 * the physical device.
697 	 */
698 	mutex_enter(&vswp->mac_lock);
699 
700 	vsw_mac_detach(vswp);
701 	vsw_mac_close(vswp);
702 
703 	mutex_exit(&vswp->mac_lock);
704 
705 	mutex_destroy(&vswp->mac_lock);
706 	mutex_destroy(&vswp->swtmout_lock);
707 
708 	/*
709 	 * Destroy any free pools that may still exist.
710 	 */
711 	poolp = vswp->rxh;
712 	while (poolp != NULL) {
713 		npoolp = vswp->rxh = poolp->nextp;
714 		if (vio_destroy_mblks(poolp) != 0) {
715 			vswp->rxh = poolp;
716 			return (DDI_FAILURE);
717 		}
718 		poolp = npoolp;
719 	}
720 
721 	/*
722 	 * Remove this instance from any entries it may be on in
723 	 * the hash table by using the list of addresses maintained
724 	 * in the vsw_t structure.
725 	 */
726 	vsw_del_mcst_vsw(vswp);
727 
728 	vswp->mcap = NULL;
729 	mutex_destroy(&vswp->mca_lock);
730 
731 	/*
732 	 * By now any pending tasks have finished and the underlying
733 	 * ldc's have been destroyed, so its safe to delete the control
734 	 * message taskq.
735 	 */
736 	if (vswp->taskq_p != NULL)
737 		ddi_taskq_destroy(vswp->taskq_p);
738 
739 	/*
740 	 * At this stage all the data pointers in the hash table
741 	 * should be NULL, as all the ports have been removed and will
742 	 * have deleted themselves from the port lists which the data
743 	 * pointers point to. Hence we can destroy the table using the
744 	 * default destructors.
745 	 */
746 	D2(vswp, "vsw_detach: destroying hash tables..");
747 	mod_hash_destroy_hash(vswp->fdb);
748 	vswp->fdb = NULL;
749 
750 	WRITE_ENTER(&vswp->mfdbrw);
751 	mod_hash_destroy_hash(vswp->mfdb);
752 	vswp->mfdb = NULL;
753 	RW_EXIT(&vswp->mfdbrw);
754 	rw_destroy(&vswp->mfdbrw);
755 
756 	/* free pri_types table */
757 	if (VSW_PRI_ETH_DEFINED(vswp)) {
758 		kmem_free(vswp->pri_types,
759 		    sizeof (uint16_t) * vswp->pri_num_types);
760 		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
761 	}
762 
763 	ddi_remove_minor_node(dip, NULL);
764 
765 	rw_destroy(&vswp->plist.lockrw);
766 	WRITE_ENTER(&vsw_rw);
767 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
768 		if (*vswpp == vswp) {
769 			*vswpp = vswp->next;
770 			break;
771 		}
772 	}
773 	RW_EXIT(&vsw_rw);
774 	ddi_soft_state_free(vsw_state, instance);
775 
776 	return (DDI_SUCCESS);
777 }
778 
779 static int
780 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
781 {
782 	_NOTE(ARGUNUSED(dip))
783 
784 	vsw_t	*vswp = NULL;
785 	dev_t	dev = (dev_t)arg;
786 	int	instance;
787 
788 	instance = getminor(dev);
789 
790 	switch (infocmd) {
791 	case DDI_INFO_DEVT2DEVINFO:
792 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
793 			*result = NULL;
794 			return (DDI_FAILURE);
795 		}
796 		*result = vswp->dip;
797 		return (DDI_SUCCESS);
798 
799 	case DDI_INFO_DEVT2INSTANCE:
800 		*result = (void *)(uintptr_t)instance;
801 		return (DDI_SUCCESS);
802 
803 	default:
804 		*result = NULL;
805 		return (DDI_FAILURE);
806 	}
807 }
808 
809 /*
810  * Get the value of the "vsw-phys-dev" property in the specified
811  * node. This property is the name of the physical device that
812  * the virtual switch will use to talk to the outside world.
813  *
814  * Note it is valid for this property to be NULL (but the property
815  * itself must exist). Callers of this routine should verify that
816  * the value returned is what they expected (i.e. either NULL or non NULL).
817  *
818  * On success returns value of the property in region pointed to by
819  * the 'name' argument, and with return value of 0. Otherwise returns 1.
820  */
821 static int
822 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
823 {
824 	int		len = 0;
825 	int		instance;
826 	char		*physname = NULL;
827 	char		*dev;
828 	const char	*dev_name;
829 	char		myname[MAXNAMELEN];
830 
831 	dev_name = ddi_driver_name(vswp->dip);
832 	instance = ddi_get_instance(vswp->dip);
833 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
834 
835 	if (md_get_prop_data(mdp, node, physdev_propname,
836 	    (uint8_t **)(&physname), &len) != 0) {
837 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
838 		    "device(s) from MD", vswp->instance);
839 		return (1);
840 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
841 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
842 		    vswp->instance, physname);
843 		return (1);
844 	} else if (strcmp(myname, physname) == 0) {
845 		/*
846 		 * Prevent the vswitch from opening itself as the
847 		 * network device.
848 		 */
849 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
850 		    vswp->instance, physname);
851 		return (1);
852 	} else {
853 		(void) strncpy(name, physname, strlen(physname) + 1);
854 		D2(vswp, "%s: using first device specified (%s)",
855 		    __func__, physname);
856 	}
857 
858 #ifdef DEBUG
859 	/*
860 	 * As a temporary measure to aid testing we check to see if there
861 	 * is a vsw.conf file present. If there is we use the value of the
862 	 * vsw_physname property in the file as the name of the physical
863 	 * device, overriding the value from the MD.
864 	 *
865 	 * There may be multiple devices listed, but for the moment
866 	 * we just use the first one.
867 	 */
868 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
869 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
870 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
871 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
872 			    vswp->instance, dev);
873 			ddi_prop_free(dev);
874 			return (1);
875 		} else {
876 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
877 			    "config file", vswp->instance, dev);
878 
879 			(void) strncpy(name, dev, strlen(dev) + 1);
880 		}
881 
882 		ddi_prop_free(dev);
883 	}
884 #endif
885 
886 	return (0);
887 }
888 
889 /*
890  * Read the 'vsw-switch-mode' property from the specified MD node.
891  *
892  * Returns 0 on success and the number of modes found in 'found',
893  * otherwise returns 1.
894  */
895 static int
896 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
897 						uint8_t *modes, int *found)
898 {
899 	int		len = 0;
900 	int		smode_num = 0;
901 	char		*smode = NULL;
902 	char		*curr_mode = NULL;
903 
904 	D1(vswp, "%s: enter", __func__);
905 
906 	/*
907 	 * Get the switch-mode property. The modes are listed in
908 	 * decreasing order of preference, i.e. prefered mode is
909 	 * first item in list.
910 	 */
911 	len = 0;
912 	smode_num = 0;
913 	if (md_get_prop_data(mdp, node, smode_propname,
914 	    (uint8_t **)(&smode), &len) != 0) {
915 		/*
916 		 * Unable to get switch-mode property from MD, nothing
917 		 * more we can do.
918 		 */
919 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
920 		    " from the MD", vswp->instance);
921 		*found = 0;
922 		return (1);
923 	}
924 
925 	curr_mode = smode;
926 	/*
927 	 * Modes of operation:
928 	 * 'switched'	 - layer 2 switching, underlying HW in
929 	 *			programmed mode.
930 	 * 'promiscuous' - layer 2 switching, underlying HW in
931 	 *			promiscuous mode.
932 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
933 	 *			in non-promiscuous mode.
934 	 */
935 	while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
936 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
937 		if (strcmp(curr_mode, "switched") == 0) {
938 			modes[smode_num++] = VSW_LAYER2;
939 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
940 			modes[smode_num++] = VSW_LAYER2_PROMISC;
941 		} else if (strcmp(curr_mode, "routed") == 0) {
942 			modes[smode_num++] = VSW_LAYER3;
943 		} else {
944 			DWARN(vswp, "%s: Unknown switch mode %s, "
945 			    "setting to default 'switched' mode",
946 			    __func__, curr_mode);
947 			modes[smode_num++] = VSW_LAYER2;
948 		}
949 		curr_mode += strlen(curr_mode) + 1;
950 	}
951 	*found = smode_num;
952 
953 	D2(vswp, "%s: %d modes found", __func__, smode_num);
954 
955 	D1(vswp, "%s: exit", __func__);
956 
957 	return (0);
958 }
959 
960 /*
961  * Register with the MAC layer as a network device, so we
962  * can be plumbed if necessary.
963  */
964 static int
965 vsw_mac_register(vsw_t *vswp)
966 {
967 	mac_register_t	*macp;
968 	int		rv;
969 
970 	D1(vswp, "%s: enter", __func__);
971 
972 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
973 		return (EINVAL);
974 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
975 	macp->m_driver = vswp;
976 	macp->m_dip = vswp->dip;
977 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
978 	macp->m_callbacks = &vsw_m_callbacks;
979 	macp->m_min_sdu = 0;
980 	macp->m_max_sdu = ETHERMTU;
981 	rv = mac_register(macp, &vswp->if_mh);
982 	mac_free(macp);
983 	if (rv != 0) {
984 		/*
985 		 * Treat this as a non-fatal error as we may be
986 		 * able to operate in some other mode.
987 		 */
988 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
989 		    "a provider with MAC layer", vswp->instance);
990 		return (rv);
991 	}
992 
993 	vswp->if_state |= VSW_IF_REG;
994 
995 	D1(vswp, "%s: exit", __func__);
996 
997 	return (rv);
998 }
999 
1000 static int
1001 vsw_mac_unregister(vsw_t *vswp)
1002 {
1003 	int		rv = 0;
1004 
1005 	D1(vswp, "%s: enter", __func__);
1006 
1007 	WRITE_ENTER(&vswp->if_lockrw);
1008 
1009 	if (vswp->if_state & VSW_IF_REG) {
1010 		rv = mac_unregister(vswp->if_mh);
1011 		if (rv != 0) {
1012 			DWARN(vswp, "%s: unable to unregister from MAC "
1013 			    "framework", __func__);
1014 
1015 			RW_EXIT(&vswp->if_lockrw);
1016 			D1(vswp, "%s: fail exit", __func__);
1017 			return (rv);
1018 		}
1019 
1020 		/* mark i/f as down and unregistered */
1021 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1022 	}
1023 	RW_EXIT(&vswp->if_lockrw);
1024 
1025 	D1(vswp, "%s: exit", __func__);
1026 
1027 	return (rv);
1028 }
1029 
1030 static int
1031 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1032 {
1033 	vsw_t			*vswp = (vsw_t *)arg;
1034 
1035 	D1(vswp, "%s: enter", __func__);
1036 
1037 	mutex_enter(&vswp->mac_lock);
1038 	if (vswp->mh == NULL) {
1039 		mutex_exit(&vswp->mac_lock);
1040 		return (EINVAL);
1041 	}
1042 
1043 	/* return stats from underlying device */
1044 	*val = mac_stat_get(vswp->mh, stat);
1045 
1046 	mutex_exit(&vswp->mac_lock);
1047 
1048 	return (0);
1049 }
1050 
1051 static void
1052 vsw_m_stop(void *arg)
1053 {
1054 	vsw_t		*vswp = (vsw_t *)arg;
1055 
1056 	D1(vswp, "%s: enter", __func__);
1057 
1058 	WRITE_ENTER(&vswp->if_lockrw);
1059 	vswp->if_state &= ~VSW_IF_UP;
1060 	RW_EXIT(&vswp->if_lockrw);
1061 
1062 	mutex_enter(&vswp->hw_lock);
1063 
1064 	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1065 
1066 	if (vswp->recfg_reqd)
1067 		vsw_reconfig_hw(vswp);
1068 
1069 	mutex_exit(&vswp->hw_lock);
1070 
1071 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1072 }
1073 
1074 static int
1075 vsw_m_start(void *arg)
1076 {
1077 	vsw_t		*vswp = (vsw_t *)arg;
1078 
1079 	D1(vswp, "%s: enter", __func__);
1080 
1081 	WRITE_ENTER(&vswp->if_lockrw);
1082 
1083 	vswp->if_state |= VSW_IF_UP;
1084 
1085 	if (vswp->switching_setup_done == B_FALSE) {
1086 		/*
1087 		 * If the switching mode has not been setup yet, just
1088 		 * return. The unicast address will be programmed
1089 		 * after the physical device is successfully setup by the
1090 		 * timeout handler.
1091 		 */
1092 		RW_EXIT(&vswp->if_lockrw);
1093 		return (0);
1094 	}
1095 
1096 	/* if in layer2 mode, program unicast address. */
1097 	if (vswp->mh != NULL) {
1098 		mutex_enter(&vswp->hw_lock);
1099 		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1100 		mutex_exit(&vswp->hw_lock);
1101 	}
1102 
1103 	RW_EXIT(&vswp->if_lockrw);
1104 
1105 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1106 	return (0);
1107 }
1108 
1109 /*
1110  * Change the local interface address.
1111  *
1112  * Note: we don't support this entry point. The local
1113  * mac address of the switch can only be changed via its
1114  * MD node properties.
1115  */
1116 static int
1117 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1118 {
1119 	_NOTE(ARGUNUSED(arg, macaddr))
1120 
1121 	return (DDI_FAILURE);
1122 }
1123 
1124 static int
1125 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1126 {
1127 	vsw_t		*vswp = (vsw_t *)arg;
1128 	mcst_addr_t	*mcst_p = NULL;
1129 	uint64_t	addr = 0x0;
1130 	int		i, ret = 0;
1131 
1132 	D1(vswp, "%s: enter", __func__);
1133 
1134 	/*
1135 	 * Convert address into form that can be used
1136 	 * as hash table key.
1137 	 */
1138 	for (i = 0; i < ETHERADDRL; i++) {
1139 		addr = (addr << 8) | mca[i];
1140 	}
1141 
1142 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1143 
1144 	if (add) {
1145 		D2(vswp, "%s: adding multicast", __func__);
1146 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1147 			/*
1148 			 * Update the list of multicast addresses
1149 			 * contained within the vsw_t structure to
1150 			 * include this new one.
1151 			 */
1152 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1153 			if (mcst_p == NULL) {
1154 				DERR(vswp, "%s unable to alloc mem", __func__);
1155 				(void) vsw_del_mcst(vswp,
1156 				    VSW_LOCALDEV, addr, NULL);
1157 				return (1);
1158 			}
1159 			mcst_p->addr = addr;
1160 			ether_copy(mca, &mcst_p->mca);
1161 
1162 			/*
1163 			 * Call into the underlying driver to program the
1164 			 * address into HW.
1165 			 */
1166 			mutex_enter(&vswp->mac_lock);
1167 			if (vswp->mh != NULL) {
1168 				ret = mac_multicst_add(vswp->mh, mca);
1169 				if (ret != 0) {
1170 					cmn_err(CE_NOTE, "!vsw%d: unable to "
1171 					    "add multicast address",
1172 					    vswp->instance);
1173 					mutex_exit(&vswp->mac_lock);
1174 					(void) vsw_del_mcst(vswp,
1175 					    VSW_LOCALDEV, addr, NULL);
1176 					kmem_free(mcst_p, sizeof (*mcst_p));
1177 					return (ret);
1178 				}
1179 				mcst_p->mac_added = B_TRUE;
1180 			}
1181 			mutex_exit(&vswp->mac_lock);
1182 
1183 			mutex_enter(&vswp->mca_lock);
1184 			mcst_p->nextp = vswp->mcap;
1185 			vswp->mcap = mcst_p;
1186 			mutex_exit(&vswp->mca_lock);
1187 		} else {
1188 			cmn_err(CE_NOTE, "!vsw%d: unable to add multicast "
1189 			    "address", vswp->instance);
1190 		}
1191 		return (ret);
1192 	}
1193 
1194 	D2(vswp, "%s: removing multicast", __func__);
1195 	/*
1196 	 * Remove the address from the hash table..
1197 	 */
1198 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1199 
1200 		/*
1201 		 * ..and then from the list maintained in the
1202 		 * vsw_t structure.
1203 		 */
1204 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1205 		ASSERT(mcst_p != NULL);
1206 
1207 		mutex_enter(&vswp->mac_lock);
1208 		if (vswp->mh != NULL && mcst_p->mac_added) {
1209 			(void) mac_multicst_remove(vswp->mh, mca);
1210 			mcst_p->mac_added = B_FALSE;
1211 		}
1212 		mutex_exit(&vswp->mac_lock);
1213 		kmem_free(mcst_p, sizeof (*mcst_p));
1214 	}
1215 
1216 	D1(vswp, "%s: exit", __func__);
1217 
1218 	return (0);
1219 }
1220 
1221 static int
1222 vsw_m_promisc(void *arg, boolean_t on)
1223 {
1224 	vsw_t		*vswp = (vsw_t *)arg;
1225 
1226 	D1(vswp, "%s: enter", __func__);
1227 
1228 	WRITE_ENTER(&vswp->if_lockrw);
1229 	if (on)
1230 		vswp->if_state |= VSW_IF_PROMISC;
1231 	else
1232 		vswp->if_state &= ~VSW_IF_PROMISC;
1233 	RW_EXIT(&vswp->if_lockrw);
1234 
1235 	D1(vswp, "%s: exit", __func__);
1236 
1237 	return (0);
1238 }
1239 
1240 static mblk_t *
1241 vsw_m_tx(void *arg, mblk_t *mp)
1242 {
1243 	vsw_t		*vswp = (vsw_t *)arg;
1244 
1245 	D1(vswp, "%s: enter", __func__);
1246 
1247 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1248 
1249 	D1(vswp, "%s: exit", __func__);
1250 
1251 	return (NULL);
1252 }
1253 
1254 /*
1255  * Register for machine description (MD) updates.
1256  *
1257  * Returns 0 on success, 1 on failure.
1258  */
1259 static int
1260 vsw_mdeg_register(vsw_t *vswp)
1261 {
1262 	mdeg_prop_spec_t	*pspecp;
1263 	mdeg_node_spec_t	*inst_specp;
1264 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1265 	size_t			templatesz;
1266 	int			rv;
1267 
1268 	D1(vswp, "%s: enter", __func__);
1269 
1270 	/*
1271 	 * Allocate and initialize a per-instance copy
1272 	 * of the global property spec array that will
1273 	 * uniquely identify this vsw instance.
1274 	 */
1275 	templatesz = sizeof (vsw_prop_template);
1276 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1277 
1278 	bcopy(vsw_prop_template, pspecp, templatesz);
1279 
1280 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1281 
1282 	/* initialize the complete prop spec structure */
1283 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1284 	inst_specp->namep = "virtual-device";
1285 	inst_specp->specp = pspecp;
1286 
1287 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1288 	    vswp->regprop);
1289 	/*
1290 	 * Register an interest in 'virtual-device' nodes with a
1291 	 * 'name' property of 'virtual-network-switch'
1292 	 */
1293 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1294 	    (void *)vswp, &mdeg_hdl);
1295 	if (rv != MDEG_SUCCESS) {
1296 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1297 		    __func__, rv);
1298 		goto mdeg_reg_fail;
1299 	}
1300 
1301 	/*
1302 	 * Register an interest in 'vsw-port' nodes.
1303 	 */
1304 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1305 	    (void *)vswp, &mdeg_port_hdl);
1306 	if (rv != MDEG_SUCCESS) {
1307 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1308 		(void) mdeg_unregister(mdeg_hdl);
1309 		goto mdeg_reg_fail;
1310 	}
1311 
1312 	/* save off data that will be needed later */
1313 	vswp->inst_spec = inst_specp;
1314 	vswp->mdeg_hdl = mdeg_hdl;
1315 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1316 
1317 	D1(vswp, "%s: exit", __func__);
1318 	return (0);
1319 
1320 mdeg_reg_fail:
1321 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1322 	    vswp->instance);
1323 	kmem_free(pspecp, templatesz);
1324 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1325 
1326 	vswp->mdeg_hdl = NULL;
1327 	vswp->mdeg_port_hdl = NULL;
1328 
1329 	return (1);
1330 }
1331 
1332 static void
1333 vsw_mdeg_unregister(vsw_t *vswp)
1334 {
1335 	D1(vswp, "vsw_mdeg_unregister: enter");
1336 
1337 	if (vswp->mdeg_hdl != NULL)
1338 		(void) mdeg_unregister(vswp->mdeg_hdl);
1339 
1340 	if (vswp->mdeg_port_hdl != NULL)
1341 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1342 
1343 	if (vswp->inst_spec != NULL) {
1344 		if (vswp->inst_spec->specp != NULL) {
1345 			(void) kmem_free(vswp->inst_spec->specp,
1346 			    sizeof (vsw_prop_template));
1347 			vswp->inst_spec->specp = NULL;
1348 		}
1349 
1350 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1351 		vswp->inst_spec = NULL;
1352 	}
1353 
1354 	D1(vswp, "vsw_mdeg_unregister: exit");
1355 }
1356 
1357 /*
1358  * Mdeg callback invoked for the vsw node itself.
1359  */
1360 static int
1361 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1362 {
1363 	vsw_t		*vswp;
1364 	md_t		*mdp;
1365 	mde_cookie_t	node;
1366 	uint64_t	inst;
1367 	char		*node_name = NULL;
1368 
1369 	if (resp == NULL)
1370 		return (MDEG_FAILURE);
1371 
1372 	vswp = (vsw_t *)cb_argp;
1373 
1374 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1375 	    " : prev matched %d", __func__, resp->added.nelem,
1376 	    resp->removed.nelem, resp->match_curr.nelem,
1377 	    resp->match_prev.nelem);
1378 
1379 	/*
1380 	 * We get an initial callback for this node as 'added'
1381 	 * after registering with mdeg. Note that we would have
1382 	 * already gathered information about this vsw node by
1383 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1384 	 * So, there is a window where the properties of this
1385 	 * node might have changed when we get this initial 'added'
1386 	 * callback. We handle this as if an update occured
1387 	 * and invoke the same function which handles updates to
1388 	 * the properties of this vsw-node if any.
1389 	 *
1390 	 * A non-zero 'match' value indicates that the MD has been
1391 	 * updated and that a virtual-network-switch node is
1392 	 * present which may or may not have been updated. It is
1393 	 * up to the clients to examine their own nodes and
1394 	 * determine if they have changed.
1395 	 */
1396 	if (resp->added.nelem != 0) {
1397 
1398 		if (resp->added.nelem != 1) {
1399 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1400 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1401 			return (MDEG_FAILURE);
1402 		}
1403 
1404 		mdp = resp->added.mdp;
1405 		node = resp->added.mdep[0];
1406 
1407 	} else if (resp->match_curr.nelem != 0) {
1408 
1409 		if (resp->match_curr.nelem != 1) {
1410 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1411 			    "invalid: %d\n", vswp->instance,
1412 			    resp->match_curr.nelem);
1413 			return (MDEG_FAILURE);
1414 		}
1415 
1416 		mdp = resp->match_curr.mdp;
1417 		node = resp->match_curr.mdep[0];
1418 
1419 	} else {
1420 		return (MDEG_FAILURE);
1421 	}
1422 
1423 	/* Validate name and instance */
1424 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1425 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1426 		return (MDEG_FAILURE);
1427 	}
1428 
1429 	/* is this a virtual-network-switch? */
1430 	if (strcmp(node_name, vsw_propname) != 0) {
1431 		DERR(vswp, "%s: Invalid node name: %s\n",
1432 		    __func__, node_name);
1433 		return (MDEG_FAILURE);
1434 	}
1435 
1436 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1437 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1438 		    __func__);
1439 		return (MDEG_FAILURE);
1440 	}
1441 
1442 	/* is this the right instance of vsw? */
1443 	if (inst != vswp->regprop) {
1444 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1445 		    __func__, inst);
1446 		return (MDEG_FAILURE);
1447 	}
1448 
1449 	vsw_update_md_prop(vswp, mdp, node);
1450 
1451 	return (MDEG_SUCCESS);
1452 }
1453 
1454 /*
1455  * Mdeg callback invoked for changes to the vsw-port nodes
1456  * under the vsw node.
1457  */
1458 static int
1459 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1460 {
1461 	vsw_t		*vswp;
1462 	int		idx;
1463 	md_t		*mdp;
1464 	mde_cookie_t	node;
1465 	uint64_t	inst;
1466 	int		rv;
1467 
1468 	if ((resp == NULL) || (cb_argp == NULL))
1469 		return (MDEG_FAILURE);
1470 
1471 	vswp = (vsw_t *)cb_argp;
1472 
1473 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1474 	    " : prev matched %d", __func__, resp->added.nelem,
1475 	    resp->removed.nelem, resp->match_curr.nelem,
1476 	    resp->match_prev.nelem);
1477 
1478 	/* process added ports */
1479 	for (idx = 0; idx < resp->added.nelem; idx++) {
1480 		mdp = resp->added.mdp;
1481 		node = resp->added.mdep[idx];
1482 
1483 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1484 
1485 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1486 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1487 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1488 		}
1489 	}
1490 
1491 	/* process removed ports */
1492 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1493 		mdp = resp->removed.mdp;
1494 		node = resp->removed.mdep[idx];
1495 
1496 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1497 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1498 			    __func__, id_propname, idx);
1499 			continue;
1500 		}
1501 
1502 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1503 
1504 		if (vsw_port_detach(vswp, inst) != 0) {
1505 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1506 			    vswp->instance, inst);
1507 		}
1508 	}
1509 
1510 	/*
1511 	 * Currently no support for updating already active ports.
1512 	 * So, ignore the match_curr and match_priv arrays for now.
1513 	 */
1514 
1515 	D1(vswp, "%s: exit", __func__);
1516 
1517 	return (MDEG_SUCCESS);
1518 }
1519 
1520 /*
1521  * Scan the machine description for this instance of vsw
1522  * and read its properties. Called only from vsw_attach().
1523  * Returns: 0 on success, 1 on failure.
1524  */
1525 static int
1526 vsw_read_mdprops(vsw_t *vswp)
1527 {
1528 	md_t		*mdp = NULL;
1529 	mde_cookie_t	rootnode;
1530 	mde_cookie_t	*listp = NULL;
1531 	uint64_t	inst;
1532 	uint64_t	cfgh;
1533 	char		*name;
1534 	int		rv = 1;
1535 	int		num_nodes = 0;
1536 	int		num_devs = 0;
1537 	int		listsz = 0;
1538 	int		i;
1539 
1540 	/*
1541 	 * In each 'virtual-device' node in the MD there is a
1542 	 * 'cfg-handle' property which is the MD's concept of
1543 	 * an instance number (this may be completely different from
1544 	 * the device drivers instance #). OBP reads that value and
1545 	 * stores it in the 'reg' property of the appropriate node in
1546 	 * the device tree. We first read this reg property and use this
1547 	 * to compare against the 'cfg-handle' property of vsw nodes
1548 	 * in MD to get to this specific vsw instance and then read
1549 	 * other properties that we are interested in.
1550 	 * We also cache the value of 'reg' property and use it later
1551 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1552 	 */
1553 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1554 	    DDI_PROP_DONTPASS, reg_propname, -1);
1555 	if (inst == -1) {
1556 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1557 		    "OBP device tree", vswp->instance, reg_propname);
1558 		return (rv);
1559 	}
1560 
1561 	vswp->regprop = inst;
1562 
1563 	if ((mdp = md_get_handle()) == NULL) {
1564 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1565 		return (rv);
1566 	}
1567 
1568 	num_nodes = md_node_count(mdp);
1569 	ASSERT(num_nodes > 0);
1570 
1571 	listsz = num_nodes * sizeof (mde_cookie_t);
1572 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1573 
1574 	rootnode = md_root_node(mdp);
1575 
1576 	/* search for all "virtual_device" nodes */
1577 	num_devs = md_scan_dag(mdp, rootnode,
1578 	    md_find_name(mdp, vdev_propname),
1579 	    md_find_name(mdp, "fwd"), listp);
1580 	if (num_devs <= 0) {
1581 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1582 		goto vsw_readmd_exit;
1583 	}
1584 
1585 	/*
1586 	 * Now loop through the list of virtual-devices looking for
1587 	 * devices with name "virtual-network-switch" and for each
1588 	 * such device compare its instance with what we have from
1589 	 * the 'reg' property to find the right node in MD and then
1590 	 * read all its properties.
1591 	 */
1592 	for (i = 0; i < num_devs; i++) {
1593 
1594 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1595 			DWARN(vswp, "%s: name property not found\n",
1596 			    __func__);
1597 			goto vsw_readmd_exit;
1598 		}
1599 
1600 		/* is this a virtual-network-switch? */
1601 		if (strcmp(name, vsw_propname) != 0)
1602 			continue;
1603 
1604 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1605 			DWARN(vswp, "%s: cfg-handle property not found\n",
1606 			    __func__);
1607 			goto vsw_readmd_exit;
1608 		}
1609 
1610 		/* is this the required instance of vsw? */
1611 		if (inst != cfgh)
1612 			continue;
1613 
1614 		/* now read all properties of this vsw instance */
1615 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1616 		break;
1617 	}
1618 
1619 vsw_readmd_exit:
1620 
1621 	kmem_free(listp, listsz);
1622 	(void) md_fini_handle(mdp);
1623 	return (rv);
1624 }
1625 
1626 /*
1627  * Read the initial start-of-day values from the specified MD node.
1628  */
1629 static int
1630 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1631 {
1632 	int		i;
1633 	uint64_t 	macaddr = 0;
1634 
1635 	D1(vswp, "%s: enter", __func__);
1636 
1637 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1638 		return (1);
1639 	}
1640 
1641 	/* mac address for vswitch device itself */
1642 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1643 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1644 		    vswp->instance);
1645 		return (1);
1646 	}
1647 
1648 	vsw_save_lmacaddr(vswp, macaddr);
1649 
1650 	if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) {
1651 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1652 		    "defaulting to 'switched' mode",
1653 		    __func__, smode_propname);
1654 
1655 		for (i = 0; i < NUM_SMODES; i++)
1656 			vswp->smode[i] = VSW_LAYER2;
1657 
1658 		vswp->smode_num = NUM_SMODES;
1659 	} else {
1660 		ASSERT(vswp->smode_num != 0);
1661 	}
1662 
1663 	vsw_read_pri_eth_types(vswp, mdp, node);
1664 
1665 	D1(vswp, "%s: exit", __func__);
1666 	return (0);
1667 }
1668 
1669 /*
1670  * This function reads "priority-ether-types" property from md. This property
1671  * is used to enable support for priority frames. Applications which need
1672  * guaranteed and timely delivery of certain high priority frames to/from
1673  * a vnet or vsw within ldoms, should configure this property by providing
1674  * the ether type(s) for which the priority facility is needed.
1675  * Normal data frames are delivered over a ldc channel using the descriptor
1676  * ring mechanism which is constrained by factors such as descriptor ring size,
1677  * the rate at which the ring is processed at the peer ldc end point, etc.
1678  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1679  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1680  * descriptor ring path and enables a more reliable and timely delivery of
1681  * frames to the peer.
1682  */
1683 static void
1684 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1685 {
1686 	int		rv;
1687 	uint16_t	*types;
1688 	uint64_t	*data;
1689 	int		size;
1690 	int		i;
1691 	size_t		mblk_sz;
1692 
1693 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1694 	    (uint8_t **)&data, &size);
1695 	if (rv != 0) {
1696 		/*
1697 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1698 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1699 		 */
1700 		if (vsw_pri_eth_type != 0) {
1701 			size = sizeof (vsw_pri_eth_type);
1702 			data = &vsw_pri_eth_type;
1703 		} else {
1704 			D3(vswp, "%s: prop(%s) not found", __func__,
1705 			    pri_types_propname);
1706 			size = 0;
1707 		}
1708 	}
1709 
1710 	if (size == 0) {
1711 		vswp->pri_num_types = 0;
1712 		return;
1713 	}
1714 
1715 	/*
1716 	 * we have some priority-ether-types defined;
1717 	 * allocate a table of these types and also
1718 	 * allocate a pool of mblks to transmit these
1719 	 * priority packets.
1720 	 */
1721 	size /= sizeof (uint64_t);
1722 	vswp->pri_num_types = size;
1723 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1724 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1725 		types[i] = data[i] & 0xFFFF;
1726 	}
1727 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1728 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1729 }
1730 
1731 /*
1732  * Check to see if the relevant properties in the specified node have
1733  * changed, and if so take the appropriate action.
1734  *
1735  * If any of the properties are missing or invalid we don't take
1736  * any action, as this function should only be invoked when modifications
1737  * have been made to what we assume is a working configuration, which
1738  * we leave active.
1739  *
1740  * Note it is legal for this routine to be invoked even if none of the
1741  * properties in the port node within the MD have actually changed.
1742  */
1743 static void
1744 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1745 {
1746 	char		physname[LIFNAMSIZ];
1747 	char		drv[LIFNAMSIZ];
1748 	uint_t		ddi_instance;
1749 	uint8_t		new_smode[NUM_SMODES];
1750 	int		i, smode_num = 0;
1751 	uint64_t 	macaddr = 0;
1752 	enum		{MD_init = 0x1,
1753 				MD_physname = 0x2,
1754 				MD_macaddr = 0x4,
1755 				MD_smode = 0x8} updated;
1756 	int		rv;
1757 
1758 	updated = MD_init;
1759 
1760 	D1(vswp, "%s: enter", __func__);
1761 
1762 	/*
1763 	 * Check if name of physical device in MD has changed.
1764 	 */
1765 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1766 		/*
1767 		 * Do basic sanity check on new device name/instance,
1768 		 * if its non NULL. It is valid for the device name to
1769 		 * have changed from a non NULL to a NULL value, i.e.
1770 		 * the vsw is being changed to 'routed' mode.
1771 		 */
1772 		if ((strlen(physname) != 0) &&
1773 		    (ddi_parse(physname, drv,
1774 		    &ddi_instance) != DDI_SUCCESS)) {
1775 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
1776 			    " a valid device name/instance",
1777 			    vswp->instance, physname);
1778 			goto fail_reconf;
1779 		}
1780 
1781 		if (strcmp(physname, vswp->physname)) {
1782 			D2(vswp, "%s: device name changed from %s to %s",
1783 			    __func__, vswp->physname, physname);
1784 
1785 			updated |= MD_physname;
1786 		} else {
1787 			D2(vswp, "%s: device name unchanged at %s",
1788 			    __func__, vswp->physname);
1789 		}
1790 	} else {
1791 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
1792 		    "device from updated MD.", vswp->instance);
1793 		goto fail_reconf;
1794 	}
1795 
1796 	/*
1797 	 * Check if MAC address has changed.
1798 	 */
1799 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1800 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1801 		    vswp->instance);
1802 		goto fail_reconf;
1803 	} else {
1804 		uint64_t maddr = macaddr;
1805 		READ_ENTER(&vswp->if_lockrw);
1806 		for (i = ETHERADDRL - 1; i >= 0; i--) {
1807 			if (vswp->if_addr.ether_addr_octet[i]
1808 			    != (macaddr & 0xFF)) {
1809 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
1810 				    __func__, i,
1811 				    vswp->if_addr.ether_addr_octet[i],
1812 				    (macaddr & 0xFF));
1813 				updated |= MD_macaddr;
1814 				macaddr = maddr;
1815 				break;
1816 			}
1817 			macaddr >>= 8;
1818 		}
1819 		RW_EXIT(&vswp->if_lockrw);
1820 		if (updated & MD_macaddr) {
1821 			vsw_save_lmacaddr(vswp, macaddr);
1822 		}
1823 	}
1824 
1825 	/*
1826 	 * Check if switching modes have changed.
1827 	 */
1828 	if (vsw_get_md_smodes(vswp, mdp, node,
1829 	    new_smode, &smode_num)) {
1830 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
1831 		    vswp->instance, smode_propname);
1832 		goto fail_reconf;
1833 	} else {
1834 		ASSERT(smode_num != 0);
1835 		if (smode_num != vswp->smode_num) {
1836 			D2(vswp, "%s: number of modes changed from %d to %d",
1837 			    __func__, vswp->smode_num, smode_num);
1838 		}
1839 
1840 		for (i = 0; i < smode_num; i++) {
1841 			if (new_smode[i] != vswp->smode[i]) {
1842 				D2(vswp, "%s: mode changed from %d to %d",
1843 				    __func__, vswp->smode[i], new_smode[i]);
1844 				updated |= MD_smode;
1845 				break;
1846 			}
1847 		}
1848 	}
1849 
1850 	/*
1851 	 * Now make any changes which are needed...
1852 	 */
1853 
1854 	if (updated & (MD_physname | MD_smode)) {
1855 
1856 		/*
1857 		 * Stop any pending timeout to setup switching mode.
1858 		 */
1859 		vsw_stop_switching_timeout(vswp);
1860 
1861 		/*
1862 		 * Remove unicst, mcst addrs of vsw interface
1863 		 * and ports from the physdev.
1864 		 */
1865 		vsw_unset_addrs(vswp);
1866 
1867 		/*
1868 		 * Stop, detach and close the old device..
1869 		 */
1870 		mutex_enter(&vswp->mac_lock);
1871 
1872 		vsw_mac_detach(vswp);
1873 		vsw_mac_close(vswp);
1874 
1875 		mutex_exit(&vswp->mac_lock);
1876 
1877 		/*
1878 		 * Update phys name.
1879 		 */
1880 		if (updated & MD_physname) {
1881 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
1882 			    vswp->instance, vswp->physname, physname);
1883 			(void) strncpy(vswp->physname,
1884 			    physname, strlen(physname) + 1);
1885 		}
1886 
1887 		/*
1888 		 * Update array with the new switch mode values.
1889 		 */
1890 		if (updated & MD_smode) {
1891 			for (i = 0; i < smode_num; i++)
1892 				vswp->smode[i] = new_smode[i];
1893 
1894 			vswp->smode_num = smode_num;
1895 			vswp->smode_idx = 0;
1896 		}
1897 
1898 		/*
1899 		 * ..and attach, start the new device.
1900 		 */
1901 		rv = vsw_setup_switching(vswp);
1902 		if (rv == EAGAIN) {
1903 			/*
1904 			 * Unable to setup switching mode.
1905 			 * As the error is EAGAIN, schedule a timeout to retry
1906 			 * and return. Programming addresses of ports and
1907 			 * vsw interface will be done when the timeout handler
1908 			 * completes successfully.
1909 			 */
1910 			mutex_enter(&vswp->swtmout_lock);
1911 
1912 			vswp->swtmout_enabled = B_TRUE;
1913 			vswp->swtmout_id =
1914 			    timeout(vsw_setup_switching_timeout, vswp,
1915 			    (vsw_setup_switching_delay *
1916 			    drv_usectohz(MICROSEC)));
1917 
1918 			mutex_exit(&vswp->swtmout_lock);
1919 
1920 			return;
1921 
1922 		} else if (rv) {
1923 			goto fail_update;
1924 		}
1925 
1926 		/*
1927 		 * program unicst, mcst addrs of vsw interface
1928 		 * and ports in the physdev.
1929 		 */
1930 		vsw_set_addrs(vswp);
1931 
1932 	} else if (updated & MD_macaddr) {
1933 		/*
1934 		 * We enter here if only MD_macaddr is exclusively updated.
1935 		 * If MD_physname and/or MD_smode are also updated, then
1936 		 * as part of that, we would have implicitly processed
1937 		 * MD_macaddr update (above).
1938 		 */
1939 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
1940 		    vswp->instance, macaddr);
1941 
1942 		READ_ENTER(&vswp->if_lockrw);
1943 		if (vswp->if_state & VSW_IF_UP) {
1944 
1945 			mutex_enter(&vswp->hw_lock);
1946 			/*
1947 			 * Remove old mac address of vsw interface
1948 			 * from the physdev
1949 			 */
1950 			(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1951 			/*
1952 			 * Program new mac address of vsw interface
1953 			 * in the physdev
1954 			 */
1955 			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1956 			mutex_exit(&vswp->hw_lock);
1957 			if (rv != 0) {
1958 				cmn_err(CE_NOTE,
1959 				    "!vsw%d: failed to program interface "
1960 				    "unicast address\n", vswp->instance);
1961 			}
1962 			/*
1963 			 * Notify the MAC layer of the changed address.
1964 			 */
1965 			mac_unicst_update(vswp->if_mh,
1966 			    (uint8_t *)&vswp->if_addr);
1967 
1968 		}
1969 		RW_EXIT(&vswp->if_lockrw);
1970 
1971 	}
1972 
1973 	return;
1974 
1975 fail_reconf:
1976 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
1977 	return;
1978 
1979 fail_update:
1980 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
1981 	    vswp->instance);
1982 }
1983 
1984 /*
1985  * Add a new port to the system.
1986  *
1987  * Returns 0 on success, 1 on failure.
1988  */
1989 int
1990 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
1991 {
1992 	uint64_t		ldc_id;
1993 	uint8_t			*addrp;
1994 	int			i, addrsz;
1995 	int			num_nodes = 0, nchan = 0;
1996 	int			listsz = 0;
1997 	mde_cookie_t		*listp = NULL;
1998 	struct ether_addr	ea;
1999 	uint64_t		macaddr;
2000 	uint64_t		inst = 0;
2001 	vsw_port_t		*port;
2002 
2003 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2004 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2005 		    id_propname);
2006 		return (1);
2007 	}
2008 
2009 	/*
2010 	 * Find the channel endpoint node(s) (which should be under this
2011 	 * port node) which contain the channel id(s).
2012 	 */
2013 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2014 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2015 		    __func__, num_nodes);
2016 		return (1);
2017 	}
2018 
2019 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2020 
2021 	/* allocate enough space for node list */
2022 	listsz = num_nodes * sizeof (mde_cookie_t);
2023 	listp = kmem_zalloc(listsz, KM_SLEEP);
2024 
2025 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2026 	    md_find_name(mdp, "fwd"), listp);
2027 
2028 	if (nchan <= 0) {
2029 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2030 		kmem_free(listp, listsz);
2031 		return (1);
2032 	}
2033 
2034 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2035 
2036 	/* use property from first node found */
2037 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2038 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2039 		    id_propname);
2040 		kmem_free(listp, listsz);
2041 		return (1);
2042 	}
2043 
2044 	/* don't need list any more */
2045 	kmem_free(listp, listsz);
2046 
2047 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2048 
2049 	/* read mac-address property */
2050 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2051 	    &addrp, &addrsz)) {
2052 		DWARN(vswp, "%s: prop(%s) not found",
2053 		    __func__, remaddr_propname);
2054 		return (1);
2055 	}
2056 
2057 	if (addrsz < ETHERADDRL) {
2058 		DWARN(vswp, "%s: invalid address size", __func__);
2059 		return (1);
2060 	}
2061 
2062 	macaddr = *((uint64_t *)addrp);
2063 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2064 
2065 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2066 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2067 		macaddr >>= 8;
2068 	}
2069 
2070 	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
2071 		DERR(vswp, "%s: failed to attach port", __func__);
2072 		return (1);
2073 	}
2074 
2075 	port = vsw_lookup_port(vswp, (int)inst);
2076 
2077 	/* just successfuly created the port, so it should exist */
2078 	ASSERT(port != NULL);
2079 
2080 	return (0);
2081 }
2082 
2083 /*
2084  * vsw_mac_rx -- A common function to send packets to the interface.
2085  * By default this function check if the interface is UP or not, the
2086  * rest of the behaviour depends on the flags as below:
2087  *
2088  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2089  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2090  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2091  */
2092 void
2093 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2094     mblk_t *mp, vsw_macrx_flags_t flags)
2095 {
2096 	D1(vswp, "%s:enter\n", __func__);
2097 	READ_ENTER(&vswp->if_lockrw);
2098 	/* Check if the interface is up */
2099 	if (!(vswp->if_state & VSW_IF_UP)) {
2100 		RW_EXIT(&vswp->if_lockrw);
2101 		/* Free messages only if FREEMSG flag specified */
2102 		if (flags & VSW_MACRX_FREEMSG) {
2103 			freemsgchain(mp);
2104 		}
2105 		D1(vswp, "%s:exit\n", __func__);
2106 		return;
2107 	}
2108 	/*
2109 	 * If PROMISC flag is passed, then check if
2110 	 * the interface is in the PROMISC mode.
2111 	 * If not, drop the messages.
2112 	 */
2113 	if (flags & VSW_MACRX_PROMISC) {
2114 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2115 			RW_EXIT(&vswp->if_lockrw);
2116 			/* Free messages only if FREEMSG flag specified */
2117 			if (flags & VSW_MACRX_FREEMSG) {
2118 				freemsgchain(mp);
2119 			}
2120 			D1(vswp, "%s:exit\n", __func__);
2121 			return;
2122 		}
2123 	}
2124 	RW_EXIT(&vswp->if_lockrw);
2125 	/*
2126 	 * If COPYMSG flag is passed, then make a copy
2127 	 * of the message chain and send up the copy.
2128 	 */
2129 	if (flags & VSW_MACRX_COPYMSG) {
2130 		mp = copymsgchain(mp);
2131 		if (mp == NULL) {
2132 			D1(vswp, "%s:exit\n", __func__);
2133 			return;
2134 		}
2135 	}
2136 
2137 	D2(vswp, "%s: sending up stack", __func__);
2138 	mac_rx(vswp->if_mh, mrh, mp);
2139 	D1(vswp, "%s:exit\n", __func__);
2140 }
2141 
2142 /* copy mac address of vsw into soft state structure */
2143 static void
2144 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2145 {
2146 	int	i;
2147 
2148 	WRITE_ENTER(&vswp->if_lockrw);
2149 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2150 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2151 		macaddr >>= 8;
2152 	}
2153 	RW_EXIT(&vswp->if_lockrw);
2154 }
2155